/* maketests.pl:  a simple Prolog program to generate some sample
 * test cases for regex parsers.
 * Some of the output may require hand-editing to make it more
 * interesting.  And the focus is on the parts of the grammar that
 * have changed, so the test cases are not exhaustive.
 * Oh, yes, and if the grammar is ambiguous, I don't mind.	
 */

/* Copyright (c) 2008 World Wide Web Consortium, 
 * (Massachusetts Institute of Technology, European Research 
 * Consortium for Informatics and Mathematics, Keio University). 
 */

/* This file is part of Xerophily, a parser for XSD regular expressions.
 *
 * Xerophily is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser Public License as published
 * by the Free Software Foundation, either version 3 of the License,
 * or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser Public License for more details.
 *
 * You should have received a copy of the GNU Lesser Public License
 * along with this program.  If not, see
 * <http://www.gnu.org/licenses/>.
 */

/* Revisions:
 * 2008-03-26 : CMSMcQ : make a quick hack to generate some appropriately 
 *                       gnarly tests.
 */
/* Module declaration.  Exports the two generator entry points
 * maketest/1 and maketest/2, plus testcase/2, which is the
 * two-extra-argument predicate produced by DCG translation of the
 * 'testcase' grammar rule defined in this file.
 */
:- module(maketests,
	  [ maketest/1,
	    maketest/2,
	    testcase/2
	  ]).

/* maketest(String), maketest(KW, String):
 * generate one regex test string per solution and write it to the
 * current output; further strings are produced on backtracking.
 *
 * KW is 'small' or 'full' and controls how many alternatives the
 * grammar uses.  'small' cuts the alphabet down a lot so that the
 * 'interesting' constructs show up sooner.  maketest/1 is the same
 * as maketest/2 with KW = small.
 */
maketest(String) :-
  maketest(small, String).

maketest(KW, String) :-
  /* With String unbound, append/3 enumerates open lists of length
   * 0, 1, 2, ... so candidate strings are tried shortest first and
   * each grammar search works on a list of fixed length. */
  append(String, [], String),
  phrase(regex(KW), String),
  forall(member(Code, String), put(Code)).
/* testcase//0: grammar entry point; a test case is a regex built
 * with the 'small' keyword. */
testcase -->
  regex(small).

/* compound expressions */
/* alternation: a regex is a first branch followed by whatever
 * branches//1 contributes after it. */
regex(KW) -->
  branch(KW),
  branches(KW).
/* branches//1: zero or more further branches, each introduced by
 * "|".  The empty alternative is offered first. */
branches(KW) -->
  (   []
  ;   "|", branch(KW), branches(KW)
  ).

/* branch//1: a branch is just a sequence of pieces. */
branch(KW) -->
  pieces(KW).

/* pieces//1: zero or more pieces; the empty sequence is offered
 * first. */
pieces(KW) -->
  (   []
  ;   piece(KW), pieces(KW)
  ).

/* piece//1: an atom followed by an optional quantifier. */
piece(KW) -->
  atom(KW),
  opt_quantifier(KW).

/* opt_quantifier//1: nothing at all (offered first), or one
 * quantifier. */
opt_quantifier(KW) -->
  (   []
  ;   quantifier(KW)
  ).

/* quantifier//1: the quantifier strings the generator may emit.
 * Clauses headed with 'full' apply only when the keyword unifies
 * with full; clauses headed with '_' apply for every keyword.
 * Alternatives are offered in clause order on backtracking.
 */
quantifier(full) --> "?".
quantifier(_) --> "*".
quantifier(full) --> "+".
quantifier(_) --> "{4}".
quantifier(full) --> "{1,3}".
quantifier(full) --> "{0,5}".
quantifier(full) --> "{2,}".
quantifier(full) --> "{0,0}".

/* atom//1: one regex atom.  Alternatives, in the order offered:
 *   - a single character,
 *   - a bracketed character class,
 *   - a parenthesized sub-regex,
 *   - a character range written char "-" char,
 *   - a character reference,
 *   - a backslash escape,
 *   - a category escape: backslash, "p{", a category, "}",
 *   - one of three bare category/block names.
 * The range form and the bare names are offered here even outside
 * brackets and braces; presumably this is deliberate, to produce
 * awkward inputs (the file header says ambiguity is acceptable).
 */
atom(KW) --> char(KW).
atom(KW) --> charClass(KW).
atom(KW) --> "(", regex(KW), ")".
atom(KW) --> char(KW), "-", char(KW).
atom(KW) --> charref(KW).
atom(KW) --> escape(KW).
/* The backslash has to be doubled inside a Prolog quoted string:
 * a lone backslash followed by p is an undefined escape sequence
 * and is rejected as a syntax error by the reader. */
atom(KW) --> "\\p{", category(KW), "}".
atom(_) --> "IsBasicLatin".
atom(_) --> "Lu".
atom(_) --> "Cherokee".
/* category//1: what may appear between the braces of a category
 * escape.  Clauses headed with 'full' apply only when the keyword
 * unifies with full.  The last clause allows any sequence of atoms
 * (atoms//1 also accepts the empty sequence) in that position.
 */
category(_) --> "Lu".
category(full) --> "Nd".
category(full) --> "IsBasicLatin".
category(_) --> "IsCherokee".
category(full) --> "Cherokee".
category(KW) --> atoms(KW).

/* char//1: the single characters the generator may emit as-is.
 * The list includes characters that are regex metacharacters,
 * emitted here without any escaping.  Clauses headed with 'full'
 * apply only when the keyword unifies with full.  The string
 * written with a doubled backslash denotes one backslash character.
 */
char(_) --> "a".
char(_) --> "d".
char(_) --> "-".
char(full) --> "[".
char(full) --> "]".
char(_) --> "{".
char(full) --> "}".
char(full) --> "(".
char(full) --> ")".
char(_) --> "*".
char(full) --> "?".
char(full) --> "+".
char(_) --> "\\".
char(_) --> "^".
char(_) --> "n".
char(_) --> "|".
char(_) --> ".".

/* charClass//1: "[", an optional "^", a sequence of atoms, an
 * optional subtraction, then "]". */
charClass(KW) -->
  "[",
  opt_caret,
  atoms(KW),
  opt_subtraction(KW),
  "]".
/* opt_caret//0: nothing (offered first), or a single "^". */
opt_caret -->
  (   []
  ;   "^"
  ).
/* opt_subtraction//1: nothing (offered first), or "-" followed by
 * a character class. */
opt_subtraction(KW) -->
  (   []
  ;   "-", charClass(KW)
  ).

/* atoms//1: zero or more atoms; the empty sequence is offered
 * first. */
atoms(KW) -->
  (   []
  ;   atom(KW), atoms(KW)
  ).

/* charref//1: character references.  One fixed hexadecimal
 * reference is available for every keyword.  When the keyword
 * unifies with full, generated references are available as well:
 * "&#" plus one or more decimal digits plus ";", and "&#x" plus one
 * or more hexadecimal digits plus ";".
 */
charref(_) --> "&#xB0B0;".
charref(full) --> "&#", decdigit, decdigits, ";".
charref(full) --> "&#x", hexdigit, hexdigits, ";".
/* decdigits//0: zero or more decimal digits; the empty sequence is
 * offered first. */
decdigits -->
  (   []
  ;   decdigit, decdigits
  ).

/* decdigit//0: the sample of decimal digits used by the generator,
 * offered in the order 0, 2, 3. */
decdigit -->
  (   "0"
  ;   "2"
  ;   "3"
  ).
/* hexdigits//0: zero or more hexadecimal digits; the empty sequence
 * is offered first. */
hexdigits -->
  (   []
  ;   hexdigit, hexdigits
  ).

/* hexdigit//0: any decimal digit accepted by decdigit//0, then the
 * letters A, C and F, in that order. */
hexdigit -->
  (   decdigit
  ;   "A"
  ;   "C"
  ;   "F"
  ).

/* escape//1: backslash escapes.  Each quoted string below starts
 * with a doubled backslash, which denotes one backslash character,
 * followed by the escaped character; the entry with four
 * backslashes is therefore the two-character escape for a backslash
 * itself.  Clauses headed with 'full' apply only when the keyword
 * unifies with full.
 */
escape(_) --> "\\n".
escape(full) --> "\\t".
escape(_) --> "\\\\".
escape(full) --> "\\.".
escape(_) --> "\\|".
escape(full) --> "\\-".
escape(_) --> "\\^".
escape(_) --> "\\*".
escape(full) --> "\\?".
escape(full) --> "\\+".
escape(_) --> "\\{".
escape(full) --> "\\}".
escape(full) --> "\\(".
escape(full) --> "\\)".
escape(full) --> "\\[".
escape(full) --> "\\]".
escape(full) --> "\\s".
escape(full) --> "\\S".
escape(full) --> "\\w".
escape(full) --> "\\W".
