/* readxsd.pl:  read an XSD schema document and look for
 * pattern elements.
 */

/* Copyright (c) 2008, 2011 World Wide Web Consortium, 
 * (Massachusetts Institute of Technology, European Research 
 * Consortium for Informatics and Mathematics, Keio University). 
 * All Rights Reserved. This work is distributed under the 
 * W3C(TM) Software License [1] in the hope that it will be 
 * useful, but WITHOUT ANY WARRANTY; without even the implied 
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * [1] http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231
 */

/* This file is part of Xerophily, a parser for XSD regular expressions. */

/* Revisions:
 * 2008-03-26 : CMSMcQ : make a quick hack to see if I can run
 *                       my tests.
 */

:- module(readxsd,[
		   read_xsd_get_patterns/2,
		   read_xsd_check_patterns/2,
		   read_xsd_check_patterns_allgrammars/1,
		   annotate_xsd/1,
		   annotate_xsd/2,
		   annotate_xsd/3
		  ]).

:- use_module('parseregex.pl').
:- use_module('g_opts.pl').
:- use_module('ast.pl').

annotate_xsd(File) :-
	default_grammar(G),
	annotate_xsd(File,[G]).

annotate_xsd(File,Grammars) :-
	telling(CurrentOut),
	annotate_xsd(File,Grammars,CurrentOut).

annotate_xsd(File,Grammars,Outfile) :-
	load_structure(File,Content,[dialect(xmlns), space(remove)]),
	annotate_patterns(Content,Grammars,Result),
	open(Outfile,write,Out),
	xml_write(Out,Result,[nsmap([xsd='http://www.w3.org/2001/XMLSchema',
				    rpt='http://www.w3.org/XML/2008/03/xsdl-regex/report'])
			     ]),
	close(Out).

/* earlier, this predicate had doctype('none.dtd') in it.
  annotate_xsd(File,Grammars,Outfile) :-
  	load_structure(File,Content,[dialect(xmlns), space(remove)]),
  	annotate_patterns(Content,Grammars,Result),
  	open(Outfile,write,Out),
  	xml_write(Out,Result,[nsmap([xsd='http://www.w3.org/2001/XMLSchema',
  				    rpt='http://www.w3.org/XML/2008/03/xsdl-regex/report']),
  			     doctype('none.dtd')]),
  	close(Out).
*/

/* annotate_patterns on lists of content or of attributes */
annotate_patterns([],_Gs,[]).
annotate_patterns([N0|Ns0],Gs,[N|Ns]) :-
	annotate_pattern(N0,Gs,N),
	annotate_patterns(Ns0,Gs,Ns).

/* on individual content nodes and attributes */
annotate_pattern(element('http://www.w3.org/2001/XMLSchema':pattern,Atts,Ch0),
		 Gs,
		 element('http://www.w3.org/2001/XMLSchema':pattern,Atts,Ch)) :-
	(   member((value=Expr),Atts)
	->  allparses(Expr,Gs,ASTs0),
	    %%% ast_awf(ASTs0,ASTs),
	    maplist(ast_awf,ASTs0,ASTs),
	    length(ASTs,Count),
	    Ch = [element('http://www.w3.org/2001/XMLSchema':annotation,
			     [],
			     [element('http://www.w3.org/2001/XMLSchema':appinfo,
				      [
				       xmlns:rpt='http://www.w3.org/XML/2008/03/xsdl-regex/report',
				      'http://www.w3.org/XML/2008/03/xsdl-regex/report':app='Prolog regex parser',
				      'http://www.w3.org/XML/2008/03/xsdl-regex/report':parsecount=Count
				      ],
				      ASTs)])
		 |Ch0]
	;   Ch = [element(no_value_found,[],[]) | Ch0]
	).
annotate_pattern(element(GI,Atts,Ch0),Gs,element(GI,Atts,Ch)) :-
	not(GI='http://www.w3.org/2001/XMLSchema':pattern),
	annotate_patterns(Ch0,Gs,Ch).
annotate_pattern(Atom,_Gs,Atom) :-
	atom(Atom).


/* The rest of this is the first cut, which seems to duplicate other
 * code, but not reliably */

read_xsd_get_patterns(File,Patterns) :-
	load_structure(File,Content,[dialect(xmlns)]),
	find_patterns(Content,[],Patterns).

/* find_patterns on lists of content or of attributes */
find_patterns([],Patterns,Patterns).
find_patterns([Node|Nodes],Patterns0,Patterns) :-
	find_patterns(Node,Patterns0,Patterns1),
	find_patterns(Nodes,Patterns1,Patterns).

/* on individual content nodes and attributes */
find_patterns(element('http://www.w3.org/2001/XMLSchema':pattern,Atts,_Ch),
	      Patterns0,
	      Patterns) :-
	find_patterns(Atts,Patterns0,Patterns).
find_patterns(element(GI,_Atts,Ch),
	      Patterns0,
	      Patterns) :-
	not(GI='http://www.w3.org/2001/XMLSchema':pattern),
	find_patterns(Ch,Patterns0,Patterns).
find_patterns((value=Expr),Ps,[Expr|Ps]).
find_patterns((Attname=_),Ps,Ps) :-
	Attname \= value.
find_patterns(Atom,Patterns,Patterns) :-
	atom(Atom).


/* check_patterns(File,Grammar) : parse patterns in File against Grammar
 */
read_xsd_check_patterns(F,G) :-
	read_xsd_get_patterns(F,Ps),
	check_patterns_report(Ps,[G]).

check_patterns_report([],_G).
check_patterns_report([P|Ps],Gs) :-
	check_pattern(P,Gs,[],ASTs),
	report_several(P,Gs,ASTs),
	check_patterns_report(Ps,Gs).
/*
check_pattern(_P,[]).
check_pattern(P,[G|Gs]) :-
	(   regex(P,G,AST)
	;   AST = error(string_not_recognized)
	),
	report(P,G,AST),
	check_pattern(P,Gs).
*/
check_pattern(_P,[],ASTs0,ASTs) :-
	collapse(ASTs0,ASTs).
check_pattern(P,[G|Gs],ASTs0,ASTs) :-
	(   regex(P,G,AST)
	;   AST = parse(G,error(string_not_recognized))
	),
	check_pattern(P,Gs,[AST|ASTs0],ASTs).

/* collapse(ASTlist0,ASTlist): true iff ASTlist is ASTlist0, deduped */
collapse(A0,A) :-
	collapse(A0,[],A).
/* collapse(Input,Accumulator,Result) 
 * collapse the first input parse into the accumulator, then recur */
collapse([A|As],Res0,Res) :-
	collapse(A,Res0,Res1),
	collapse(As,Res1,Res).
collapse([],Res,Res).

/* collapse(Parse,Acc,Result):
 * true if Result is result of inserting Parse into Acc. */
/* if accumulator is empty, just return Parse, 
 * changing grammar info from atom to list */
collapse(parse(G,AST),[],[parse([G],AST)]).

/* If needle matches head of haystack, add its name to list of grammars */
collapse(parse(G,AST),
	 [parse(Gs,AST)|Parses],
	 [parse([G|Gs],AST)|Parses]).
/* If needle doesn't match head of haystack, try again */
collapse(parse(G,AST0),
	 [parse(Gs,AST1)|Parses0],
	 [parse(Gs,AST1)|Parses]) :-	
	AST0 \= AST1,
	collapse(parse(G,AST0),Parses0,Parses).

report(P,G,AST) :-
	write('<testresult grammar="'),
	write(G),
	write('" string="'),
	write(P),
	write('">'),
	nl,
	write(AST),
	nl,
	write('</test>'), 
	nl.
report_several(P,Gs,ASTs) :-
	write('<test string="'),
	write(P),
	write('" resultcount="'),
	length(ASTs,N),
	write(N),
	write('" grammars="'),
	write(Gs),
        write('">'),
	nl,
	report_parses(ASTs),
	write('</test>'),
	nl.
report_parses([]).
report_parses([parse(Gs,AST)|Parses]) :-
	write('<result grammars="'),
	write(Gs),
	write('">'),
	nl,
	write(AST),
	write('</result>'),
	nl,
	report_parses(Parses).
	
read_xsd_check_patterns_allgrammars(File) :-
	read_xsd_get_patterns(File,Ps),
	get_grammars(Gs),
	check_patterns_report(Ps,Gs).
	








