%
%   URI Grammar Rules
%
%     1.  start with Appendix A ("Collected BNF for URI") of http://www.ietf.org/rfc/rfc2396.txt
%     2.  transform   |  --> ;
%                     =  --> -->        (except as "=")
%                     URI-reference --> uri_reference
%                     IPv4address --> ipv4address
%     3.  whitespace, final period
%     4.  convert statements with [] and * into multiple rules
%            [ ] optional stuff
%                     just a version with and without the phrase
%                     or  "[ " ==> "( []; "
%                     and " ]"  ==> " )"
%            *   repeated stuff
%            n*  repeated and present at least once
%                     turn *foo into star_foo, add the two rules for it
% ULP -- should have been more consistent about #4
%   should use a grammar for THIS; one which can grab
%   the part names and make them available, building the tree
%     5.  space into ,

% there may be some merit to this approach, but I'm not sure yet.
:- import memberchk/2 from basics.
% uri_reference(A)//
%   Experimental one-argument variant of uri_reference//0.
%   Alternatives are tried in this order:
%     1. no URI part at all (A is not touched);
%     2. absoluteURI(A), after which memberchk(abs, A) must succeed;
%     3. relativeURI(A), after which memberchk(rel, A) must succeed.
%   Each alternative is followed by an optional "#" fragment, the empty
%   choice being tried first.
%   NOTE(review): absoluteURI//1 and relativeURI//1 are not defined in the
%   visible part of this file -- confirm they exist before using this rule.
uri_reference(_) -->
    (   []
    ;   "#", fragment
    ).
uri_reference(A) -->
    absoluteURI(A),
    { memberchk(abs, A) },
    (   []
    ;   "#", fragment
    ).
uri_reference(A) -->
    relativeURI(A),
    { memberchk(rel, A) },
    (   []
    ;   "#", fragment
    ).

% uri_reference//0
%   RFC 2396:  URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
%   The URI part is optional and, when present, is EITHER an absolute URI
%   OR a relative URI.  The two must therefore be joined with ";"
%   (alternation); joining them with "," would demand an absolute URI
%   immediately followed by a relative one.
%   The optional "#" fragment follows; the empty choice is tried first.
uri_reference --> ( [] ; absoluteURI ; relativeURI ),
                  ( [] ; "#", fragment ).

% absoluteURI//0
%   RFC 2396:  absoluteURI = scheme ":" ( hier_part | opaque_part )
%   hier_part is tried before opaque_part.
absoluteURI -->
    scheme,
    ":",
    (   hier_part
    ;   opaque_part
    ).

% relativeURI//0
%   RFC 2396:  relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
%   Written out one alternative per clause: the three query-less forms
%   first, then the same three forms followed by "?" and a query.
relativeURI --> net_path.
relativeURI --> abs_path.
relativeURI --> rel_path.
relativeURI --> net_path, "?", query.
relativeURI --> abs_path, "?", query.
relativeURI --> rel_path, "?", query.

% hier_part//0
%   RFC 2396:  hier_part = ( net_path | abs_path ) [ "?" query ]
%   One alternative per clause: the two query-less forms first, then the
%   same two forms followed by "?" and a query.
hier_part --> net_path.
hier_part --> abs_path.
hier_part --> net_path, "?", query.
hier_part --> abs_path, "?", query.

% opaque_part//0
%   RFC 2396:  opaque_part = uric_no_slash *uric
opaque_part -->
    uric_no_slash,
    star_uric.

% star_uric//0  --  *uric (zero or more uric).
%   The empty match is tried first; longer matches are found on backtracking.
star_uric --> "".
star_uric --> uric, star_uric.


% uric_no_slash//0
%   RFC 2396:  uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
%                              "&" | "=" | "+" | "$" | ","
uric_no_slash --> unreserved.
uric_no_slash --> escaped.
uric_no_slash --> ";".
uric_no_slash --> "?".
uric_no_slash --> ":".
uric_no_slash --> "@".
uric_no_slash --> "&".
uric_no_slash --> "=".
uric_no_slash --> "+".
uric_no_slash --> "$".
uric_no_slash --> ",".

% net_path//0
%   RFC 2396:  net_path = "//" authority [ abs_path ]
%   The form without abs_path is tried before the form with it.
net_path -->
    (   "//", authority
    ;   "//", authority, abs_path
    ).

% abs_path//0
%   RFC 2396:  abs_path = "/" path_segments
abs_path -->
    "/",
    path_segments.

% rel_path//0
%   RFC 2396:  rel_path = rel_segment [ abs_path ]
%   The form without abs_path is tried before the form with it.
rel_path -->
    (   rel_segment
    ;   rel_segment, abs_path
    ).
% rel_segment//0
%   RFC 2396:  rel_segment = 1*( unreserved | escaped |
%                                ";" | "@" | "&" | "=" | "+" | "$" | "," )
rel_segment -->
    plus10.

% plus10//0  --  one or more item10; the single-item match is tried first.
plus10 --> item10.
plus10 --> item10, plus10.

% item10//0  --  one character (or escape) allowed inside a rel_segment.
item10 --> unreserved.
item10 --> escaped.
item10 --> ";".
item10 --> "@".
item10 --> "&".
item10 --> "=".
item10 --> "+".
item10 --> "$".
item10 --> ",".

% scheme//0
%   RFC 2396:  scheme = alpha *( alpha | digit | "+" | "-" | "." )
scheme -->
    alpha,
    star12.

% star12//0  --  zero or more item12; the empty match is tried first.
star12 --> [].
star12 --> item12, star12.

% item12//0  --  one character allowed after the first letter of a scheme.
item12 --> alpha.
item12 --> digit.
item12 --> "+".
item12 --> "-".
item12 --> ".".

% authority//0
%   RFC 2396:  authority = server | reg_name
%   server is tried before reg_name.
authority --> server.
authority --> reg_name.

% reg_name//0
%   RFC 2396:  reg_name = 1*( unreserved | escaped | "$" | "," |
%                             ";" | ":" | "@" | "&" | "=" | "+" )
reg_name -->
    plus21.

% plus21//0  --  one or more item21; the single-item match is tried first.
plus21 --> item21.
plus21 --> item21, plus21.

% item21//0  --  one character (or escape) allowed inside a reg_name.
item21 --> unreserved.
item21 --> escaped.
item21 --> "$".
item21 --> ",".
item21 --> ";".
item21 --> ":".
item21 --> "@".
item21 --> "&".
item21 --> "=".
item21 --> "+".

% server//0
%   RFC 2396:  server = [ [ userinfo "@" ] hostport ]
%   Tried in order: empty, bare hostport, userinfo "@" hostport.
server --> [].
server --> hostport.
server --> userinfo, "@", hostport.

% userinfo//0
%   RFC 2396:  userinfo = *( unreserved | escaped |
%                            ";" | ":" | "&" | "=" | "+" | "$" | "," )
userinfo -->
    star22.

% star22//0  --  zero or more item22; the empty match is tried first.
star22 --> [].
star22 --> item22, star22.

% item22//0  --  one character (or escape) allowed inside userinfo.
item22 --> unreserved.
item22 --> escaped.
item22 --> ";".
item22 --> ":".
item22 --> "&".
item22 --> "=".
item22 --> "+".
item22 --> "$".
item22 --> ",".

% hostport//0
%   RFC 2396:  hostport = host [ ":" port ]
%   The form without a port is tried before the form with one.
hostport -->
    (   host
    ;   host, ":", port
    ).

% host//0
%   RFC 2396:  host = hostname | IPv4address
%   hostname is tried before ipv4address.
host --> hostname.
host --> ipv4address.

% hostname//0
%   RFC 2396:  hostname = *( domainlabel "." ) toplabel [ "." ]
%   The form without the trailing "." is tried first.
hostname -->
    (   star23, toplabel
    ;   star23, toplabel, "."
    ).

% star23//0  --  zero or more item23; the empty match is tried first.
star23 --> [].
star23 --> item23, star23.

% item23//0  --  a domainlabel followed by its "." separator.
item23 -->
    domainlabel,
    ".".

% domainlabel//0
%   RFC 2396:  domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
%   The single-character form is tried first.
domainlabel --> alphanum.
domainlabel --> alphanum, star24, alphanum.

% star24//0  --  zero or more item24; the empty match is tried first.
star24 --> [].
star24 --> item24, star24.

% item24//0  --  an interior label character: alphanum or "-".
item24 --> alphanum.
item24 --> "-".

% toplabel//0
%   RFC 2396:  toplabel = alpha | alpha *( alphanum | "-" ) alphanum
%   The single-letter form is tried first.
toplabel --> alpha.
toplabel --> alpha, star24, alphanum.
% ipv4address//0
%   RFC 2396:  IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit
%   Only the shape is checked here; no range test is applied to the numbers.
ipv4address -->
    plus_digit, ".",
    plus_digit, ".",
    plus_digit, ".",
    plus_digit.

% plus_digit//0  --  one or more digits; the single-digit match is tried first.
plus_digit --> digit.
plus_digit --> digit, plus_digit.

% star_digit//0  --  zero or more digits; the empty match is tried first.
star_digit --> [].
star_digit --> digit, star_digit.

% port//0
%   RFC 2396:  port = *digit
port -->
    star_digit.
% path//0
%   RFC 2396:  path = [ abs_path | opaque_part ]
%   Tried in order: abs_path, opaque_part, empty.
path --> abs_path.
path --> opaque_part.
path --> [].

% path_segments//0
%   RFC 2396:  path_segments = segment *( "/" segment )
path_segments -->
    segment,
    star_path_segments.

% star_path_segments//0  --  zero or more ( "/" segment ); empty tried first.
star_path_segments --> [].
star_path_segments --> "/", segment, star_path_segments.

% segment//0
%   RFC 2396:  segment = *pchar *( ";" param )
segment -->
    star_pchar,
    star_segment_param.

% star_segment_param//0  --  zero or more ( ";" param ); empty tried first.
star_segment_param --> "".
star_segment_param --> ";", param, star_segment_param.

% param//0
%   RFC 2396:  param = *pchar
param -->
    star_pchar.

% star_pchar//0  --  zero or more pchar; the empty match is tried first.
star_pchar --> "".
star_pchar --> pchar, star_pchar.
% pchar//0
%   RFC 2396:  pchar = unreserved | escaped |
%                      ":" | "@" | "&" | "=" | "+" | "$" | ","
pchar --> unreserved.
pchar --> escaped.
pchar --> ":".
pchar --> "@".
pchar --> "&".
pchar --> "=".
pchar --> "+".
pchar --> "$".
pchar --> ",".

% query//0
%   RFC 2396:  query = *uric
query -->
    star_uric.

% fragment//0
%   RFC 2396:  fragment = *uric
fragment -->
    star_uric.
% uric//0
%   RFC 2396:  uric = reserved | unreserved | escaped
uric --> reserved.
uric --> unreserved.
uric --> escaped.

% reserved//0
%   RFC 2396:  reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
%                         "$" | ","
reserved --> ";".
reserved --> "/".
reserved --> "?".
reserved --> ":".
reserved --> "@".
reserved --> "&".
reserved --> "=".
reserved --> "+".
reserved --> "$".
reserved --> ",".

% unreserved//0
%   RFC 2396:  unreserved = alphanum | mark
unreserved --> alphanum.
unreserved --> mark.

% mark//0
%   RFC 2396:  mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
mark --> "-".
mark --> "_".
mark --> ".".
mark --> "!".
mark --> "~".
mark --> "*".
mark --> "'".
mark --> "(".
mark --> ")".

% escaped//0
%   RFC 2396:  escaped = "%" hex hex
escaped -->
    "%",
    hex,
    hex.

% hex//0  --  one hexadecimal digit: a decimal digit, A-F or a-f.
hex --> digit.
hex --> "A".
hex --> "B".
hex --> "C".
hex --> "D".
hex --> "E".
hex --> "F".
hex --> "a".
hex --> "b".
hex --> "c".
hex --> "d".
hex --> "e".
hex --> "f".

% alphanum//0
%   RFC 2396:  alphanum = alpha | digit
alphanum --> alpha.
alphanum --> digit.

% alpha//0
%   RFC 2396:  alpha = lowalpha | upalpha
alpha --> lowalpha.
alpha --> upalpha.

% lowalpha//0  --  one lower-case letter "a" .. "z", tried in alphabetical order.
lowalpha --> "a".   lowalpha --> "b".   lowalpha --> "c".
lowalpha --> "d".   lowalpha --> "e".   lowalpha --> "f".
lowalpha --> "g".   lowalpha --> "h".   lowalpha --> "i".
lowalpha --> "j".   lowalpha --> "k".   lowalpha --> "l".
lowalpha --> "m".   lowalpha --> "n".   lowalpha --> "o".
lowalpha --> "p".   lowalpha --> "q".   lowalpha --> "r".
lowalpha --> "s".   lowalpha --> "t".   lowalpha --> "u".
lowalpha --> "v".   lowalpha --> "w".   lowalpha --> "x".
lowalpha --> "y".   lowalpha --> "z".

% upalpha//0  --  one upper-case letter "A" .. "Z", tried in alphabetical order.
upalpha --> "A".    upalpha --> "B".    upalpha --> "C".
upalpha --> "D".    upalpha --> "E".    upalpha --> "F".
upalpha --> "G".    upalpha --> "H".    upalpha --> "I".
upalpha --> "J".    upalpha --> "K".    upalpha --> "L".
upalpha --> "M".    upalpha --> "N".    upalpha --> "O".
upalpha --> "P".    upalpha --> "Q".    upalpha --> "R".
upalpha --> "S".    upalpha --> "T".    upalpha --> "U".
upalpha --> "V".    upalpha --> "W".    upalpha --> "X".
upalpha --> "Y".    upalpha --> "Z".

% digit//0  --  one decimal digit "0" .. "9", tried in ascending order.
digit --> "0".      digit --> "1".      digit --> "2".
digit --> "3".      digit --> "4".      digit --> "5".
digit --> "6".      digit --> "7".      digit --> "8".
digit --> "9".











