/* $Id: Parser.ll1,v 1.1 1997/05/07 13:55:17 bbos Exp $ */

%package "w3c.xmlOnline.parser"

%import "w3c.xmlOnline.parser.XMLTokenizer"
%import "w3c.xmlOnline.parser.XMLListener"
%import "w3c.xmlOnline.parser.DChain"
%import "java.util.Vector"
%import "java.util.Enumeration"
%import "java.util.Hashtable"
%import "java.io.PrintStream"

/*
 * Removed idinfo: all attributes called ID are now considered to be
 * IDs and IDREFs are not needed, since they can be handled with
 * HREFs. Also removed PI, since the only PIs recognized are those for
 * XML itself.
 */

%code {

  /** Indexed on elt name, contains DChains with default attributes */
  private Hashtable defaultattrs = new Hashtable();

  /** Most recently created DChain */
  private DChain chaintop = null;

  /** The listener objects interested in getting parser events */
  private Vector listeners = new Vector();

  /** # of listeners (duplicates listeners.size()) */
  private int nrlisteners = 0;

  /**
   * Register an XMLListener with the parser. An XMLListener is
   * an object that will be called whenever an XML-event occurs:
   * the parser finds a start tag, an end tag, an attribute,
   * a PI, etc. There may be any number of listeners registered.
   * Listeners are called in the order in which they were registered.
   * @param listener the listener to register
   * @return this
   */
  public Parser addListener(XMLListener listener)
  {
    listeners.addElement(listener);
    nrlisteners++;
    return this;
  }

  /**
   * This function is called by the parser when it has recognized a
   * comment. It calls each of the listeners in turn. Note that
   * comments are not part of the data model, but they are passed on
   * anyway, for the benefit of XML editors and similar programs.
   * @param comment the comment string (without <!-- -->)
   */
  protected void handleComment(String comment)
  {
    for (int i = 0; i < nrlisteners; i++)
      ((XMLListener)listeners.elementAt(i)).handleComment(comment);
  }

  /**
   * This function is called by the parser when it has
   * recognized a start tag. This is the beginning of
   * a lexical scope: if any <?xml default... ?>
   * declarations are found, they only apply to elements
   * in this scope.
   * @param tag the tag name in lowercase
   */
  protected void handleStartTag(String tag)
  {
    for (int i = 0; i < nrlisteners; i++)
      ((XMLListener)listeners.elementAt(i)).handleStartTag(tag);
  }

  /**
   * This function is called by the parser when it has
   * recognized an attribute/value pair. The attribute
   * belongs to the tag that was handled by the most recent
   * call to handleStartTag().
   *
   * Note that there is no indication of whether this
   * attribute was actually on the tag, or whether it
   * is the declared default value for the attribute on
   * this tag (<?XML DEFAULT...?>)
   * @param attribute the name of the attribute in lowercase
   * @param value the value of the attribute (a string)
   */
  protected void handleAttribute(String attribute, String value)
  {
    for (int i = 0; i < nrlisteners; i++)
      ((XMLListener)listeners.elementAt(i)).handleAttribute(attribute, value);
  }

  /**
   * This function is called by the parser when it has
   * recognized an end tag or the end of an empty tag.
   * This is also the end of a lexical scope.
   * @param tag the name of the tag in lowercase
   */
  protected void handleEndTag(String tag)
  {
    for (int i = 0; i < nrlisteners; i++)
      ((XMLListener)listeners.elementAt(i)).handleEndTag(tag);
  }

  /**
   * This function is called by the parser when it has
   * recognized character data. The function may be called
   * several times without intervening calls to handleStartTag()
   * or handleEndTag(). The content of an element is thus the
   * concatenation of all consecutive calls to handleData().
   * @param data the character data
   */
  protected void handleData(String data)
  {
    for (int i = 0; i < nrlisteners; i++)
      ((XMLListener)listeners.elementAt(i)).handleData(data);
  }

  /**
   * This function is called by the parser when it has recognized a
   * processing instruction. Note that PIs are not part of the data
   * model, but they are passed on anyway, for the benefit of XML
   * editors and similar programs.
   *
   * @param pi the processing instruction (without <? ?>)
   */
  protected void handlePI(String pi)
  {
    for (int i = 0; i < nrlisteners; i++)
      ((XMLListener)listeners.elementAt(i)).handlePI(pi);
  }

  /**
   * This function is called by the parser when it has
   * recognized a doctype declaration. Documents can be nested
   * and there may thus be a doctype in the middle of a
   * document. A document constitutes a scope for the
   * default attributes, just like an element.
   * @param root the name of the start tag
   * @param doctype the URL defining the document type (may be null)
   */
  protected void handleStartDoc(String root, String doctype)
  {
    for (int i = 0; i < nrlisteners; i++)
      ((XMLListener)listeners.elementAt(i)).handleStartDoc(root, doctype);
  }

  /**
   * This function is called by the parser when it has
   * recognized the end of a document or subdocument.
   * @param root the name of the start tag
   */
  protected void handleEndDoc(String root)
  {
    for (int i = 0; i < nrlisteners; i++)
      ((XMLListener)listeners.elementAt(i)).handleEndDoc(root);
  }

  /* Add <?XML IDINFO... ?> here as well? */

  /**
   * Encode a string with certain characters replaced by
   * numerical character entities. The characters replaced are
   * the ampersand, the less-than sign, the greater-than sign and
   * the single quote; the double quote is left alone, so the result
   * is only safe inside a single-quoted literal.
   * @param s string to encode
   * @return the encoded string
   */
  protected String protect(String s)
  {
    StringBuffer t = new StringBuffer();
    for (int i = 0; i < s.length(); i++) {
      char c = s.charAt(i);
      if (c == '&') t.append("&#" + (int)'&' + ";");
      else if (c == '<') t.append("&#" + (int)'<' + ";");
      else if (c == '>') t.append("&#" + (int)'>' + ";");
      else if (c == '\'') t.append("&#" + (int)'\'' + ";");
      else t.append(c);
    }
    return t.toString();
  }

}

%terminals

  /* T = in tags; C = in content; M = in marked section */

  /*
   * NOTE(review): the terminals marked "restored" are used by the rules
   * below but were missing from this list, and the quoted spellings of
   * the other terminals had been emptied (C: ""). Both were reconstructed
   * from the rules and the comments in the code section; confirm the
   * spellings, and whether the order of this list matters for token
   * numbering, against XMLTokenizer. A PI terminal is deliberately not
   * declared because no active rule uses it.
   */

  COMMENT       /* C: "<!--...-->" */
  DEFAULT       /* C: "<?xml default" */
  DOCTYPE       /* restored: starts a doctypedecl */
  EMPTY         /* restored: ends an empty tag (see element) */
  ENCODING      /* C: "<?xml encoding" */
  ENDPI         /* restored: ends encodingdecl and defaultinfo */
  EQ            /* T: "=" */
  ETAGO         /* C: "</" */
  GT            /* restored: ends a start tag, end tag or doctypedecl */
  IDINFO        /* C: "<?xml idinfo" (only used by the disabled idinfo rule) */
  LITERAL       /* restored: quoted value (attribute value, extid, encoding) */
  LT            /* restored: starts a start tag (see element) */
  MSDATA        /* restored: data inside a marked section (see ms) */
  MSEND         /* restored: ends a marked section (see ms) */
  MSSTART       /* C: start of a marked section (see ms) */
  NAME          /* restored: tag, attribute or root name */
  PCDATA        /* restored: character data in content */

  /* Error tokens, generated by scanner, but not used in grammar */

  UNCLOSED_COMMENT
  UNCLOSED_PI
  UNKNOWN_XML
  UNCLOSED_MS
  UNKNOWN_MARKUP
  UNKNOWN_ENT
  UNCLOSED_LIT

%rules

/* A whole document; handleEndDoc is only sent if a doctype was seen. */
document
    {String[] root = {null};}
  : prolog(root)
    element
    misc*
    {if (root[0] != null) handleEndDoc(root[0]);}
  ;

/* root[0] receives the root name if a doctypedecl is present. */
prolog(String[] root)
  : encodingdecl?
    misc*
    [doctypedecl(root) misc*]?
  ;

misc
  : COMMENT~{handleComment(((XMLTokenizer)in).value());}
/*  | PI~{handlePI(((XMLTokenizer)in).value());} */
  | defaultinfo
/*  | idinfo */
  ;

/* Stores the lowercased root name in root[0] and reports the start of
   the (sub)document; the URL stays null when there is no extid. */
doctypedecl(String[] root)
    {String url[] = {null};}
  : DOCTYPE
    NAME~{root[0] = ((XMLTokenizer)in).value().toLowerCase();}
    extid(url)?
    GT
    {handleStartDoc(root[0], url[0]);}
  ;

/* Adds one name/value pair to attrs, overriding any default with the
   same name that element put there. */
attribute(Hashtable attrs)
    {String key = null, val = null;}
  : NAME~{key = ((XMLTokenizer)in).value().toLowerCase();}
    EQ
    LITERAL~{val = ((XMLTokenizer)in).value();}
    {/* Hashtable rejects null keys and values; key or val is still
        null if the corresponding token action did not run. */
     if (key != null && val != null) attrs.put(key, val);}
  ;

/* NOTE(review): on a mismatching end tag the error is counted but
   handleEndTag is never called for curelt; confirm this is intended. */
etag(String curelt)
    {String tag = null;}
  : ETAGO
    NAME~{tag = ((XMLTokenizer)in).value().toLowerCase();
          if (!tag.equals(curelt)) {nrerrors++; deletion(NAME);}
          else handleEndTag(tag);}
    GT
  ;

content
  : [ element
    | PCDATA~{handleData(((XMLTokenizer)in).value());}
    | ms
    | misc
    | subdoc
    ]*
  ;

/* A nested document. Defaults declared inside it are undone afterwards
   by unwinding the DChain list back to the marker taken on entry. */
subdoc
    {DChain marker = chaintop; String[] root = {null};}
  : doctypedecl(root)
    misc*
    element
    {while (chaintop != marker) {       /* Restore previous lexical scope */
       if (chaintop.nextInBucket != null)
         defaultattrs.put(chaintop.tag, chaintop.nextInBucket);
       else
         defaultattrs.remove(chaintop.tag);
       chaintop = chaintop.nextInChain;
     }
     handleEndDoc(root[0]);}
  ;

/* One element: reports the start tag, then the attributes (declared
   defaults overridden by attributes on the tag itself), then the content
   and the end tag. Defaults declared inside the element are undone at
   the end by unwinding the DChain list back to the marker. */
element
    {String me = null; Hashtable attrs = new Hashtable(); Vector v;
     DChain marker = chaintop;}
  : LT
    NAME~{me = ((XMLTokenizer)in).value().toLowerCase();
          handleStartTag(me);
          v = (Vector)defaultattrs.get(me);     /* Fill in default attribs */
          if (v != null) {
            /* The list holds name, value, name, value, ...; walking it
               backwards lets the earliest pair for a name win. The bound
               is 1, not 0, so that elementAt(i-1) can never see -1. */
            for (int i = v.size() - 1; i >= 1; i = i - 2) {
              String attr = (String)v.elementAt(i-1);
              String val = (String)v.elementAt(i);
              /* Hashtable rejects null keys and values */
              if (attr != null && val != null) attrs.put(attr, val);
            }
          }}
    attribute(attrs)*
    {for (Enumeration e = attrs.keys(); e.hasMoreElements();) {
       String key = (String)e.nextElement();
       handleAttribute(key, (String)attrs.get(key));
     }}
    [ GT content etag(me)
    | EMPTY {handleEndTag(me);}
    ]
    {while (chaintop != marker) {       /* Restore previous lexical scope */
       if (chaintop.nextInBucket != null)
         defaultattrs.put(chaintop.tag, chaintop.nextInBucket);
       else
         defaultattrs.remove(chaintop.tag);
       chaintop = chaintop.nextInChain;
     }}
  ;

encodingdecl
  : ENCODING EQ qencoding ENDPI
  ;

extid(String[] stringreturn)
  : LITERAL~{stringreturn[0] = ((XMLTokenizer)in).value();}
  ;

/* A marked section; its data is reported like ordinary character data. */
ms
  : MSSTART
    MSDATA~{handleData(((XMLTokenizer)in).value());}
    MSEND
  ;

/* NOTE(review): unlike etag, an unknown encoding does not increment
   nrerrors before calling deletion(); confirm whether that is intended. */
qencoding
  : LITERAL~{try {
               ((XMLTokenizer)in).setEncoding(((XMLTokenizer)in).value());
             } catch (UnknownEncoding e) {
               deletion(LITERAL);
             }}
  ;

/*
 * Disabled rule. The notes after "--" used to be nested comments, which
 * would have ended this comment early; keep them as plain text.
 *
idinfo
  : IDINFO
    NAME~{if (!((XMLTokenizer)in).value().equalsIgnoreCase("ids")) {
            nrerrors++; deletion(NAME);}}
    EQ quotedpairs              -- ...setIDs(...)
    NAME~{if (!((XMLTokenizer)in).value().equalsIgnoreCase("refs")) {
            nrerrors++; deletion(NAME);}}
    EQ quotedpairs              -- ...setRefs(...)
    ENDPI
  ;
 */

quotedpairs
  : LITERAL
  ;

/* A default-attribute declaration for one tag. A new DChain for the tag
   is pushed in front of any previous one (kept as its bucket successor)
   and becomes chaintop, so element and subdoc can undo it when their
   scope ends. The declaration is also rebuilt as text in "all" and
   passed to the listeners as a PI. */
defaultinfo
    {DChain v = null, oldbucket; String attr = null, val, tag, all = "";}
  : DEFAULT {all = all + "xml default ";}
    NAME~{tag = ((XMLTokenizer)in).value().toLowerCase();
          oldbucket = (DChain)defaultattrs.get(tag);
          v = new DChain(tag, oldbucket, chaintop);
          chaintop = v;
          defaultattrs.put(tag, v);
          all = all + tag;}
    [ NAME~{attr = ((XMLTokenizer)in).value().toLowerCase();}
      EQ
      LITERAL~{val = ((XMLTokenizer)in).value();
               v.addElement(attr);
               v.addElement(val);
               all = all + " " + attr + "='" + protect(val) + "'";}
    ]*
    ENDPI
    {handlePI(all);}
  ;