%option 8bit nodefault

%{
/*
 * WARNING: This is not an XML 1.0 parser, but an experiment with an XML-like
 * language. See http://www.w3.org/XML/9707/XML-in-C
 *
 * Author: Bert Bos
 * Created: 9 July 1997
 *
 * Copyright © 1997-2000 World Wide Web Consortium
 * See http://www.w3.org/Consortium/Legal/copyright-software-19980720.html
 */

/*
 * NOTE(review): the header names of the four system includes were missing
 * from the copy of this file that was reviewed. The four below are the
 * ones required by the calls made in this file (fprintf, malloc,
 * strdup/strstr/memcpy, isspace) -- confirm against the upstream source.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "y.tab.h"

static int keep;			/* To store start condition */

/*
 * word -- return a newly allocated copy of the first word in s.
 *
 * Leading whitespace and '<' characters are skipped. The word then runs up
 * to (but not including) the next whitespace character, '?' or the end of
 * the string. Stopping at '?' keeps the "?>" terminator out of the result
 * when an encoding declaration has no space before it ("... utf8?>");
 * names matched by {name} never contain '?', so tag names are unaffected.
 *
 * The caller owns the returned buffer. Returns NULL if allocation fails.
 */
static char *word(char *s)
{
  const char *start = s;
  const char *end;
  size_t len;
  char *buf;

  /* Casts to unsigned char: names may contain bytes \200-\377, and passing
     a negative char value to isspace() is undefined behaviour. */
  while (isspace((unsigned char) *start) || *start == '<') {
    start++;
  }

  end = start;
  while (*end != '\0' && *end != '?' && !isspace((unsigned char) *end)) {
    end++;
  }

  len = (size_t) (end - start);
  buf = malloc(len + 1);
  if (buf == NULL) {
    return NULL;
  }
  memcpy(buf, start, len);
  buf[len] = '\0';
  return buf;
}

/*
 * encoding_name -- extract the name from text matched by {encoding}.
 *
 * {open} may include a leading newline (one or two bytes), so the keyword
 * does not start at a fixed offset in the match; locate it instead of
 * assuming a constant prefix length.
 *
 * The caller owns the returned buffer. Returns NULL if allocation fails.
 */
static char *encoding_name(char *text)
{
  static const char keyword[] = "?XML-ENCODING";
  char *p = strstr(text, keyword);

  /* p is non-NULL whenever the {encoding} pattern matched; the fallback
     only keeps the function well-defined for other input. */
  return word(p != NULL ? p + (sizeof keyword - 1) : text);
}
%}

nl		(\r\n|\r|\n)
ws		[ \t\r\n]+
open		{nl}?"<"
close		">"{nl}?
namestart	[A-Za-z\200-\377_]
namechar	[A-Za-z\200-\377_0-9.-]
esc		"&#"[0-9]+";"|"&#x"[0-9a-fA-F]+";"
name		{namestart}{namechar}*
data		([^<\n&]|\n[^<&]|\n{esc}|{esc})+
comment		{open}"!--"([^-]|"-"[^-])*"--"{close}
string		\"([^"&]|{esc})*\"|\'([^'&]|{esc})*\'
version		{open}"?XML-VERSION 1.0?"{close}
encoding	{open}"?XML-ENCODING"{ws}{name}{ws}?"?"{close}
attdef		{open}"?XML-ATT"

/*
 * The CONTENT mode is used for the content of elements, i.e.,
 * between the ">" and "<" of element tags.
 * The INITIAL mode is used outside the top level element
 * and inside markup.
 */
%s CONTENT

%%

	/*
	 * NOTE(review): the copy of this file that was reviewed had no
	 * start-condition prefixes on any rule. Without them {ws} makes the
	 * final {nl} rule unmatchable, {name} wins over {data} for plain
	 * text in element content, and {data} swallows attribute text
	 * inside tags. The <INITIAL> and <CONTENT> prefixes below were
	 * restored to match the mode description above -- confirm against
	 * the upstream source.
	 */

<INITIAL>{ws}		{/* skip */}
<INITIAL>{version}	{return VERSION;}
<INITIAL>{encoding}	{yylval.s = encoding_name(yytext); return ENCODING;}
<INITIAL>"/"		{return SLASH;}
<INITIAL>"="		{return EQ;}
<INITIAL>{close}	{BEGIN(CONTENT); return CLOSE;}
<INITIAL>{name}		{yylval.s = strdup(yytext); return NAME;}
<INITIAL>{string}	{yylval.s = strdup(yytext); return VALUE;}
<INITIAL>"?"{close}	{BEGIN(keep); return ENDDEF;}

{attdef}		{keep = YY_START; BEGIN(INITIAL); return ATTDEF;}
{open}{ws}?{name}	{BEGIN(INITIAL); yylval.s = word(yytext); return START;}
{open}{ws}?"/"		{BEGIN(INITIAL); return END;}
{comment}		{yylval.s = strdup(yytext); return COMMENT;}

<CONTENT>{data}		{yylval.s = strdup(yytext); return DATA;}

.			{fprintf(stderr, "!ERROR(%c)\n", *yytext);}
{nl}			{/* skip, must be an extra one at EOF */;}