/* $Id: Langname_Scanner.l,v 1.1 2008/04/06 17:10:48 eric Exp SPARQLScanner.ll 28 2007-08-20 10:27:39Z tb $ -*- mode: c++ -*- */
/** \file SPARQLScanner.ll
 * Define the Flex lexical scanner for the SPARQL terminals.
 *
 * Layout of this file:
 *   1. C/C++ prologue copied verbatim into the generated scanner,
 *   2. flex options,
 *   3. one named pattern per SPARQL terminal,
 *   4. (after the first %%) one rule per terminal,
 *   5. (after the second %%) helper C code.
 */
%{
/*** C/C++ Declarations ***/
/* NOTE(review): presumably these headers declare the e_* enumerators,
 * constructTerminal(), yylval and the token codes used by the rules below;
 * confirm against the headers. */
#include "SPARQLEnums.h"
#include "SPARQLParser.h"
%}
/*** Flex Declarations and Options ***/
/* Replace the default "yy" prefix of the generated global names with
 * "SPARQL".  (For a C++ scanner this results in the class name
 * "SPARQLFlexLexer".) */
%option prefix="SPARQL"
/* the manual says "somewhat more optimized" */
%option batch
/* enable scanner to generate debug output. disable this for release
 * versions. */
%option debug
/* no support for include files is planned: the scanner stops at the first
 * end-of-file (noyywrap) and no unput() routine is generated (nounput) */
%option noyywrap nounput
/* START patterns for SPARQL terminals */
/* Naming convention visible below: IT_* are keyword/identifier-like
 * terminals, GT_* are punctuation and operator terminals.  The remaining
 * names follow the terminal names of the SPARQL grammar. */
IT_BASE "BASE"
IT_PREFIX "PREFIX"
IT_SELECT "SELECT"
IT_DISTINCT "DISTINCT"
IT_REDUCED "REDUCED"
GT_TIMES "*"
IT_CONSTRUCT "CONSTRUCT"
IT_DESCRIBE "DESCRIBE"
IT_ASK "ASK"
IT_FROM "FROM"
IT_NAMED "NAMED"
IT_WHERE "WHERE"
IT_ORDER "ORDER"
IT_BY "BY"
IT_ASC "ASC"
IT_DESC "DESC"
IT_LIMIT "LIMIT"
IT_OFFSET "OFFSET"
GT_LCURLEY "{"
GT_RCURLEY "}"
GT_DOT "."
IT_OPTIONAL "OPTIONAL"
IT_GRAPH "GRAPH"
IT_UNION "UNION"
IT_FILTER "FILTER"
GT_COMMA ","
GT_LPAREN "("
GT_RPAREN ")"
GT_SEMI ";"
IT_a "a"
GT_LBRACKET "\["
GT_RBRACKET "\]"
GT_OR "||"
GT_AND "&&"
GT_EQUAL "="
GT_NEQUAL "!="
GT_LT "<"
GT_GT ">"
GT_LE "<="
GT_GE ">="
GT_PLUS "+"
GT_MINUS "-"
GT_DIVIDE "/"
GT_NOT "!"
IT_STR "STR"
IT_LANG "LANG"
IT_LANGMATCHES "LANGMATCHES"
IT_DATATYPE "DATATYPE"
IT_BOUND "BOUND"
IT_sameTerm "sameTerm"
IT_isIRI "isIRI"
IT_isURI "isURI"
IT_isBLANK "isBLANK"
IT_isLITERAL "isLITERAL"
IT_REGEX "REGEX"
GT_DTYPE "^^"
IT_true "true"
IT_false "false"
/* The patterns below that accept arbitrary characters spell them out as an
 * ASCII character class followed by alternatives of explicit multi-byte
 * sequences (lead bytes \xC2-\xF4 with \x80-\xBF continuation bytes).
 *
 * NOTE(review): in every such pattern the byte-sequence list ends with
 * "...(\xBF[\x80-\xBD])))]" -- the final "]" is outside any character class,
 * and the "\x8F..." alternative sits outside the "\xF4" prefix group.  This
 * looks like an artifact of the tool that generated the patterns; confirm
 * before relying on the exact set of accepted byte sequences.  The
 * "[\xE1-\xEC][\x80-\xBF][\x80-\xBF]" alternative is also listed twice.
 *
 * NOTE(review): the ASCII class of IRI_REF starts at '#', so '!' (0x21) is
 * not accepted inside <...>; verify against the SPARQL IRI_REF production. */
IRI_REF "<"(([#-;=?-\[\]_a-z~-\x7F]|([\xC2-\xDF][\x80-\xBF])|(\xE0([\xA0-\xBF][\x80-\xBF]))|([\xE1-\xEC][\x80-\xBF][\x80-\xBF])|([\xE1-\xEC][\x80-\xBF][\x80-\xBF])|(\xED([\x80-\x9F][\x80-\xBF]))|([\xEE-\xEF][\x80-\xBF][\x80-\xBF])|(\xF0([\x90-\xBF][\x80-\xBF][\x80-\xBF]))|([\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF])|(\xF4([\x80-\x8E][\x80-\xBF][\x80-\xBF])|(\x8F([\x80-\xBE][\x80-\xBF])|(\xBF[\x80-\xBD])))]))*">"
LANGTAG "@"([A-Za-z])+(("-"([0-9A-Za-z])+))*
/* Numeric literals; the *_POSITIVE / *_NEGATIVE forms include the sign. */
INTEGER ([0-9])+
DECIMAL (([0-9])+"."([0-9])*)|("."([0-9])+)
INTEGER_POSITIVE "+"({INTEGER})
DECIMAL_POSITIVE "+"({DECIMAL})
INTEGER_NEGATIVE "-"({INTEGER})
DECIMAL_NEGATIVE "-"({DECIMAL})
EXPONENT [Ee]([+-])?([0-9])+
DOUBLE (([0-9])+"."([0-9])*({EXPONENT}))|(("."(([0-9]))+({EXPONENT}))|((([0-9]))+({EXPONENT})))
DOUBLE_NEGATIVE "-"({DOUBLE})
DOUBLE_POSITIVE "+"({DOUBLE})
/* String literals.  ECHAR is a backslash followed by one of " ' \ b f n r t.
 * The *_LONG* forms are triple-quoted and allow up to two consecutive quote
 * characters inside; the short forms exclude \n (0x0A) and \r (0x0D). */
ECHAR "\\"[\"'\\bfnrt]
STRING_LITERAL_LONG2 "\"\"\""((((("\"")|("\"\"")))?(([\x00-!#-\[\]-\x7F]|([\xC2-\xDF][\x80-\xBF])|(\xE0([\xA0-\xBF][\x80-\xBF]))|([\xE1-\xEC][\x80-\xBF][\x80-\xBF])|([\xE1-\xEC][\x80-\xBF][\x80-\xBF])|(\xED([\x80-\x9F][\x80-\xBF]))|([\xEE-\xEF][\x80-\xBF][\x80-\xBF])|(\xF0([\x90-\xBF][\x80-\xBF][\x80-\xBF]))|([\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF])|(\xF4([\x80-\x8E][\x80-\xBF][\x80-\xBF])|(\x8F([\x80-\xBE][\x80-\xBF])|(\xBF[\x80-\xBD])))])|(({ECHAR})))))*"\"\"\""
STRING_LITERAL_LONG1 "'''"((((("'")|("''")))?(([\x00-&(-\[\]-\x7F]|([\xC2-\xDF][\x80-\xBF])|(\xE0([\xA0-\xBF][\x80-\xBF]))|([\xE1-\xEC][\x80-\xBF][\x80-\xBF])|([\xE1-\xEC][\x80-\xBF][\x80-\xBF])|(\xED([\x80-\x9F][\x80-\xBF]))|([\xEE-\xEF][\x80-\xBF][\x80-\xBF])|(\xF0([\x90-\xBF][\x80-\xBF][\x80-\xBF]))|([\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF])|(\xF4([\x80-\x8E][\x80-\xBF][\x80-\xBF])|(\x8F([\x80-\xBE][\x80-\xBF])|(\xBF[\x80-\xBD])))])|(({ECHAR})))))*"'''"
STRING_LITERAL2 "\""(((([\x00-\t\x0B-\x0C\x0E-!#-\[\]-\x7F]|([\xC2-\xDF][\x80-\xBF])|(\xE0([\xA0-\xBF][\x80-\xBF]))|([\xE1-\xEC][\x80-\xBF][\x80-\xBF])|([\xE1-\xEC][\x80-\xBF][\x80-\xBF])|(\xED([\x80-\x9F][\x80-\xBF]))|([\xEE-\xEF][\x80-\xBF][\x80-\xBF])|(\xF0([\x90-\xBF][\x80-\xBF][\x80-\xBF]))|([\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF])|(\xF4([\x80-\x8E][\x80-\xBF][\x80-\xBF])|(\x8F([\x80-\xBE][\x80-\xBF])|(\xBF[\x80-\xBD])))]))|(({ECHAR}))))*"\""
STRING_LITERAL1 "'"(((([\x00-\t\x0B-\x0C\x0E-&(-\[\]-\x7F]|([\xC2-\xDF][\x80-\xBF])|(\xE0([\xA0-\xBF][\x80-\xBF]))|([\xE1-\xEC][\x80-\xBF][\x80-\xBF])|([\xE1-\xEC][\x80-\xBF][\x80-\xBF])|(\xED([\x80-\x9F][\x80-\xBF]))|([\xEE-\xEF][\x80-\xBF][\x80-\xBF])|(\xF0([\x90-\xBF][\x80-\xBF][\x80-\xBF]))|([\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF])|(\xF4([\x80-\x8E][\x80-\xBF][\x80-\xBF])|(\x8F([\x80-\xBE][\x80-\xBF])|(\xBF[\x80-\xBD])))]))|(({ECHAR}))))*"'"
/* WS is a single space, tab, CR or LF; NIL and ANON are "()" and "[]" with
 * optional whitespace between the brackets. */
WS (" ")|(("\t")|(("\r")|("\n")))
NIL "("(({WS}))*")"
ANON "\["(({WS}))*"\]"
/* Name characters: ASCII letters plus explicit multi-byte sequences. */
PN_CHARS_BASE ([A-Z])|(([a-z])|(((\xC3[\x80-\x96]))|(((\xC3[\x98-\xB6]))|(((\xC3[\xB8-\xBF])|([\xC4-\xCB][\x80-\xBF]))|(((\xCD[\xB0-\xBD]))|(((\xCD\xBF)|([\xCE-\xDF][\x80-\xBF])|(\xE0([\xA0-\xBF][\x80-\xBF]))|(\xE1([\x80-\xBF][\x80-\xBF])))|(((\xE2(\x80[\x8C-\x8D])))|(((\xE2(\x81[\xB0-\xBF])|([\x82-\x85][\x80-\xBF])|(\x86[\x80-\x8F])))|(((\xE2([\xB0-\xBE][\x80-\xBF])|(\xBF[\x80-\xAF])))|(((\xE3(\x80[\x81-\xBF])|([\x81-\xBF][\x80-\xBF]))|([\xE4-\xEC][\x80-\xBF][\x80-\xBF])|([\xE1-\xEC][\x80-\xBF][\x80-\xBF])|(\xED([\x80-\x9F][\x80-\xBF])))|(((\xEF([\xA4-\xB6][\x80-\xBF])|(\xB7[\x80-\x8F])))|(((\xEF(\xB7[\xB0-\xBF])|([\xB8-\xBE][\x80-\xBF])|(\xBF[\x80-\xBD])))|((\xF0([\x90-\xBF][\x80-\xBF][\x80-\xBF]))|([\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF]))))))))))))))
PN_CHARS_U (({PN_CHARS_BASE}))|("_")
/* Variables: "?" or "$" followed by VARNAME. */
VARNAME ((({PN_CHARS_U}))|([0-9]))(((({PN_CHARS_U}))|(([0-9])|((\xC2\xB7)|(((\xCD[\x80-\xAF]))|((\xE2(\x80\xBF)|(\x81\x80))))))))*
VAR2 "$"({VARNAME})
VAR1 "?"({VARNAME})
/* Prefixed names: PN_PREFIX and PN_LOCAL may contain "." in the middle but
 * must end with a PN_CHARS character. */
PN_CHARS (({PN_CHARS_U}))|(("-")|(([0-9])|((\xC2\xB7)|(((\xCD[\x80-\xAF]))|((\xE2(\x80\xBF)|(\x81\x80)))))))
PN_PREFIX ({PN_CHARS_BASE})(((((({PN_CHARS}))|(".")))*({PN_CHARS})))?
PNAME_NS (({PN_PREFIX}))?":"
PN_LOCAL ((({PN_CHARS_U}))|([0-9]))(((((({PN_CHARS}))|(".")))*({PN_CHARS})))?
/* Blank node labels ("_:" + local name) and prefixed names with a local
 * part (namespace prefix + ":" + local name). */
BLANK_NODE_LABEL "_:"({PN_LOCAL})
PNAME_LN ({PNAME_NS})({PN_LOCAL})
/* Input that produces no token: runs of whitespace, and "#" followed by
 * characters other than \n (0x0A) and \r (0x0D), i.e. a comment up to the
 * end of the line.
 * NOTE(review): the "]" after the last byte-sequence alternative is outside
 * any character class and looks like a generator artifact; confirm. */
PASSED_TOKENS (([\t\n\r ])+)|("#"([\x00-\t\x0B-\x0C\x0E-\x7F]|([\xC2-\xDF][\x80-\xBF])|(\xE0([\xA0-\xBF][\x80-\xBF]))|([\xE1-\xEC][\x80-\xBF][\x80-\xBF])|([\xE1-\xEC][\x80-\xBF][\x80-\xBF])|(\xED([\x80-\x9F][\x80-\xBF]))|([\xEE-\xEF][\x80-\xBF][\x80-\xBF])|(\xF0([\x90-\xBF][\x80-\xBF][\x80-\xBF]))|([\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF])|(\xF4([\x80-\x8E][\x80-\xBF][\x80-\xBF])|(\x8F([\x80-\xBE][\x80-\xBF])|(\xBF[\x80-\xBD])))])*)
/* END patterns for SPARQL terminals */

/* START semantic actions for SPARQL terminals */
/* Every rule below has the same shape: build a terminal object from the
 * matched text with constructTerminal(e_<NAME>, yytext), store it in
 * yylval.semval, and return the token code <NAME>.  Rule order matters:
 * when two patterns match the same longest text, flex picks the rule that
 * appears first. */
%%
{PASSED_TOKENS}    /* no action: whitespace and comments are discarded */
{IT_BASE} {yylval.semval = constructTerminal(e_IT_BASE, yytext); return IT_BASE;}
{IT_PREFIX} {yylval.semval = constructTerminal(e_IT_PREFIX, yytext); return IT_PREFIX;}
{IT_SELECT} {yylval.semval = constructTerminal(e_IT_SELECT, yytext); return IT_SELECT;}
{IT_DISTINCT} {yylval.semval = constructTerminal(e_IT_DISTINCT, yytext); return IT_DISTINCT;}
{IT_REDUCED} {yylval.semval = constructTerminal(e_IT_REDUCED, yytext); return IT_REDUCED;}
{GT_TIMES} {yylval.semval = constructTerminal(e_GT_TIMES, yytext); return GT_TIMES;}
{IT_CONSTRUCT} {yylval.semval = constructTerminal(e_IT_CONSTRUCT, yytext); return IT_CONSTRUCT;}
{IT_DESCRIBE} {yylval.semval = constructTerminal(e_IT_DESCRIBE, yytext); return IT_DESCRIBE;}
{IT_ASK} {yylval.semval = constructTerminal(e_IT_ASK, yytext); return IT_ASK;}
{IT_FROM} {yylval.semval = constructTerminal(e_IT_FROM, yytext); return IT_FROM;}
{IT_NAMED} {yylval.semval = constructTerminal(e_IT_NAMED, yytext); return IT_NAMED;}
{IT_WHERE} {yylval.semval = constructTerminal(e_IT_WHERE, yytext); return IT_WHERE;}
{IT_ORDER} {yylval.semval = constructTerminal(e_IT_ORDER, yytext); return IT_ORDER;}
{IT_BY} {yylval.semval = constructTerminal(e_IT_BY, yytext); return IT_BY;}
{IT_ASC} {yylval.semval = constructTerminal(e_IT_ASC, yytext); return IT_ASC;}
{IT_DESC} {yylval.semval = constructTerminal(e_IT_DESC, yytext); return IT_DESC;}
{IT_LIMIT} {yylval.semval = constructTerminal(e_IT_LIMIT, yytext); return IT_LIMIT;}
{IT_OFFSET} {yylval.semval = constructTerminal(e_IT_OFFSET, yytext); return IT_OFFSET;}
{GT_LCURLEY} {yylval.semval = constructTerminal(e_GT_LCURLEY, yytext); return GT_LCURLEY;}
{GT_RCURLEY} {yylval.semval = constructTerminal(e_GT_RCURLEY, yytext); return GT_RCURLEY;}
{GT_DOT} {yylval.semval = constructTerminal(e_GT_DOT, yytext); return GT_DOT;}
{IT_OPTIONAL} {yylval.semval = constructTerminal(e_IT_OPTIONAL, yytext); return IT_OPTIONAL;}
{IT_GRAPH} {yylval.semval = constructTerminal(e_IT_GRAPH, yytext); return IT_GRAPH;}
{IT_UNION} {yylval.semval = constructTerminal(e_IT_UNION, yytext); return IT_UNION;}
{IT_FILTER} {yylval.semval = constructTerminal(e_IT_FILTER, yytext); return IT_FILTER;}
{GT_COMMA} {yylval.semval = constructTerminal(e_GT_COMMA, yytext); return GT_COMMA;}
{GT_LPAREN} {yylval.semval = constructTerminal(e_GT_LPAREN, yytext); return GT_LPAREN;}
{GT_RPAREN} {yylval.semval = constructTerminal(e_GT_RPAREN, yytext); return GT_RPAREN;}
{GT_SEMI} {yylval.semval = constructTerminal(e_GT_SEMI, yytext); return GT_SEMI;}
{IT_a} {yylval.semval = constructTerminal(e_IT_a, yytext); return IT_a;}
{GT_LBRACKET} {yylval.semval = constructTerminal(e_GT_LBRACKET, yytext); return GT_LBRACKET;}
{GT_RBRACKET} {yylval.semval = constructTerminal(e_GT_RBRACKET, yytext); return GT_RBRACKET;}
{GT_OR} {yylval.semval = constructTerminal(e_GT_OR, yytext); return GT_OR;}
{GT_AND} {yylval.semval = constructTerminal(e_GT_AND, yytext); return GT_AND;}
{GT_EQUAL} {yylval.semval = constructTerminal(e_GT_EQUAL, yytext); return GT_EQUAL;}
{GT_NEQUAL} {yylval.semval = constructTerminal(e_GT_NEQUAL, yytext); return GT_NEQUAL;}
{GT_LT} {yylval.semval = constructTerminal(e_GT_LT, yytext); return GT_LT;}
{GT_GT} {yylval.semval = constructTerminal(e_GT_GT, yytext); return GT_GT;}
{GT_LE} {yylval.semval = constructTerminal(e_GT_LE, yytext); return GT_LE;}
{GT_GE} {yylval.semval = constructTerminal(e_GT_GE, yytext); return GT_GE;}
{GT_PLUS} {yylval.semval = constructTerminal(e_GT_PLUS, yytext); return GT_PLUS;}
{GT_MINUS} {yylval.semval = constructTerminal(e_GT_MINUS, yytext); return GT_MINUS;}
{GT_DIVIDE} {yylval.semval = constructTerminal(e_GT_DIVIDE, yytext); return GT_DIVIDE;}
{GT_NOT} {yylval.semval = constructTerminal(e_GT_NOT, yytext); return GT_NOT;}
{IT_STR} {yylval.semval = constructTerminal(e_IT_STR, yytext); return IT_STR;}
{IT_LANG} {yylval.semval = constructTerminal(e_IT_LANG, yytext); return IT_LANG;}
{IT_LANGMATCHES} {yylval.semval = constructTerminal(e_IT_LANGMATCHES, yytext); return IT_LANGMATCHES;}
{IT_DATATYPE} {yylval.semval = constructTerminal(e_IT_DATATYPE, yytext); return IT_DATATYPE;}
{IT_BOUND} {yylval.semval = constructTerminal(e_IT_BOUND, yytext); return IT_BOUND;}
{IT_sameTerm} {yylval.semval = constructTerminal(e_IT_sameTerm, yytext); return IT_sameTerm;}
{IT_isIRI} {yylval.semval = constructTerminal(e_IT_isIRI, yytext); return IT_isIRI;}
{IT_isURI} {yylval.semval = constructTerminal(e_IT_isURI, yytext); return IT_isURI;}
{IT_isBLANK} {yylval.semval = constructTerminal(e_IT_isBLANK, yytext); return IT_isBLANK;}
{IT_isLITERAL} {yylval.semval = constructTerminal(e_IT_isLITERAL, yytext); return IT_isLITERAL;}
{IT_REGEX} {yylval.semval = constructTerminal(e_IT_REGEX, yytext); return IT_REGEX;}
{GT_DTYPE} {yylval.semval = constructTerminal(e_GT_DTYPE, yytext); return GT_DTYPE;}
{IT_true} {yylval.semval = constructTerminal(e_IT_true, yytext); return IT_true;}
{IT_false} {yylval.semval = constructTerminal(e_IT_false, yytext); return IT_false;}
{IRI_REF} {yylval.semval = constructTerminal(e_IRI_REF, yytext); return IRI_REF;}
{PNAME_NS} {yylval.semval = constructTerminal(e_PNAME_NS, yytext); return PNAME_NS;}
{PNAME_LN} {yylval.semval = constructTerminal(e_PNAME_LN, yytext); return PNAME_LN;}
{BLANK_NODE_LABEL} {yylval.semval = constructTerminal(e_BLANK_NODE_LABEL, yytext); return BLANK_NODE_LABEL;}
{VAR1} {yylval.semval = constructTerminal(e_VAR1, yytext); return VAR1;}
{VAR2} {yylval.semval = constructTerminal(e_VAR2, yytext); return VAR2;}
{LANGTAG} {yylval.semval = constructTerminal(e_LANGTAG, yytext); return LANGTAG;}
{INTEGER} {yylval.semval = constructTerminal(e_INTEGER, yytext); return INTEGER;}
{DECIMAL} {yylval.semval = constructTerminal(e_DECIMAL, yytext); return DECIMAL;}
{DOUBLE} {yylval.semval = constructTerminal(e_DOUBLE, yytext); return DOUBLE;}
{INTEGER_POSITIVE} {yylval.semval = constructTerminal(e_INTEGER_POSITIVE, yytext); return INTEGER_POSITIVE;}
{DECIMAL_POSITIVE} {yylval.semval = constructTerminal(e_DECIMAL_POSITIVE, yytext); return DECIMAL_POSITIVE;}
{DOUBLE_POSITIVE} {yylval.semval = constructTerminal(e_DOUBLE_POSITIVE, yytext); return DOUBLE_POSITIVE;}
{INTEGER_NEGATIVE} {yylval.semval = constructTerminal(e_INTEGER_NEGATIVE, yytext); return INTEGER_NEGATIVE;}
{DECIMAL_NEGATIVE} {yylval.semval = constructTerminal(e_DECIMAL_NEGATIVE, yytext); return DECIMAL_NEGATIVE;}
{DOUBLE_NEGATIVE} {yylval.semval = constructTerminal(e_DOUBLE_NEGATIVE, yytext); return DOUBLE_NEGATIVE;}
{STRING_LITERAL1} {yylval.semval = constructTerminal(e_STRING_LITERAL1, yytext); return STRING_LITERAL1;}
{STRING_LITERAL2} {yylval.semval = constructTerminal(e_STRING_LITERAL2, yytext); return STRING_LITERAL2;}
{STRING_LITERAL_LONG1} {yylval.semval = constructTerminal(e_STRING_LITERAL_LONG1, yytext); return STRING_LITERAL_LONG1;}
{STRING_LITERAL_LONG2} {yylval.semval = constructTerminal(e_STRING_LITERAL_LONG2, yytext); return STRING_LITERAL_LONG2;}
{NIL} {yylval.semval = constructTerminal(e_NIL, yytext); return NIL;}
{ANON} {yylval.semval = constructTerminal(e_ANON, yytext); return ANON;}
<<EOF>> { /* end of input: stop scanning */ yyterminate();}
%%

/**
 * Return the tail of the text the scanner has consumed on the current line.
 *
 * Starting at the first NUL byte in the current flex buffer (NOTE(review):
 * presumably the terminator flex writes after the current token -- confirm),
 * the result is moved backwards until the start of the buffer, the character
 * after the previous '\n', or 79 characters have been covered, whichever
 * comes first.
 *
 * @param m  unused; kept so existing callers keep compiling.
 * @return   pointer INTO the scanner's buffer (not a copy; do not free, and
 *           do not keep it across further scanning), or an empty string when
 *           the scanner has no current buffer yet.
 */
char* getContextString(char* m)
{
    /* Size of the context window including the terminating NUL; at most
     * CONTEXT_SIZE-1 characters are returned. */
    enum { CONTEXT_SIZE = 80 };
    static char empty[] = "";
    char* start;
    char* ptr;
    int len;

    (void) m;

    /* The buffer stack is only allocated once scanning has started. */
    if (yy_buffer_stack == NULL
        || yy_buffer_stack[yy_buffer_stack_top] == NULL
        || yy_buffer_stack[yy_buffer_stack_top]->yy_ch_buf == NULL) {
        return empty;
    }

    start = yy_buffer_stack[yy_buffer_stack_top]->yy_ch_buf;
    ptr = start + strlen(start);

    /* Walk back to the beginning of the line, bounded by the window size. */
    for (len = 0;
         ptr > start && *(ptr - 1) != '\n' && len < CONTEXT_SIZE - 1;
         --ptr, ++len) {
        /* nothing: all work is done in the loop header */
    }
    return ptr;
}