00001 #define TRACE_NAME "NTriplesParser"
00002 #include "config.h"
00003 #include "NTriplesParser.h"
00004 #include "ntriples.h"
00005 #include "Triple.h"
00006 #include "TripleSink.h"
00007 #include "VariableScope.h"
00008
00009 NTriplesParser* ntriples_parser = 0;
00010
00012
00013
00014
00016
00017 NTriplesParser::NTriplesParser()
00018 : line(1)
00019 {
00020
00021 }
00022
00023 #if 0
00024
00025 NTriplesParser::NTriplesParser(const NTriplesParser& other)
00026 {
00027 NOT_IMPLEMENTED
00028 }
00029
00030 const NTriplesParser& NTriplesParser::operator=(const NTriplesParser& other)
00031 {
00032 NOT_IMPLEMENTED
00033 }
00034
00035 bool NTriplesParser::operator==(const NTriplesParser& other) const
00036 {
00037 NOT_IMPLEMENTED
00038 }
00039
00040 bool NTriplesParser::operator<(const NTriplesParser& other) const
00041 {
00042 NOT_IMPLEMENTED
00043 }
00044
00045 size_t NTriplesParser::hash() const
00046 {
00047 NOT_IMPLEMENTED
00048 }
00049
00050 std::ostream& NTriplesParser::print_to(std::ostream& stream) const
00051 {
00052 NOT_IMPLEMENTED
00053 }
00054
00055 #endif
00056
00057 NTriplesParser::~NTriplesParser()
00058 {
00059
00060 }
00061
00063
00064
00065
00067
00068 #ifdef STREAMING
00069
00070
00071
00072 void NTriplesParser::write(const void* byteBuffer, size_t numBytes)
00073 {
00074 const char* end = byteBuffer+numBytes;
00075 const char* text = 0;
00076 for (const char *p = byteBuffer; p<end; ++p) {
00077 switch (state) {
00078 case START:
00079 switch(*p) {
00080 case '\n':
00081 line++;
00082 case ' ':
00083 case '\r':
00084 case '\t':
00085 case '.':
00086 break;
00087 case '#':
00088 state = IN_COMMENT;
00089 break;
00090 case '<':
00091 state = IN_URI;
00092 break;
00093 case '_':
00094 state = GOT_UNDERLINE;
00095 break;
00096 case '"':
00097 state = IN_STRING;
00098 default:
00099 ERROR("unexpected character outside of a term");
00100 }
00101 break;
00102 case IN_COMMENT:
00103 switch(*p) {
00104 case '\n':
00105 line++;
00106 state = START;
00107 break;
00108 default:
00109
00110 break;
00111 }
00112 break;
00113 case GOT_UNDERLINE:
00114 switch(*p) {
00115 case ':':
00116 state = IN_VAR;
00117 break;
00118 default:
00119 ERROR("expecting : after _");
00120 state = START;
00121 break;
00122 }
00123 break;
00124 case IN_URI:
00125 if (!text) text = p;
00126 switch(*p) {
00127 case '>':
00128 NEW_CONSTANT;
00129 state = START;
00130 break;
00131 case '(': case ')':
00132 case '[': case ']':
00133 case '{': case '}':
00134 case '<': case ' ':
00135 case '\'': case '"':
00136 ERROR("illegal char in URI refernce");
00137 break;
00138 default:
00139
00140 break;
00141 }
00142 break;
00143 case IN_VAR:
00144 if (!text) text = p;
00145 switch(*p) {
00146 case ' ':
00147 NEW_VARIABLE;
00148 state = START;
00149 break;
00150 case '(': case ')':
00151 case '[': case ']':
00152 case '{': case '}':
00153 case '<': case '>':
00154 case '\'': case '"':
00155 ERROR("illegal char in QName");
00156 break;
00157 default:
00158
00159 break;
00160 }
00161 break;
00162 case IN_STRING:
00163
00164 break;
00165 }
00166 }
00167 if (text) {
00168 fifo.write(text, (end-text));
00169 }
00170 }
00171
00172 #else
00173
00174 void NTriplesParser::write(const void* byteBuffer, size_t numBytes)
00175 {
00176 fifo.write(byteBuffer, numBytes);
00177 }
00178 #if EXTERNAL
00179
00180 void NTriplesParser::close()
00181 {
00182 if (ntriples_parser) die("re-entrant use of NTriplesParser");
00183 ntriples_parser = this;
00184 if (!ntriples_parse()) {
00185
00186 std::cout << "parse failed?";
00187 }
00188 ntriples_parser = 0;
00189 }
00190 #else
00191
00192 Symbol NTriplesParser::getSym(char*& p, VariableScope* scope)
00193 {
00194 Symbol::Type type;
00195 char* start;
00196 for ( ; *p; ++p) {
00197 switch (*p) {
00198 case '\n':
00199 line++;
00200 case ' ':
00201 case '\r':
00202 case '\t':
00203 case '.':
00204 break;
00205 case '#': {
00206 for ( ; *p; ++p) {
00207 if (*p == '\n') break;
00208 }
00209 break;
00210 line++;
00211 }
00212 case '_': {
00213 start = ++p;
00214 if (*start++ != ':') {
00215 std::cerr << "_ not followed by :" << " on line " << line << std::endl;
00216 exit(2);
00217 }
00218 type = Symbol::VARIABLE;
00219 for ( ; *p; ++p) {
00220 switch (*p) {
00221 case ' ':
00222 case '.':
00223 *p++ = '\0';
00224 return scope->getVar(start);
00225 case '\n':
00226 line++;
00227 case '\r':
00228 case '"':
00229 case '\'':
00230 case '{':
00231 case '}':
00232 std::cerr << "Invalid character in QName on line " << line << std::endl;
00233 exit(2);
00234 default:
00235 break;
00236 }
00237 }
00238 }
00239
00240 case '<': {
00241 start = ++p;
00242 type = Symbol::CONSTANT;
00243 if (start[0] == '_' && start[1] == ':') {
00244 start += 2;
00245 type = Symbol::VARIABLE;
00246 exit(2);
00247 }
00248 for ( ; *p; ++p) {
00249 switch (*p) {
00250 case '>':
00251 *p++ = '\0';
00252 return Symbol(type, start);
00253 case '\n':
00254 line++;
00255 case ' ':
00256 case '\r':
00257 case '"':
00258 case '\'':
00259 case '{':
00260 case '}':
00261 std::cerr << "Invalid character in URI-Reference: '" <<
00262 *p << "' (ascii " << static_cast<int>(*p) << ")" << " on line " << line << std::endl;
00263
00264 exit(2);
00265 default:
00266 break;
00267 }
00268 }
00269 }
00270 case '"': {
00271 char* start = ++p;
00272 for ( ; *p; ++p) {
00273 switch (*p) {
00274 case '"':
00275 *p++ = '\0';
00276 return Symbol(Symbol::LITERAL, start);
00277 case '\\':
00278
00279 std::cerr << "back-slash Escapes not yet implemented. Used on line" << line << std::endl;
00280 exit(2);
00281 }
00282 }
00283 }
00284
00285 default:
00286 std::cerr << "Invalid character: '" << *p << "', " << (int) *p << " on line " << line << std::endl;
00287 exit(2);
00288 }
00289 }
00290 return Symbol::null;
00291 }
00292
00293 void NTriplesParser::close()
00294 {
00295 fifo.write("", 1);
00296 TRACE "** Parsing to scope ";
00297 if (TRACING) scopeForSink->print_to(TRACEOUT);
00298 TRACE std::endl;
00299
00300 for (char *p = (char*) fifo.peek(); *p; p++) {
00301 Triple t;
00302 for (int i=0; i<3; i++) {
00303 t.set(i, getSym(p, scopeForSink));
00304 }
00305 if (t) getSink()->add(t);
00306
00307 }
00308 }
00309
00310 #endif
00311 #endif
00312
00314
00315
00316
00318
00319 #undef TRACE_NAME