Main Page   Namespace List   Class Hierarchy   Compound List   File List   Compound Members   File Members   Related Pages   Examples  

NTriplesParser.cpp

00001 #define TRACE_NAME "NTriplesParser"
00002 #include "config.h"
00003 #include "NTriplesParser.h"
00004 #include "ntriples.h"
00005 #include "Triple.h"
00006 #include "TripleSink.h"
00007 #include "VariableScope.h"
00008 
// Parser instance whose close() is currently running in the EXTERNAL build.
// close() sets this to `this` around the ntriples_parse() call and resets it
// to 0 afterwards; a non-zero value on entry is treated as re-entrant use.
00009 NTriplesParser* ntriples_parser = 0;
00010 
00012 //
00013 //  Standard Member Functions
00014 //
00016 
00017 NTriplesParser::NTriplesParser()
00018     : line(1)
00019 {
00020     // blank default    
00021 }
00022 
#if 0  /* omit definitions until they are implemented */

/// Copy constructor (placeholder, compiled out).
NTriplesParser::NTriplesParser(const NTriplesParser& other) { NOT_IMPLEMENTED }

/// Copy assignment (placeholder, compiled out).
const NTriplesParser& NTriplesParser::operator=(const NTriplesParser& other) { NOT_IMPLEMENTED }

/// Equality comparison (placeholder, compiled out).
bool NTriplesParser::operator==(const NTriplesParser& other) const { NOT_IMPLEMENTED }

/// Ordering comparison (placeholder, compiled out).
bool NTriplesParser::operator<(const NTriplesParser& other) const { NOT_IMPLEMENTED }

/// Hash value (placeholder, compiled out).
size_t NTriplesParser::hash() const { NOT_IMPLEMENTED }

/// Stream output (placeholder, compiled out).
std::ostream& NTriplesParser::print_to(std::ostream& stream) const { NOT_IMPLEMENTED }

#endif /* omit definitions until they are implemented */
00056     
00057 NTriplesParser::~NTriplesParser()
00058 {
00059     // blank default    
00060 }
00061 
00063 //
00064 //  Additional Public Member Functions
00065 //
00067 
00068 #ifdef STREAMING
00069 
00070 // we want a way to tell the Symbol() it should own allocated char* ...
00071 
00072 void NTriplesParser::write(const void* byteBuffer, size_t numBytes)
00073 {
00074     const char* end = byteBuffer+numBytes;
00075     const char* text = 0;
00076     for (const char *p = byteBuffer; p<end; ++p) {
00077     switch (state) {
00078     case START:
00079         switch(*p) {
00080         case '\n':
00081         line++;
00082         case ' ':
00083         case '\r':
00084         case '\t':
00085         case '.':
00086         break;
00087         case '#': 
00088         state = IN_COMMENT;
00089         break;
00090         case '<':
00091         state = IN_URI;
00092         break;
00093         case '_':
00094         state = GOT_UNDERLINE;
00095         break;
00096         case '"':
00097         state = IN_STRING;
00098         default:
00099         ERROR("unexpected character outside of a term");
00100         }
00101         break;
00102     case IN_COMMENT:
00103         switch(*p) {
00104         case '\n':
00105         line++;
00106         state = START;
00107         break;
00108         default:
00109         // everything else okay in comments
00110         break;
00111         }
00112         break;
00113     case GOT_UNDERLINE:
00114         switch(*p) {
00115         case ':':
00116         state = IN_VAR;
00117         break;
00118         default:
00119         ERROR("expecting : after _");
00120         state = START;
00121         break;
00122         }
00123         break;
00124     case IN_URI:
00125         if (!text) text = p;
00126         switch(*p) {
00127         case '>':
00128         NEW_CONSTANT;     // combine fifo text and text-p, neither null-terminated
00129         state = START;
00130         break;
00131         case '(':  case ')':  
00132         case '[':  case ']':
00133         case '{':  case '}':
00134         case '<':  case ' ':
00135         case '\'':  case '"':
00136         ERROR("illegal char in URI refernce");
00137         break;
00138         default:
00139         // perhaps we should explicitely list URIC; also more syntax checking?
00140         break;
00141         }
00142         break;
00143     case IN_VAR:
00144         if (!text) text = p;
00145         switch(*p) {
00146         case ' ':
00147         NEW_VARIABLE;
00148         state = START;
00149         break;
00150         case '(':  case ')':  
00151         case '[':  case ']':
00152         case '{':  case '}':
00153         case '<':  case '>':
00154         case '\'':  case '"':
00155         ERROR("illegal char in QName");
00156         break;
00157         default:
00158         // perhaps we should explicitely list them?
00159         break;
00160         }
00161         break;
00162     case IN_STRING:
00163 
00164         break;
00165     }
00166     }
00167     if (text) {
00168     fifo.write(text, (end-text));
00169     }
00170 }
00171 
00172 #else
00173 
00174 void NTriplesParser::write(const void* byteBuffer, size_t numBytes)
00175 {
00176     fifo.write(byteBuffer, numBytes);
00177 }
00178 #if EXTERNAL
00179 
00180 void NTriplesParser::close()
00181 {
00182     if (ntriples_parser) die("re-entrant use of NTriplesParser");
00183     ntriples_parser = this;
00184     if (!ntriples_parse()) {
00185     // should throw exception
00186     std::cout << "parse failed?";
00187     }
00188     ntriples_parser = 0;
00189 }
00190 #else
00191 
00192 Symbol NTriplesParser::getSym(char*& p, VariableScope* scope) 
00193 {
00194     Symbol::Type type;
00195     char* start;
00196     for ( ; *p; ++p) {
00197     switch (*p) {
00198     case '\n':
00199         line++;
00200     case ' ':
00201     case '\r':
00202     case '\t':
00203     case '.':
00204         break;
00205     case '#': {
00206         for ( ; *p; ++p) {
00207         if (*p == '\n') break;
00208         }
00209         break;
00210         line++;
00211     }
00212     case '_': {
00213         start = ++p;
00214         if (*start++ != ':') {
00215         std::cerr << "_ not followed by :" << " on line " << line << std::endl;
00216         exit(2);
00217         }
00218         type = Symbol::VARIABLE;
00219         for ( ; *p; ++p) {
00220         switch (*p) {
00221         case ' ':
00222         case '.':
00223             *p++ = '\0';
00224             return scope->getVar(start);
00225         case '\n':
00226             line++;
00227         case '\r':
00228         case '"':
00229         case '\'':
00230         case '{':
00231         case '}':
00232             std::cerr << "Invalid character in QName on line " << line << std::endl;
00233             exit(2);
00234         default:
00235             break;
00236         }
00237         }
00238     }
00239 
00240     case '<': {
00241         start = ++p;
00242         type = Symbol::CONSTANT;
00243         if (start[0] == '_' && start[1] == ':') {
00244         start += 2;
00245         type = Symbol::VARIABLE;
00246         exit(2);
00247         }
00248         for ( ; *p; ++p) {
00249         switch (*p) {
00250         case '>':
00251             *p++ = '\0';
00252             return Symbol(type, start);
00253         case '\n':
00254             line++;
00255         case ' ':
00256         case '\r':
00257         case '"':
00258         case '\'':
00259         case '{':
00260         case '}':
00261             std::cerr << "Invalid character in URI-Reference: '" <<
00262             *p << "' (ascii " << static_cast<int>(*p) << ")" << " on line " << line << std::endl;
00263 
00264             exit(2);
00265         default:
00266             break;
00267         }
00268         }
00269     }
00270     case '"': {
00271         char* start = ++p;
00272         for ( ; *p; ++p) {
00273         switch (*p) {
00274         case '"':
00275             *p++ = '\0';
00276             return Symbol(Symbol::LITERAL, start);
00277         case '\\':
00278             // needs a copy-to buffer thing
00279             std::cerr << "back-slash Escapes not yet implemented.   Used on line" << line << std::endl;
00280             exit(2);
00281         }
00282         }
00283     }
00284 
00285     default:
00286         std::cerr << "Invalid character: '" << *p << "', " << (int) *p << " on line " << line << std::endl;
00287         exit(2);
00288     }
00289     }
00290     return Symbol::null;
00291     }
00292 
00293 void NTriplesParser::close()
00294 {
00295     fifo.write("", 1);
00296     TRACE "** Parsing to scope ";
00297     if (TRACING) scopeForSink->print_to(TRACEOUT);
00298     TRACE std::endl;
00299     // we need it for adding, but we don't WANT to merge here, ... or do we...?
00300     for (char *p = (char*) fifo.peek(); *p; p++) {
00301     Triple t;
00302     for (int i=0; i<3; i++) {
00303         t.set(i, getSym(p, scopeForSink));
00304     }
00305     if (t) getSink()->add(t);
00306     //std::cout << "Triple " << t << " is " << (t?"true":"false") << std::endl;
00307     } 
00308 }
00309 
00310 #endif
00311 #endif
00312 
00314 //
00315 //  Additional Private Member Functions
00316 //
00318 
00319 #undef TRACE_NAME

Home to blindfold. This page generated via doxygen 1.2.11.1 Wed Oct 10 16:40:33 2001.