//model.cc

#include "model.h"

#include <iostream>
#include <cctype>

using namespace std;

// Parse statements from `in` until the input is exhausted or a malformed
// statement is found.
//
// Each statement is: any number of '#' comment lines, then three tokens
// (subject, predicate, object) as accepted by readtoken(), then a '.'.
// A statement in which any token id is negative is stored in `atriples`;
// all others go to `striples`.
//
// Parsing stops silently when no further subject can be read, and stops
// with a warning on cerr when the predicate, the object or the closing '.'
// is missing. Triples read before the stop remain in the model.
void model::read(istream& in) {

	for (;;) {

		// Skip comment lines. Every extraction is checked: on failure `c`
		// keeps its previous value, and testing that stale value used to
		// loop forever when the input ended right after a '#'.
		char c = '\0';
		while (in >> c && c == '#') {
			while (in.get(c) && c != '\n' && c != '\r') {
				// discard the remainder of the comment line
			}
		}
		if (!in) return;	// nothing left to read

		// `c` is the first character of the subject; hand it back so that
		// readtoken() sees the whole token.
		in.putback(c);

		const int s = readtoken(in);
		if (s == 0) return;

		const int p = readtoken(in);
		if (p == 0) {
			cerr << "Warning: bad p" << endl;
			return;
		}

		const int o = readtoken(in);
		if (o == 0) {
			cerr << "Warning: bad o" << endl;
			return;
		}

		if (s < 0 || p < 0 || o < 0) {
			atriples.insert(triple(s, p, o));
		} else {
			striples.insert(triple(s, p, o));
		}

		// Every statement must be terminated by a '.'; a failed read counts
		// as a missing terminator.
		if (!(in >> c) || c != '.') {
			cerr << "Warning: missing '.'" << endl;
			return;
		}
	}
}

/*

literal		::= langString | xmlString

langString	::= '"' string '"' ( '-' language )?

xmlString	::= 'xml' langString

language	::= ( character - ( '.' | ws ) )+
				and containing any allowed xml:lang content as defined in
				http://www.w3.org/TR/REC-xml#sec-lang-tag

*/

// Read one token from `in` and return its integer id.
//
// Recognised tokens:
//   <...>          text up to the closing '>', no whitespace allowed inside
//   "..."[-lang]   quoted string; a backslash keeps the following character
//                  verbatim; an optional "-lang" suffix runs up to
//                  whitespace or '.'
//   xml"..."[-lang] as above, prefixed with the letters "xml"
//   _...           text up to whitespace or '.', which must be present
//
// The token text (delimiters included) is looked up in `id`. A known token
// returns its existing id; a new one is given `nextneg` if it starts with
// '_' and `nextpos` otherwise, and is recorded in both `id` and `name`.
//
// Returns 0 when no token could be read (end of input or malformed token).
// NOTE(review): this relies on nextpos/nextneg never being 0 -- confirm
// against their initial values in model.h.
int model::readtoken(istream& in) {

	char c = '\0';

	// operator>> already skips leading whitespace when skipws is set (the
	// stream default); the loop only matters if the caller cleared skipws.
	// The extraction result is checked so `c` is never examined after a
	// failed read.
	do {
		if (!(in >> c)) return 0;
	} while (isspace(static_cast<unsigned char>(c)));

	string s;
	int mid;

	// isspace() requires a value representable as unsigned char, so every
	// call below casts; a plain char may be negative for non-ASCII bytes.
	switch (c) {
	case '<':
		s += c;
		while (in.get(c) && c != '>') {
			if (isspace(static_cast<unsigned char>(c))) return 0;
			s += c;
		}
		// On a failed read `c` still holds the last character, which is
		// never '>', so this also rejects an unterminated token.
		if (c != '>') return 0;
		s += c;
		mid = nextpos;
		break;
	case 'x':
		s = c;
		if (!in.get(c) || c != 'm') goto nope;
		s += c;
		if (!in.get(c) || c != 'l') goto nope;
		s += c;
		if (!in.get(c) || c != '\"') goto nope;
		/*FALLTHROUGH*/
	case '\"':
		s += c;
		while (in.get(c) && c != '\"') {
			s += c;
			if (c == '\\') {
				// Keep the escaped character verbatim; input ending right
				// after the backslash is an unterminated string.
				if (!in.get(c)) return 0;
				s += c;
			}
		}
		if (c != '\"') return 0;
		s += c;
		// Optional language suffix: (-[0x21-0x7f - .] )?
		if (in.get(c)) {
			if (c == '-') {
				s += c;
				while (in.get(c) && c != '.' && !isspace(static_cast<unsigned char>(c))) {
					s += c;
				}
				// Leave the statement terminator for the caller.
				if (in && c == '.') in.putback(c);
			} else {
				in.putback(c);
			}
		}
		mid = nextpos;
		break;
	case '_':
		s += c;
		while (in.get(c) && c != '.' && !isspace(static_cast<unsigned char>(c))) {
			s += c;
		}
		// The token must be followed by whitespace or '.'.
		if (!in) return 0;
		in.putback(c);
		mid = nextneg;
		break;
	default:
	nope:
		cerr << "Warning: got spurious character." << endl;
		return 0;
	}

	// Reuse the id of a token seen before.
	map<string,int>::const_iterator i = id.find(s);
	if (i != id.end()) return i->second;

	// First occurrence: record it in both directions and advance the
	// counter the id was taken from.
	id[s] = mid;
	name[mid] = s;
	if (mid < 0) nextneg--; else nextpos++;
	return mid;

}

void model::write(ostream& out) {

	out << "Dump of model." << endl;

	out << "Static triples:" << endl;
	set<triple>::const_iterator i = striples.begin();
	while (i != striples.end()) {
		out << name[(*i).s] << " " <<
			name[(*i).p] << " " <<
			name[(*i).o] << " ." << endl;
		++ i;
	}

	out << "Anonymous triples:" << endl;
	i = atriples.begin();
	while (i != atriples.end()) {
		out << name[(*i).s] << " " <<
			name[(*i).p] << " " <<
			name[(*i).o] << " ." << endl;
		++ i;
	}

	out << "Done." << endl;

}

