// $Id: Parser.java,v 1.27 2003/09/29 23:24:22 jderoo Exp $
// PxButton | build | javac -O *.java |

import java.net.URL;
import java.util.Vector;

/**
N3 parser.
  @author Jos De Roo
                                                                                                    */
public class Parser {
  String str;                               // N3 string
  int pos = 0;                              // tokenizer position
  Vector vt = null;                         // quantified variables table
  Euler r = null;                           // root Euler object
  String u = null;                          // base URI of the RDF resource

  static final String ST = "{}[]()<>\"';,.^! \t\r\n\\";
  static final String LOGa = "a";
  static final String LOGe = "=";
  static final String LOGi = "=>";
  static final String LOGimplies = "<http://www.w3.org/2000/10/swap/log#implies>";
  static final String DPO = "<http://www.daml.org/2001/03/daml+oil";
  static final String ONT = "<http://www.w3.org/2001/10/daml+oil";
  static final String OWL = "<http://www.w3.org/2002/07/owl";
  static final String OWLequivalentTo = "<http://www.w3.org/2002/07/owl#equivalentTo>";
  static final String OWLsameIndividualAs = "<http://www.w3.org/2002/07/owl#sameIndividualAs>";
  static final String OWLsameAs = "<http://www.w3.org/2002/07/owl#sameAs>";
  static final String OWLsameClassAs = "<http://www.w3.org/2002/07/owl#sameClassAs>";
  static final String OWLequivalentClass = "<http://www.w3.org/2002/07/owl#equivalentClass>";
  static final String OWLsamePropertyAs = "<http://www.w3.org/2002/07/owl#samePropertyAs>";
  static final String OWLequivalentProperty = "<http://www.w3.org/2002/07/owl#equivalentProperty>";
  static final String OWLdifferentIndividualFrom = "<http://www.w3.org/2002/07/owl#differentIndividualFrom>";
  static final String OWLdifferentFrom = "<http://www.w3.org/2002/07/owl#differentFrom>";
  static final String OWLFunctionalProperty = "<http://www.w3.org/2002/07/owl#FunctionalProperty>";
  static final String OWLInverseFunctionalProperty = "<http://www.w3.org/2002/07/owl#InverseFunctionalProperty>";
  static final String OWLUnambiguousProperty = "<http://www.w3.org/2002/07/owl#UnambiguousProperty>";
  static final String OWLUniqueProperty = "<http://www.w3.org/2002/07/owl#UniqueProperty>";
  static final String XSDstring = "<http://www.w3.org/2001/XMLSchema#string>";
  static final String RDFtype = "<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>";
  static final String RDFfirst = "<http://www.w3.org/1999/02/22-rdf-syntax-ns#first>";
  static final String RDFrest = "<http://www.w3.org/1999/02/22-rdf-syntax-ns#rest>";
  static final String RDFnil = "<http://www.w3.org/1999/02/22-rdf-syntax-ns#nil>";

/**
  constructs an N3 parser
                                                                                                    */
  public Parser() {
  }

/**
  constructs an N3 parser
  @param s the N3 string
  @param vartab quantified variables table
  @param root Euler object
  @param base URI of the RDF resource
                                                                                                    */
  public Parser(String s, Vector vartab, Euler root, String uri) {
    str = s;
    vt = vartab;
    r = root;
    u = uri;
  }

/**
  N3 triple parse method
  @return Euler obj
                                                                                                    */
  public Euler parse() {
    String nt = tokenize();
    if (nt == null) return null;
    return parse(true, nt);
  }

/**
  N3 node parse method
  @param nt next token
  @return Euler obj
                                                                                                    */
  public Euler parse(String nt) {
    return parse(false, nt);
  }

  Euler parse(boolean b, String nt) {
    Euler e = new Euler();
    try {
      if (b) {
        if (nt == null) e.subj = null;
        else if (nt.equals("{")) {
          nt = tokenize();
          if (nt.equals(".")) e.subj = parse(false, "{}");
          else e.subj = parse(true, nt);
          Euler el = e.subj;
          nt = tokenize();
          while (el.near != null) el = el.near;
          while (!nt.equals("}")) {
            el.near = parse(true, nt);
            while (el.near != null) el = el.near;
            nt = tokenize();
          }
        }
        else if (nt.equals("[")) {
          e.subj = parse(true, null);
          if (e.subj.verb == null) r.verb = ";]";
          nt = tokenize();
          if (!nt.equals("]")) System.err.println("** (p1) expecting ] at " + e + " but got " + nt);
        }
        else if (nt.equals("(")) {
          list(e);
          e.subj = e.obj;
          e.obj = null;
        }
        else {
          e.subj = parse(false, nt);
          if (nt.equals("this")) e.subj.verb = '<' + u + "#frag" + e.hashCode() + '>';
        }
        path(e.subj);
        nt = tokenize();
      }
      if (nt == null || nt.equals(";")) {
        e.bound = true;
        return e;
      }
      else if (e.subj != null && nt.startsWith("_:")) {
        e.cverb = nt;
        if (!vt.contains(nt)) vt.addElement(nt);
      }
      else if (nt.equals("[")) {
        String a = "_:" + e.hashCode();
        Euler ap = parse(true, a);
        e.cverb = a;
        if (ap.verb != null) r.verb = ";]";
        nt = tokenize();
        if (!nt.equals("]")) System.err.println("** (p2) expecting ] at " + e + " but got " + nt);
        e.near = ap;
      }
      else if (nt.equals("(")) {
        list(e);
        return e.obj;
      }
      else e.cverb = nt;
      if (e.cverb.endsWith("_@")) {
        e.vv = true;
        e.cverb = e.cverb.substring(0, e.cverb.length() - 2);
      }
      e.verb = e.cverb;
      if (e.verb.endsWith("@@")) e.verb = e.verb.substring(0, e.verb.length() - 2);
      String s1 = e.verb;
      String s2 = "";
      if (e.verb.indexOf('"') != -1) {
        s1 = e.verb.substring(0, e.verb.indexOf('"'));
        s2 = e.verb.substring(e.verb.indexOf('"'));
      }
      if (r != null && s1.length() > 0 && !s1.equals(LOGa) && !s1.equals(LOGe) &&
          s1.charAt(0) != '(' && s1.charAt(0) != '<' && s1.charAt(0) != '[' &&
          s1.charAt(0) != '_' && s1.charAt(0) != '"' && s1.indexOf(':') != -1) {
        String pf = s1.substring(0, s1.indexOf(':') + 1);
        String pg = (String)r.nsp.get(pf);
        if (pg != null) pf = pg;
        String qf = (String)r.ns.get(pf);
        if (qf == null) {
          System.err.println("** no @prefix " + pf + " found, taking <" + u + "#>");
          qf = "<" + u + "#>";
          r.ns.put(pf, qf);
        }
        StringBuffer sb = new StringBuffer(pf);
        sb.append(e.cverb.substring(e.cverb.indexOf(':') + 1));
        e.cverb = sb.toString();
        sb = new StringBuffer(qf);
        sb.insert(sb.length() - 1, s1.substring(s1.indexOf(':') + 1));
        e.verb = sb.toString() + s2;
      }
      else if (u != null && s1.length() > 0 && s1.charAt(0) == '<' && s1.indexOf(':') == -1)
        e.verb = e.cverb = '<' + toURI(s1.substring(1, s1.length() - 1)) + '>' + s2;
      if (e.verb.startsWith("'")) e.verb = "'" + new Double(e.verb.substring(1, e.verb.length() - 1)) + "'";
      if (e.verb.equals(LOGa)) e.verb = RDFtype;
      if (e.verb.equals(LOGe)) e.verb = OWLsameAs;
      if (e.verb.equals(LOGi)) e.verb = LOGimplies;
      if (e.verb.startsWith(DPO)) e.verb = OWL + e.verb.substring(DPO.length());
      if (e.verb.startsWith(ONT)) e.verb = OWL + e.verb.substring(ONT.length());
      if (e.verb.equals(OWLequivalentTo)) e.verb = OWLsameAs;
      if (e.verb.equals(OWLsameIndividualAs)) e.verb = OWLsameAs;
      if (e.verb.equals(OWLsameClassAs)) e.verb = OWLequivalentClass;
      if (e.verb.equals(OWLsamePropertyAs)) e.verb = OWLequivalentProperty;
      if (e.verb.equals(OWLdifferentIndividualFrom)) e.verb = OWLdifferentFrom;
      if (e.verb.equals(OWLUnambiguousProperty)) e.verb = OWLInverseFunctionalProperty;
      if (e.verb.equals(OWLUniqueProperty)) e.verb = OWLFunctionalProperty;
      if (e.verb.startsWith("?") && vt != null && !vt.contains(e.verb)) vt.addElement(e.verb);
      if (vt != null) e.varid = vt.indexOf(e.verb);
      if (e.varid == -1) e.bound = true;
      if (nt.equals(".") && (str.charAt(pos) == ' ' || str.charAt(pos) == '\n')) {
        e.obj = parse(false, "");
        e.verb = "";
        e.cverb = "";
        e.bound = true;
        return e;
      }
      if (!nt.equals(RDFfirst) && str != null && pos < str.length() && str.charAt(pos) == '^' && str.charAt(pos + 1) == '^') {
        pos = pos + 2;
        Euler et = parse(false, tokenize());
        if (!et.verb.equals(XSDstring)) {
          e.subj = e.copy();
          e.verb = e.cverb = "^^";
          e.bound = true;
          e.obj = et;
          e.obj.verb.intern();
        }
      }
      if (b) {
        nt = tokenize();
        if (nt.equals("{")) {
          nt = tokenize();
          if (nt.equals(".")) e.obj = parse(false, "{}");
          else e.obj = parse(true, nt);
          Euler el = e.obj;
          nt = tokenize();
          while (nt != null && !nt.equals("}")) {
            el.near = parse(true, nt);
            el = el.near;
            nt = tokenize();
          }
        }
        else if (nt.equals("[")) {
          e.obj = parse(true, null);
          if (e.obj.verb != null) r.verb = ";]";
          nt = tokenize();
          if (!nt.equals("]")) System.err.println("** (p3) expecting ] at " + e + " but got " + nt);
        }
        else if (nt.equals("(")) list(e);
        else e.obj = parse(false, nt);
        path(e.obj);
        nt = tokenize();
        if (nt.endsWith("\"\"\"")) {
          e.obj.cverb = e.obj.cverb + nt;
          e.obj.verb = e.obj.cverb;
          nt = tokenize();
        }
        Euler el = e;
        while (nt.equals(";") || nt.equals(",")) {
          while (el.near != null) el = el.near;
          if (nt.equals(";")) {
            nt = tokenize();
            if (nt.equals("]")) {
              r.verb = ";]";
              nt = ";";
              break;
            }
            else if (nt.equals(".")) break;
          }
          else nt = el.cverb;
          el.near = parse(false, nt);
          el.near.subj = e.subj;
          nt = tokenize();
          if (nt.equals("{")) {
            nt = tokenize();
            if (nt.equals(".")) el.near.obj = parse(false, "{}");
            else el.near.obj = parse(true, nt);
            Euler ef = el.near.obj;
            nt = tokenize();
            while (nt != null && !nt.equals("}")) {
              ef.near = parse(true, nt);
              ef = ef.near;
              nt = tokenize();
            }
          }
          else if (nt.equals("[")) {
            el.near.obj = parse(true, null);
            if (el.near.obj.verb != null) r.verb = ";]";
            nt = tokenize();
            if (!nt.equals("]")) System.err.println("** (p4) expecting ] at " + e + " but got " + nt);
          }
          else if (nt.equals("(")) list(el.near);
          else el.near.obj = parse(false, nt);
          nt = tokenize();
          if (nt.endsWith("\"\"\"")) {
            el.near.obj.cverb = el.near.obj.cverb + nt;
            el.near.obj.verb = el.near.obj.cverb;
            nt = tokenize();
          }
          swap(el.near);
        }
        if (!nt.equals(".") && !nt.equals(";"))
          System.err.println("** (p5) expecting . or ; at " + e + " but got " + nt);
      }
      swap(e);
      e.far = r;
      return e;
    }
    catch (NullPointerException exc) {
      exc.printStackTrace();
      return e;
    }
  }

  String tokenize() {
    String nt = token();
    while (pos < str.length() && nt == null) nt = token();
    return nt;
  }

  String next() {
    int start = pos;
    while (pos < str.length() && ST.indexOf(str.charAt(pos)) < 0) pos++;
    if (start == pos && ST.indexOf(str.charAt(pos)) >= 0) pos++;
    return str.substring(start, pos);
  }

  String token() {
    if (r.verb != null && r.verb.equals(".}")) return r.verb = "}";
    if (r.verb != null && r.verb.equals(";]")) return r.verb = "]";
    if (pos >= str.length()) return null;
    int start = pos;
    String t = null;
    String nt = next();
    if (nt.indexOf('#') != -1) {
      t = nt.substring(0, nt.indexOf('#'));
      String v = r.verb;
      while (nt != null && !nt.equals("\n") && pos < str.length()) nt = next();
      r.verb = v;
      if (t.equals("")) return null;
    }
    else if (nt.equals("@prefix") || nt.equals("bind")) {
      String nsc = tokenize();
      if (nsc.equals("default")) nsc = ":";
      String nsd = nsc;
      String nsu = tokenize();
      if (u != null && nsu.length() > 0 && nsu.charAt(0) == '<' && nsu.indexOf(':') == -1)
        nsu = '<' + toURI(nsu.substring(1, nsu.length() - 1)) + '>';
      String nsv = (String)r.ns.get(nsd);
      while (nsv != null && !nsu.equals(nsv)) {
        nsd = "ns" + nsd;
        nsv = (String)r.ns.get(nsd);
      }
      r.nsp.put(nsd, nsd);
      r.nsp.put(nsc, nsd);
      r.ns.put(nsd, nsu);
      tokenize();
      return null;
    }
    else if (nt.equals("\"")) {
      StringBuffer sb = new StringBuffer("\"");
      if (str.charAt(pos) == '"' && str.charAt(pos + 1) == '"') {
        sb.append(next());
        sb.append(next());
        while (true) {
          sb.append(next());
          if (sb.toString().endsWith("\"\"\"") && str.charAt(pos) != '"' && str.charAt(pos + 1) != '"') break;
        }
      }
      else {
        nt = next();
        while (!nt.equals("\"")) {
          if (nt.equals("\\")) {
            sb.append(nt);
            nt = next();
          }
          sb.append(nt);
          if (pos >= str.length()) {
            System.err.println("** (t1) expecting \" at " + sb);
            break;
          }
          nt = next();
        }
        sb.append("\"");
      }
      if (str.charAt(pos) == '@') {
        String lang = token();
        int i = lang.indexOf('-');
        if (i != -1) sb.append(lang.substring(0, i) + lang.substring(i).toUpperCase());
        else sb.append(lang);
      }
      t = sb.toString();
    }
    else if (nt.equals("'")) {
      StringBuffer sb = new StringBuffer("'");
      nt = next();
      while (!nt.equals("'")) {
        sb.append(nt);
        if (pos >= str.length()) {
          System.err.println("** (t2) expecting ' at " + sb);
          break;
        }
        nt = next();
      }
      t = sb.append("'").toString();
    }
    else if (nt.equals("<")) {
      StringBuffer sb = new StringBuffer("<");
      nt = next();
      while (!nt.equals(">")) {
        sb.append(nt);
        if (pos >= str.length()) {
          System.err.println("** (t2) expecting > at " + sb);
          break;
        }
        nt = next();
      }
      t = sb.append(">").toString();
    }
    else if (nt.equals("=")) {
      if (str.charAt(pos) == '>') t = nt + next();
      else t = nt;
    }
    else if (nt.equals("is")) {
      t = tokenize();
      nt = tokenize();
      if (!nt.equals("of")) System.err.println("** (t4) expecting \"of\" but got " + nt);
      t = t + "@@";
    }
    else if (nt.equals("has")) {
      t = tokenize() + "_@";
      int cpos = pos;
      nt = tokenize();
      if (!nt.equals("of")) pos = cpos;
    }
    else if (nt.equals(" ") || nt.equals("\t") || nt.equals("\r") || nt.equals("\n") ||
             nt.equals("-") || nt.equals(">")) return null;
    else if (nt.startsWith("_:") && !nt.endsWith("_" + r.doc)) t = nt + "_" + r.doc;
    else t = nt;
    if (pos < str.length() && ST.indexOf(t) == -1 && t.charAt(0) != '"' && str.charAt(pos) == '"') t = t + token();
    if (pos < str.length() && str.charAt(pos) == '.' &&
        str.charAt(pos + 1) != ' ' && str.charAt(pos + 1) != '\n' &&
        str.charAt(pos + 1) != ']' && str.charAt(pos + 1) != '}' && str.charAt(pos + 1) != '#') {
      int opos = pos;
      try {
        t = new Double(t + next() + next()).toString();
      }
      catch (Exception e) {
        pos = opos;
      }
    }
    if (r.verb != null && t != null && !r.verb.equals(".") && t.equals("}")) {
      r.verb = ".}";
      return ".";
    }
    if (r.verb != null && t != null && !r.verb.equals(";") && t.equals("]")) {
      r.verb = ";]";
      return ";";
    }
    return r.verb = t;
  }

  void swap(Euler e) {
    if (e != null && e.subj == null && e.cverb.endsWith("@@")) {
      e.subj = e.obj;
      e.obj = parse(false, e.cverb.substring(0, e.cverb.length() - 2));
      e.verb = e.cverb = ".";
    }
    else if (e.subj != null && e.obj != null && e.cverb.endsWith("@@")) {
      Euler el = e.subj;
      e.subj = e.obj;
      e.obj = el;
      e.cverb = e.cverb.substring(0, e.cverb.length() - 2);
    }
  }

  void list(Euler e) {
    Euler el = e;
    while (true) {
      String nt = tokenize();
      if (nt.startsWith("@")) {
        el.obj = parse(false, '?' + nt.substring(1));
        nt = tokenize();
        if (!nt.equals(")")) System.err.println("** (p6) expecting ) at " + el + " but got " + nt);
        break;
      }
      else if (nt.equals(")")) {
        el.obj = parse(false, RDFnil);
        break;
      }
      else el.obj = parse(false, RDFfirst);
      if (nt.equals("[")) {
        el.obj.obj = parse(true, null);
        tokenize();
      }
      else el.obj.obj = parse(false, nt);
      path(el.obj.obj);
      el.obj.near = parse(false, RDFrest);
      el = el.obj.near;
    }
    path(e.obj);
  }

  void path(Euler e) {
    if (str != null && pos < str.length() - 1 && str.charAt(pos) == '.' &&
        str.charAt(pos + 1) != ' ' && str.charAt(pos + 1) != '\n' &&
        str.charAt(pos + 1) != ']' && str.charAt(pos + 1) != '}' && str.charAt(pos + 1) != '#') {
      pos = pos + 1;
      e.subj = e.copy();
      e.verb = e.cverb = ".";
      e.varid = -1;
      e.bound = true;
      e.obj = parse(false, tokenize());
      e.obj.verb.intern();
      path(e);
    }
    if (str != null && pos < str.length() - 1 && str.charAt(pos) == '^' &&
        str.charAt(pos + 1) != '^' && str.charAt(pos + 1) != ' ' && str.charAt(pos + 1) != '\n' &&
        str.charAt(pos + 1) != ']' && str.charAt(pos + 1) != '}' && str.charAt(pos + 1) != '#') {
      pos = pos + 1;
      e.subj = e.copy();
      e.verb = e.cverb = "^";
      e.varid = -1;
      e.bound = true;
      e.obj = parse(false, tokenize());
      e.obj.verb.intern();
      path(e);
    }
  }

  final String toURI(String s) {
    try {
      s = new URL(new URL(u), s).toString();
    }
    catch (Exception e) {
      System.err.println("** " + u + " " + s + "\n" + e);
    }
    if (s.endsWith(".n3#")) s = s.substring(0, s.length() - 4) + "#";
    return s;
  }
}
