#!/usr/bin/python2.6
# LICENCE: CC-BY
# See also http://webofdata.wordpress.com/2010/04/14/oh-it-is-data-on-the-web/

import urllib, os, sys
from swap import notation3, uripath # http://www.w3.org/2000/10/swap
#  PYTHONPATH includes /devel/WWW/2000/10/swap/..
from xml.dom.minidom import parse, parseString # http://docs.python.org/library/xml.dom.minidom.html

# Root URI of the OData service that gets crawled.
# Also try http://uriburner.com/about/html/http/services.odata.org/OData/OData.svc/
SERVICE = "http://services.odata.org/OData/OData.svc/"

# Directory prefix for every generated file.
FILE_ROOT = "data/"
# Namespace prepended to link relations that are not already full URIs.
DEFAULT_REL_NS = "http://www.w3.org/2010/odata/common#"

#  ToDo:   Use Namespaces
#           Datatypes

# Work queue: URI still to be fetched -> name of the file to generate for it.
agenda = {
    SERVICE: FILE_ROOT + 'index.n3',
}
# URIs already taken off the queue -> name of the file generated for them.
done = {}

def dent(level):
    """Return the indentation for nesting depth `level`: four spaces per level."""
    return '    ' * level
    
# Common namespace for relations without full URI
def common(value):
    """Return `value` unchanged if it contains a ':', else prefix it with DEFAULT_REL_NS."""
    if ':' not in value:
        return DEFAULT_REL_NS + value
    return value

# Predicate rewrites: the default ':<element name>' predicate -> the one emitted instead.
MAP = {
    ':id': ':subject',
    ':title': 'dc:title',
}
# Element names to skip; in this file it is only referenced from commented-out code.
IGNORE = {
    'content': 1,
}

def contentsToObject(ele, inputURI,  outputBase, level=0, subject=None):
    """Serialize the children of a DOM element as an N3 fragment and return it.

    An element that has element children becomes a bracketed "[ ... ];" node
    with one predicate per handled child.  An element that has text but no
    element children becomes a single object term followed by ";".  Hrefs met
    on the way are queued in the module-level `agenda` by the nested `link`
    helper.  Progress messages are printed to stdout.

    ele        -- DOM element whose children are serialized.
    inputURI   -- only handed on to the recursive calls; not used otherwise.
    outputBase -- directory of the file being written; must start with
                  FILE_ROOT (asserted inside `link`).
    level      -- nesting depth; indents both the N3 and the printed messages.
    subject    -- text of the most recent <id> child seen; must be set
                  (asserted) by the time a <properties> element is reached.

    Returns the fragment; it is "" when `ele` has no children, or only
    children that are neither elements nor text nodes.
    """

    def link(href):
        """Queue `href` for conversion and return the N3 reference to its output.

        The result is the target's ".n3" path relative to the service root,
        prefixed with one "../" per '/' in outputBase below FILE_ROOT, with
        "#it" appended.
        """
        # presumably resolves href against the service root -- confirm against swap.uripath
        abs = uripath.join(SERVICE, href);  # @@ Should take xml:base from XML
        # Turn the OData key syntax "Name(key)" into path segments "Name/key/".
        target =  abs.replace('(', '/').replace(')/', '/').replace(')', '/');
        if target.endswith('/'): target += 'index';
        rel = uripath.refTo(SERVICE, target)  # service-root relative
        # A leftover "http:" means the target could not be made relative to SERVICE.
        assert "http:" not in rel, "SERVICE=%s, target=%s" % (SERVICE, target)
        rel += ".n3"   # Also helps distinguish file from directory
        # Schedule the target unless it is already queued or already taken off the queue.
        if abs not in agenda and abs not in done: agenda[abs] = FILE_ROOT + rel; # filename rel to pwd
        print dent(level)+"Link to href: %s -> rel: %s" %(href, FILE_ROOT + rel)
        assert  outputBase.startswith(FILE_ROOT)
        dif = len(outputBase[len(FILE_ROOT):].split('/')) - 1; # number of slashes
        return ('../' * dif) + rel + "#it"


    s = ""
#    if ele.localName in IGNORE:
#        assert len(ele.childNodes) == 1
#        return contentsToObject(ele.firstChild,inputURI,  outputBase, level, subject)

    if len(ele.childNodes) > 0:
        # Record which node types occur among the children.
        has = {}
        for e in ele.childNodes:
            has[e.nodeType] = 1;
        # NOTE: `e` is the last child left over from the loop above; it is
        # only used here to reach the node-type constants.
        if has.get(e.ELEMENT_NODE,0):
            s ='\n' + dent(level)+'['
            if ele.localName == 'properties':
                # The node is equated with the entry whose <id> was seen earlier.
                assert subject != None
                s+= '=<%s>; '% link(subject);
            href = ele.getAttribute('href')
            if href:
                s+= '= <%s>' % link(href);
            for e in ele.childNodes:
                if e.nodeType != e.ELEMENT_NODE: continue
                if len(e.childNodes) == 0:
                    # Empty child element: only <link> and <category> produce output.
                    print dent(level)+'pred no contents: ' + `e.nodeType` +" : " + `e.localName`
                    if (e.localName == 'link'):
                        rel = e.getAttribute('rel')
                        href = e.getAttribute('href')
                        s += ' <%s> [= <%s>' %(common(rel), link(href))
                        title = e.getAttribute('title');
                        if (title): s+= '; :role '+ notation3.stringToN3(title).encode('utf8')
                        s += '];'
                    elif (e.localName == 'category'):
                        print dent(level)+" *** Category"
                        term = e.getAttribute('term').capitalize()
                        scheme = e.getAttribute('scheme');
                        s += ' a <%s#%s>;' %(scheme,term)  #  @@ gen RDF class -- make schema?
                    else:
                        pass # s += ' :' + e.localName  +' '+ contentsToObject(e, inputURI,  outputBase, level+1)
                else:
                    # Child with contents: emit a predicate and recurse for its object.
                    print dent(level)+'pred w contents:' + `e.nodeType` +" : " + `e.localName`
                    #if (e.localName in IGNORE):
                    #    return contentsToObject(e,inputURI,  outputBase, level, subject)
                    pred = ':' + e.localName;
                    if (pred == ':id'):
                        # Remember the id; it is passed to this and all later
                        # recursive calls made from this loop.
                        subject = e.firstChild.data
                        print dent(level)+"Set subject to: "+subject
                    pred = MAP.get(pred, pred)
                    # if pred == ':id': pred = '=';
                    s += ' ' + pred  +' '+ contentsToObject(e, inputURI, outputBase, level+1, subject)
            s += '\n' + dent(level)+'];\n'
        elif has.get(e.TEXT_NODE,0):
            # Text but no element children: emit one term per text node.
            for e in ele.childNodes:
                if e.nodeType != e.TEXT_NODE: continue
                # The text of an <id> is treated as an href and emitted as a reference.
                if ele.localName == 'id': s+= '<%s>' % link(e.data);
                else:
                    t = ele.getAttribute('type')
                    if t and t in ['Edm.Int32', 'Edm.Int64', 'Edm.Int16']:
                        s += e.data;  # Numeric  @@ Do dates Edm.DateTime
                    else:
                        # Everything else goes through stringToN3 and is UTF-8 encoded.
                        s += notation3.stringToN3(e.data).encode('utf8');
                print dent(level)+ele.localName+' text: '+e.data
            s+= ';'
            
    else: # No contents at all (problem?)
        print "@@ Ooops found empty property list for "+ele.localName
    return s
        
def serviceToN3(ele, inputURI, outputBase):
    """Return the N3 statement for a whole document whose root element is `ele`.

    A <feed> or <entry> root is described as a property of the document
    itself ("<>"); any other root becomes a property of "<#it>" named after
    the root element.  The object is the root's contents serialized by
    contentsToObject at nesting level 1, followed by a closing ".".
    """
    root = ele.localName
    if root == 'feed':
        opening = '<> a :FeedDocument; :feed '
    elif root == 'entry':
        opening = '<> a :EntryDocument; :entry '
    else:
        opening = '<#it> :%s ' % root
    return opening + contentsToObject(ele, inputURI, outputBase, 1) + '.'

def mkdirOpen(fn, mode):
    print "Opening "+fn
    slash = fn.rfind('/')
    if slash >= 0:
        dir = fn[:slash+1]
        if not os.path.exists(dir): os.makedirs(dir)
    return open(fn, mode)
    
def convertFile(inputURI, fn):
    input = urllib.urlopen(inputURI);
    print "\nProcessing "+inputURI
    buffer = input.read()
    dom = parseString(buffer);
    if 1:
        xmlfile = mkdirOpen(fn.replace(".n3", ".xml"), "w")
        xmlfile.write(buffer)
        xmlfile.close()

    assert inputURI.startswith(SERVICE)
    outputBase = fn[:fn.rindex('/')+1]
    print "Writing to "+fn + " with base "+ outputBase
    
    output = mkdirOpen(fn, "w");
    output.write("""# generated by odata.py from <"""+inputURI+""">
    @prefix : <http://www.w3.org/2010/odata/schema#>.
    @prefix common: <""" + DEFAULT_REL_NS + """>.
    @prefix dc: <http://purl.org/dc/elements/1.1/> .

""");
    output.write(serviceToN3(dom.documentElement, inputURI, outputBase));
    output.write('\n#ENDS\n');
    output.close();
    
    
    return
        
# convertFile(SERVICE, './index.n3')

# Crawl until the agenda is empty.  Converting a document can add newly found
# URIs to the agenda, so it is looked at afresh on every pass.
while agenda:
    # First pending URI, taken without copying the whole key list and
    # without rebinding the builtin name `next`.
    uri = next(iter(agenda))
    # Move it to `done` before converting, so links to it are not queued again.
    fn = done[uri] = agenda.pop(uri)
    convertFile(uri, fn)
# ends
