import re

def mapQNameToPyName(prefix, lname) :
    """Translate a QName (prefix + local name) into a Python source expression.

    'skos' and 'skosxl' names become subscript lookups on the SKOS / XL
    namespace variables (e.g. SKOS['Concept']); 'rdf', 'rdfs' and 'owl'
    names become attribute lookups (e.g. RDFS.Class).  Any other prefix
    yields the placeholder string 'FIXME'.
    """
    # prefixes rendered as NAMESPACE['local']
    subscripted = (('skos', 'SKOS'), ('skosxl', 'XL'))
    # prefixes rendered as NAMESPACE.local
    dotted = (('rdf', 'RDF'), ('rdfs', 'RDFS'), ('owl', 'OWL'))

    for known, namespace in subscripted :
        if prefix == known :
            return namespace + "['" + lname + "']"

    for known, namespace in dotted :
        if prefix == known :
            return namespace + '.' + lname

    return 'FIXME'

def renderTypeTriples(out, sid, s, t) :
    """Write the generated code that types XL[s] as OWL[t].

    One line appending (XL[s], RDF.type, OWL[t]) to the list named by
    `sid` is written to `out`.  When `t` is one of the OWL property kinds
    (AnnotationProperty, ObjectProperty, DatatypeProperty) a second line
    is written that also types the subject as RDF.Property.
    """
    # everything up to the object of the triple is shared by both lines
    lead = '\t' + sid + ".append((XL['" + s + "'], RDF.type, "
    out.write(lead + "OWL['" + t + "']))\n")

    propertyKinds = ('AnnotationProperty', 'ObjectProperty', 'DatatypeProperty')
    if t in propertyKinds :
        out.write(lead + 'RDF.Property))\n')

def _localNames(qnameList):
    """Return the local names found in a comma-separated QName list.

    For example 'skosxl:a, skosxl:b, ' yields ['a', 'b']; empty entries
    (such as the one left after a trailing comma) are dropped.
    """
    names = []
    for qname in qnameList.split(','):
        qname = qname.replace(' ', '')
        if qname:
            names.append(qname.partition(':')[2])
    return names


def _nsItem(namespace, name):
    """Return the source text of a namespace subscript, e.g. XL['name']."""
    return namespace + "['" + name + "']"


def _writeTriple(out, sid, subj, pred, obj):
    """Write one '<sid>.append((subj, pred, obj))' line of generated code."""
    out.write('\t' + sid + '.append((' + subj + ', ' + pred + ', ' + obj + '))\n')


def generatePythonCodeFromStatements(textfile, outfile):
    """Generate a Python module from a file of SKOS-XL reference statements.

    Each non-blank line of `textfile` is expected to look like
    '# S<n> <statement text>'.  For every such line the generated code
    echoes the line as a comment, creates a list named after the statement
    id, appends the triples recognised in the statement text (or a
    '# no triples to add' comment when nothing is recognised) and appends
    that list to `statements`.

    The generated module defines `statements()`, returning the list of
    per-statement triple lists, and `schema()`, which adds every triple
    to a ConjunctiveGraph.

    Parameters:
        textfile: iterable of text lines (e.g. an open file).
        outfile: object with a `write(str)` method receiving the code.

    Raises:
        ValueError: if a non-blank line carries no statement id.
    """
    outfile.write('from rdflib import Namespace, RDF, RDFS, ConjunctiveGraph\n\n')
    outfile.write('def statements() :\n\n')
    outfile.write("\tXL = Namespace('http://www.w3.org/2008/05/skos-xl#')\n")
    outfile.write("\tSKOS = Namespace('http://www.w3.org/2004/02/skos/core#')\n")
    outfile.write("\tOWL = Namespace('http://www.w3.org/2002/07/owl#')\n\n")
    outfile.write('\tstatements = []\n\n')

    # Raw strings keep the regex escapes (\w, \d, \s) away from Python's own
    # string-escape processing.
    statementRe = re.compile(r'#\s(\S+)')
    typeRe = re.compile(r'# S\d+ skosxl:(\w+) is an instance of owl:(\w+)')
    typeTwoRe = re.compile(r'# S\d+ skosxl:(\w+) and skosxl:(\w+) are each instances of owl:(\w+)')
    typeMultiRe = re.compile(r'# S\d+ ((?:skosxl:\w+, )+)skosxl:(\w+) and skosxl:(\w+) are each instances of owl:(\w+)')
    domainRangeRe = re.compile(r'# S\d+ The rdfs:(range|domain) of skosxl:(\w+) is the class (\w+):(\w+)')
    domainsRangesRe = re.compile(r'# S\d+ The rdfs:(range|domain) of each of ((?:skosxl:\w+, )*)skosxl:(\w+) and skosxl:(\w+) is the class (\w+):(\w+)')
    disjointSKOSClassRe = re.compile(r'# S\d+ skosxl:(\w+) is disjoint with skos:(\w+)')
    subPropertiesRe = re.compile(r'# S\d+ ((?:skosxl:\w+, )+)skosxl:(\w+) and skosxl:(\w+) are each sub-properties of skosxl:(\w+)')
    subPropertyRe = re.compile(r'skosxl:(\w+) is a sub-property of skosxl:(\w+)')
    inverseRe = re.compile(r'skosxl:(\w+) is the owl:inverseOf the property skosxl:(\w+)')
    disjointSKOSClassesRe = re.compile(r'# S\d+ skosxl:(\w+) is disjoint with each of ((?:skos:\w+, )*)skos:(\w+) and skos:(\w+)')
    subClassRe = re.compile(r'skosxl:(\w+) is a sub-class of skosxl:(\w+)')

    for line in textfile:

        # Blank lines (e.g. a trailing empty line) carry no statement.
        if not line.strip():
            continue

        sidMatch = statementRe.search(line)
        if sidMatch is None:
            raise ValueError('no statement id found in line: %r' % (line,))
        sid = sidMatch.group(1)

        # Echo the source line as a comment.  The line ending is normalised
        # so that a final line without a newline cannot swallow the
        # initialisation that follows into the comment.
        outfile.write('\t' + line.rstrip('\r\n') + '\n')

        # output statement array initialisation
        outfile.write('\t' + sid + ' = []\n')

        typeReMatch = typeRe.search(line)
        typeTwoReMatch = typeTwoRe.search(line)
        typeMultiReMatch = typeMultiRe.search(line)
        domainRangeReMatch = domainRangeRe.search(line)
        domainsRangesReMatch = domainsRangesRe.search(line)
        disjointSKOSClassReMatch = disjointSKOSClassRe.search(line)
        subPropertiesReMatch = subPropertiesRe.search(line)
        subPropertyReMatchGroups = subPropertyRe.findall(line)
        inverseReMatchGroups = inverseRe.findall(line)
        disjointSKOSClassesReMatch = disjointSKOSClassesRe.search(line)
        subClassReMatchGroups = subClassRe.findall(line)

        # The first pattern that matches wins; the order below is significant.
        if typeReMatch is not None:
            renderTypeTriples(outfile, sid, typeReMatch.group(1), typeReMatch.group(2))

        elif typeTwoReMatch is not None:
            renderTypeTriples(outfile, sid, typeTwoReMatch.group(1), typeTwoReMatch.group(3))
            renderTypeTriples(outfile, sid, typeTwoReMatch.group(2), typeTwoReMatch.group(3))

        elif typeMultiReMatch is not None:
            owlType = typeMultiReMatch.group(4)
            # comma-separated names first, then the two names around "and";
            # each name is typed exactly once
            for name in _localNames(typeMultiReMatch.group(1)):
                renderTypeTriples(outfile, sid, name, owlType)
            renderTypeTriples(outfile, sid, typeMultiReMatch.group(2), owlType)
            renderTypeTriples(outfile, sid, typeMultiReMatch.group(3), owlType)

        elif domainRangeReMatch is not None:
            target = mapQNameToPyName(domainRangeReMatch.group(3), domainRangeReMatch.group(4))
            _writeTriple(outfile, sid,
                         _nsItem('XL', domainRangeReMatch.group(2)),
                         'RDFS.' + domainRangeReMatch.group(1),
                         target)

        elif domainsRangesReMatch is not None:
            target = mapQNameToPyName(domainsRangesReMatch.group(5), domainsRangesReMatch.group(6))
            predicate = 'RDFS.' + domainsRangesReMatch.group(1)
            names = _localNames(domainsRangesReMatch.group(2))
            names.append(domainsRangesReMatch.group(3))
            names.append(domainsRangesReMatch.group(4))
            for name in names:
                _writeTriple(outfile, sid, _nsItem('XL', name), predicate, target)

        elif disjointSKOSClassReMatch is not None:
            _writeTriple(outfile, sid,
                         _nsItem('XL', disjointSKOSClassReMatch.group(1)),
                         _nsItem('OWL', 'disjointWith'),
                         _nsItem('SKOS', disjointSKOSClassReMatch.group(2)))

        elif subPropertiesReMatch is not None:
            parent = _nsItem('XL', subPropertiesReMatch.group(4))
            names = _localNames(subPropertiesReMatch.group(1))
            names.append(subPropertiesReMatch.group(2))
            names.append(subPropertiesReMatch.group(3))
            for name in names:
                _writeTriple(outfile, sid, _nsItem('XL', name), 'RDFS.subPropertyOf', parent)

        elif subPropertyReMatchGroups:
            for child, parent in subPropertyReMatchGroups:
                _writeTriple(outfile, sid,
                             _nsItem('XL', child), 'RDFS.subPropertyOf', _nsItem('XL', parent))

        elif inverseReMatchGroups:
            inverseOf = _nsItem('OWL', 'inverseOf')
            for first, second in inverseReMatchGroups:
                # the inverse relation is emitted in both directions
                _writeTriple(outfile, sid, _nsItem('XL', first), inverseOf, _nsItem('XL', second))
                _writeTriple(outfile, sid, _nsItem('XL', second), inverseOf, _nsItem('XL', first))

        elif disjointSKOSClassesReMatch is not None:
            subject = _nsItem('XL', disjointSKOSClassesReMatch.group(1))
            names = _localNames(disjointSKOSClassesReMatch.group(2))
            names.append(disjointSKOSClassesReMatch.group(3))
            names.append(disjointSKOSClassesReMatch.group(4))
            for name in names:
                _writeTriple(outfile, sid,
                             subject, _nsItem('OWL', 'disjointWith'), _nsItem('SKOS', name))

        elif subClassReMatchGroups:
            for child, parent in subClassReMatchGroups:
                _writeTriple(outfile, sid,
                             _nsItem('XL', child), 'RDFS.subClassOf', _nsItem('XL', parent))

        else:
            outfile.write('\t# no triples to add\n')

        # output statements appended
        outfile.write('\tstatements.append(' + sid + ')\n\n')

    outfile.write('\treturn statements\n\n')

    outfile.write('def schema() :\n\n')

    outfile.write('\tschema = ConjunctiveGraph()\n')
    outfile.write('\tfor statement in statements() :\n')
    outfile.write('\t    for triple in statement :\n')
    outfile.write('\t        schema.add(triple)\n\n')
    outfile.write('\treturn schema\n')
    outfile.write('# all done')
            
# Generate ./skosxl.py from the statements listed in ./skos.txt.
# The context manager closes both files even if generation raises.
with open('./skos.txt', 'r') as textfile, open('./skosxl.py', 'w') as outfile:
    generatePythonCodeFromStatements(textfile, outfile)
