# METALOG PROJECT (http://www.w3.org/RDF/Metalog)
#********************************************************#
# Module that obtains an N-triple file of annotations    #
# from an ML file                                        #
#********************************************************#


# Import various constants
from definitions import *

# Import procedures
from procedure import *

#*********************************************************#
# Class that creates the listing of facts/clauses/queries #
#*********************************************************#
class Macro2Ntriple:
  """Class that computes N-Triples of an annotation.

  calcolafrommacro() is the entry point: it reads an ML file, resolves the
  nested macros, builds the annotation triples and writes them to disk.
  """


  #********************************************************************************************
  # Initialization of the internal data structures
  #********************************************************************************************
  def __init__(self):
    # Macro declarations ([variable, text] pairs) stored by readml()
    self.file               = []
    # Macro declarations after aggiusta_ric() has been applied
    self.file_assertions    = []
    self.file_que           = []
    # Flat lists of N-triple tokens, or -1 when calcstr() rejected a macro
    self.data_assertions    = []
    self.data_que           = []
    # [macro prefixes, statement texts] for assertions and for queries
    self.assertions         = [[], []]
    self.que                = [[], []]

  #********************************************************************************************
  # Computes the annotation triples of the macros
  #********************************************************************************************
  def calcolafrommacro(self, ml, name_assertions, name_que, name_ann_assertions, name_ann_que):
    """Read `ml`, compute the annotation N-triples and save them.

    ml                  -- path of the ML file to read
    name_assertions     -- accepted for interface compatibility, not used here
    name_que            -- accepted for interface compatibility, not used here
    name_ann_assertions -- output path for the assertion annotations
    name_ann_que        -- output path for the query annotations

    Returns 1 on success. On any error a message is printed and -1 is
    returned instead of propagating the exception.
    """
    try:

      # Reads the ml file; only the [assertions, queries] text table is needed here
      table_assque_pre_stri = self.readml(ml)[2]

      self.file_assertions = self.file
      self.file_que = self.file

      # Adjust recursively the nested macros
      self.file_assertions = aggiusta_ric(self.file_assertions)
      self.file_que = aggiusta_ric(self.file_que)

      # Each prefix is "<variable> <macro text without its final character>"
      prefix_assertions = [el[0] + ' ' + el[1][:-1] for el in self.file_assertions]
      self.assertions = [prefix_assertions, table_assque_pre_stri[0]]

      prefix_que = [el[0] + ' ' + el[1][:-1] for el in self.file_que]
      self.que = [prefix_que, table_assque_pre_stri[1]]

      # Computes N-triples from the table with the Macro
      self.calcolatentriple()

      # Save N-triples on disk
      self.saveann(name_ann_assertions, name_ann_que)

      return(1)

    except (KeyboardInterrupt, SystemExit):
      # A user interrupt or interpreter exit must not be reported as "-1"
      raise
    except Exception:
      print("\n\n  Error during computation of Annotation N-triples\n")
      print("  Original annotations cannot be used and deduced data cannot be created. Only the result data will be computed.")
      return(-1)

  #********************************************************************************************
  # Reads from file ml and gives back table with couples [variable, string]
  #********************************************************************************************
  def readml(self, ml):
    """Parse the ML file `ml`.

    Physical lines are joined until one ends with '.' or '?'; statements
    starting with 'comment:' are dropped, tabs become spaces and runs of
    spaces outside double-quoted strings are collapsed.

    Statements whose text (after the optional leading variable found by
    cerca_var) starts with cost.REPRESENTS[0] are stored in self.file as
    [variable, text] pairs. The others are classified by analyze_word as
    assertions or queries.

    Returns (lista_assertions, lista_que, [assertions, que]) where the first
    two items are the distinct words reported by analyze_word and the third
    holds the statement texts without their terminator.
    """
    # Reading data from file
    data = []
    pending = ''
    op1 = open(ml)
    try:
      for raw_line in op1:
        data_line = raw_line.strip(' \n\t\r')
        # Blank lines carry nothing. Lines reduced to a single character
        # have always been ignored by this reader; that is kept as is.
        if len(data_line) < 2:
          continue
        if data_line[-1] in ('?', '.'):
          # Terminator reached: emit the statement gathered so far
          if pending == '':
            data.append(data_line)
          else:
            data.append(pending + " " + data_line)
          pending = ''
        else:
          # Statement continues on the next line
          pending = pending + " " + data_line
    finally:
      # Close the file even when reading fails
      op1.close()

    # Deletes comments
    data1 = []
    for el in data:
      if len(el) == 0 or el[0:8] == 'comment:':
        continue
      if el[-1] == '\n':
        el = el[:-1]
      el = el.lstrip(' ')
      if len(el) > 0:
        data1.append(el.replace('\t', ' '))

    # Collapses repeated spaces that are not inside a double-quoted string
    data2 = []
    for el in data1:
      in_str = 0
      prec = 'a'
      kept = []
      for char in el:
        if char == '"':
          # Toggle on every quote (the flag used to stay set after the
          # first closing quote because of a '==' typed instead of '=')
          in_str = 1 - in_str
        if prec != ' ' or char != ' ' or in_str == 1:
          kept.append(char)
        prec = char
      data2.append(''.join(kept))

    listatmp = []
    lista_assertions = []
    lista_que = []
    assertions = []
    que = []
    for el in data2:
      varia = cerca_var(el)
      if varia != -1:
        [variable, tmp] = varia
        tmp = tmp.lstrip(' ')
        if tmp[0:len(cost.REPRESENTS[0])] == cost.REPRESENTS[0]:
          # Macro declaration
          listatmp.append([variable, tmp])
          continue

        word = analyze_word(el)
        if word[1] == 0:
          names, statements = lista_assertions, assertions
        else:
          names, statements = lista_que, que
        for el1 in word[0]:
          if el1 not in names:
            names.append(el1)
        # Drop the terminator and any spaces left in front of it
        statements.append(el[:-1].rstrip(' '))
      else:
        # Only taken if cerca_var reports -1: classify by terminator alone
        el = el.rstrip(' ')
        if el[-1:] == '.':
          assertions.append(el[:-1].rstrip(' '))
        elif el[-1:] == '?':
          que.append(el[:-1].rstrip(' '))

    table_assque_pre_stri = [assertions, que]

    self.file = listatmp

    return(lista_assertions, lista_que, table_assque_pre_stri)


  #********************************************************************************************
  # Computes N-triples from the Macro table
  #********************************************************************************************
  def calcolatentriple(self):
    """Fill self.data_assertions and self.data_que from the macro tables.

    Each attribute becomes a flat list of triple tokens, or -1 when calcstr
    returned -1 for one of the macros of that table.
    """
    self.data_assertions = self._triples_for(self.file_assertions)
    self.data_que = self._triples_for(self.file_que)

  def _triples_for(self, macros):
    """Return the flat token list for `macros`, or -1 if calcstr fails."""
    annotation = "<http://www.w3.org/RDF/Metalog#annotation>"
    ns = "<http://www.w3.org/RDF/Metalog#ns>"

    tokens = []
    i = 1
    for el in macros:
      anoni = "_:an"+num2str(i)

      # First triple: the macro variable
      tokens.append(anoni)
      tokens.append(annotation)
      tokens.append('"'+el[0]+'".')

      # Second triple: the value computed from the macro text
      value = calcstr(el[1])
      if value == -1:
        return(-1)
      tokens.append(anoni)
      tokens.append(ns)
      tokens.append(value)
      i = i + 1
    return(tokens)


  #********************************************************************************************
  # Save on disk the N-triples representing the macro
  #********************************************************************************************
  def saveann(self, nameassertions, nameque):
    """Write self.data_assertions to `nameassertions` and self.data_que to `nameque`.

    If one of them is the -1 error marker, iterating it raises TypeError
    after the header of that file has been written.
    """
    # Save N-triples of the annotation assertions
    self._write_tokens(nameassertions, self.data_assertions)

    # Saves N-triples of the annotation query
    self._write_tokens(nameque, self.data_que)

  def _write_tokens(self, path, tokens):
    """Write a header and one token per line, with a blank line every three tokens."""
    op1 = open(path, 'w')
    try:
      op1.write("# Annotations N-triples generated by Metalog\n\n")
      i = 0
      for el in tokens:
        if i == 3:
          op1.write("\n")
          i = 1
        else:
          i = i + 1
        op1.write(el+"\n")
    finally:
      # Close the file even when writing fails
      op1.close()


#********************************************************************************************
# Adjust recursively the nested macros
#********************************************************************************************
def aggiusta_ric(assertions):
  """Return a new list of [variable, text] macros with nested macros expanded.

  For every entry the part of the text between the leading
  cost.REPRESENTS[0] keyword (plus one separator character) and the final
  character is passed to aggiustastrric, together with the entries already
  rebuilt, and the text is reassembled as "<keyword> <expanded>.".
  """
  resolved = []
  for entry in assertions:
    keyword = cost.REPRESENTS[0]
    body = entry[1][len(keyword)+1:-1]
    # Only the entries rebuilt so far are visible to the expansion
    expanded = aggiustastrric(body, resolved)
    resolved.append([entry[0], keyword + ' ' + expanded + '.'])
  return(resolved)


#********************************************************************************************
# Adjust recursively the string
#********************************************************************************************
def aggiustastrric(str_tmp, tab):
  """Expand recursively the macro variables contained in `str_tmp`.

  str_tmp -- text to process, read token by token from the left
  tab     -- list of [variable, text] macros used for the substitutions,
             in the form built by aggiusta_ric

  A token starting with a double quote or with a character of MINUSCOLE is
  copied unchanged. A token starting with a character of MAIUSCOLE is
  looked up in `tab` and, when found, replaced by the (recursively
  expanded) text of that macro. Anything else is returned as is.
  """
  # Nothing left to expand. This is tested first because every other
  # branch reads str_tmp[0], which raises IndexError on an empty string.
  if str_tmp == '':
    return('')

  if str_tmp[0]=='"':
    tmp1 = str_tmp[1:]

    n1 = find_char_norm(tmp1, '"')
    if len(tmp1)==n1+1:
      # The quoted string is the last token
      return(str_tmp)
    return('"'+tmp1[0:n1]+'" '+aggiustastrric(tmp1[n1+2:], tab))

  if str_tmp[0] in MINUSCOLE:

    n1 = find_char_norm(str_tmp, ' ')
    if n1 == -1:
      return(str_tmp)
    return(str_tmp[0:n1]+' '+aggiustastrric(str_tmp[n1+1:], tab))

  if str_tmp[0] in MAIUSCOLE:

    n1 = find_char_norm(str_tmp, ' ')
    if n1 == -1:
      vari = str_tmp
    else:
      vari = str_tmp[0:n1]

    # Without a matching macro the variable is kept unchanged
    sostituzione = vari
    for el in tab:
      if el[0] == vari:
        # Strip the leading keyword plus separator and the final character,
        # with the same offset aggiusta_ric uses (it was a literal 3 here)
        stringa = el[1][len(cost.REPRESENTS[0])+1:len(el[1])-1]

        # Check if in the substitution part there is something to substitute
        sostituzione = aggiustastrric(stringa, tab)
        break

    if n1 == -1:
      return(sostituzione)
    return(sostituzione + ' ' +aggiustastrric(str_tmp[n1+1:], tab))

  return(str_tmp)

  
#********************************************************************************************
# Computes the annotation string between double quotes or with namespace
#********************************************************************************************
def calcstr(stringa):
  """Build the object of the annotation triple from the macro text `stringa`.

  Three shapes are recognised, tested in this order:
    * keyword, two quoted strings and a final '.': "<second#first>."
    * an empty first quoted string: "<...>." built from the text after the
      next double quote
    * keyword followed by a single quoted string: the quoted string and the
      final character, returned unchanged
  Returns -1 when none of them applies.
  """
  open_pos = find_char_norm(stringa, '"')
  after_open = stringa[open_pos+1:]
  close_pos = find_char_norm(after_open, '"')

  if after_open[close_pos+1] != '.' and stringa.startswith(cost.REPRESENTS[0]) and close_pos != 0:
    # First quoted string
    rest = stringa[find_char(stringa, '"')+1:]
    cut = find_char(rest, '"')
    first = rest[0:cut]
    rest = rest[cut+1:]

    # Skip up to the opening quote of the second string, then read it
    rest = rest[find_char(rest, '"')+1:]
    cut = find_char(rest, '"')
    second = rest[0:cut]
    rest = rest[cut+1:]

    # Only spaces and the final '.' may follow the second string
    if rest.replace(' ', '') != '.':
      return(-1)
    return("<" + second + "#" + first + ">.")

  if close_pos == 0:
    rest = after_open[1:]
    next_quote = find_char_norm(rest, '"')
    return("<" + rest[next_quote+1:len(rest)-2] + ">.")

  start = len(cost.REPRESENTS[0])+1
  if stringa[start:start+1] == '"' and stringa[len(stringa)-2] == '"':
    return(stringa[start:])

  return(-1)


#********************************************************************************************
# Analysis of a string with a variable
#********************************************************************************************
def cerca_var(stringa):
  """Split the leading variable (a run of MAIUSCOLE characters) from `stringa`.

  Returns ['', stringa] when the string does not start with such a
  character, otherwise (variable, rest) where one space directly after the
  variable, if present, has been removed from rest.
  """
  i = 0
  while i < len(stringa) and stringa[i] in MAIUSCOLE:
    i = i + 1
  if i == 0:
    return(['', stringa])

  num = stringa[0:i]
  tmp = stringa[i:]
  # The slice is '' when the variable is the whole string, where tmp[0]
  # used to raise IndexError
  if tmp[0:1] == ' ':
    tmp = tmp[1:]
  return(num, tmp)


#********************************************************************************************
# Looks for words or variables within an assertion/query, returns the list of the upper-case words and 
# a number indicating whether the analyzed sentence is an assertion or a query.
#********************************************************************************************
def analyze_word(el):
  """Collect the upper-case words of `el` and tell assertion from query.

  A character is added to the current word when it is in MAIUSCOLE, the
  previous character is in MAIUSCOLE or is a delimiter, and the next
  character is in MAIUSCOLE, a delimiter or '?'. Any other character closes
  the current word. The last character of `el` is never added to a word.

  Returns (words, 0) when the last '.' or '?' found in `el` is a '.', and
  (words, 1) when it is a '?'. Raises IndexError when `el` contains neither.
  """
  delimiters = (',','(',')','[',']',' ',';','-','.')

  tmp = []
  str_tmp = ''
  pred = 'A'
  i = 0
  while i < len(el)-1:
    car = el[i]
    succ = el[i+1]
    if (car in MAIUSCOLE) and (pred in MAIUSCOLE or pred in delimiters) and (succ in MAIUSCOLE or succ in delimiters or succ == '?'):
      str_tmp = str_tmp + car
    else:
      if str_tmp != '':
        tmp.append(str_tmp)
      str_tmp = ''
    pred = car
    i = i + 1

  # Word still open at the end of the scan
  if str_tmp != '' and not(str_tmp in tmp):
    tmp.append(str_tmp)

  # Look backwards for the terminator without leaving the string. The old
  # condition kept decrementing at index 0 and relied on negative indexes
  # wrapping around until an IndexError stopped it.
  i = len(el)-1
  while i >= 0 and not(el[i] in ('?','.')):
    i = i - 1
  if i < 0:
    raise IndexError("analyze_word: no '.' or '?' terminator in %r" % (el,))

  if el[i] == '.':
    return(tmp, 0)
  return(tmp, 1)
    
