#!/usr/bin/ruby

# unipen2inkml: convert a UNIPEN file to InkML

# Unipen definition taken from 
#   http://hwr.nici.kun.nl/unipen/unipen.def
# InkML definition taken from 
#   http://www.w3.org/TR/2004/WD-InkML-20040928



# $Id: unipen2inkml.rb,v 1.4 2004/10/16 17:24:14 mf Exp $
# 
# Copyright Max Froumentin 2004.
# 
# Use and distribution of this code are permitted under the terms of the
# W3C Software Notice and License
# http://www.w3.org/Consortium/Legal/copyright-software-19980720



# Splits a UNIPEN stream into chunks. A chunk is one line beginning with
# ".KEYWORD" followed by every line up to (but not including) the next line
# that begins with ".KEYWORD". A single line of look-ahead is kept in @top.
class UnipenInputChunker
  # A line opens a new chunk when it begins with a dot and capital letters.
  START_LINE = /^\.[A-Z]+/

  # io: the stream to read; anything that responds to gets and eof?.
  # Lines are read from, and end-of-input is tested on, this one stream.
  def initialize(io = $stdin)
    @io = io
    @top = nil
  end

  # Returns the pushed-back line when there is one, otherwise the next line
  # of the stream. Returns nil once the stream is exhausted.
  def pop
    return @io.gets if @top.nil?

    line = @top
    @top = nil
    line
  end

  # Stores a single line so that the next pop returns it.
  def push(line)
    @top = line
  end

  # True when no line is pushed back and the stream has nothing left.
  def eof?
    @top.nil? && @io.eof?
  end

  # True when line does not open a new chunk.
  def is_not_start_line(line)
    (line =~ START_LINE).nil?
  end

  # Reads a single chunk from the stream. A chunk is ".KEYWORD ... ".
  # Returns the chunk as one string (line terminators included), or nil
  # when there is nothing left to read.
  def next_chunk
    first = pop
    return nil if first.nil?

    if is_not_start_line(first)
      # Diagnostics go to stderr so they do not end up inside the XML
      # document that is written to stdout.
      $stderr.puts "Error: line '#{first.chomp}' should start with \".KEYWORD \"."
    end

    lines = [first]
    # pop returns nil at end of input; that ends the chunk as well.
    while (line = pop) && is_not_start_line(line)
      lines << line
    end
    push(line) # start of the next chunk, or nil at end of input
    lines.join
  end
end

##############################################################################
# Device Info

# Resolution settings recorded for a single channel: a value and the unit
# string that goes with it. Both read as nil until assigned.
class ChannelData
  attr_accessor :resolution_value, :resolution_unit
end

# Device settings gathered while the input is read.
class UnipenDevice
  # Hash of "channel name" => ChannelData. Stays nil until
  # set_channel_resolution has been called at least once.
  attr_reader :channels
  # Not assigned anywhere in this class.
  attr_reader :has_data
  # Sample rate.
  attr_accessor :points_per_second

  # Records (or replaces) the resolution of channel_name with a fresh
  # ChannelData holding value and unit.
  def set_channel_resolution(channel_name, value, unit)
    @channels ||= {}
    entry = ChannelData.new
    @channels[channel_name] = entry
    entry.resolution_value = value
    entry.resolution_unit = unit
  end
end
  
##############################################################################
# UnipenInputProcessor
#  handles chunks

# Turns UNIPEN chunks into InkML written to standard output. Comments,
# trace formats, traces and segments are printed as soon as their chunk is
# eaten; resolution and sample-rate settings are stored in unipenDevice and
# the channel resolutions are printed later by spit_out.
class UnipenInputProcessor

  # Characters that must not appear literally inside a single-quoted XML
  # attribute value or in element text.
  XML_ESCAPES = {
    "&" => "&amp;",
    "<" => "&lt;",
    ">" => "&gt;",
    "'" => "&apos;"
  }.freeze

  attr_accessor :unipenDevice

  def initialize
    @unipenDevice = UnipenDevice.new
  end

  # Handles one chunk (a ".KEYWORD ..." string). The text after the keyword
  # is taken from the match itself rather than from hand-counted offsets.
  # Chunks with an unrecognised keyword (and nil) are ignored.
  def eat_chunk(chunk)
    case chunk
    when /\A\.COMMENT /
      # followed by arbitrary text
      processComment(Regexp.last_match.post_match)
    when /\A\.COORD /
      # followed by "a subset of X, Y, T, P, Z, B, RHO, THETA, PHI,
      # including at least X and Y."
      processCoords(Regexp.last_match.post_match)
    when /\A\.([XY])_POINTS_PER_INCH /
      found = Regexp.last_match
      unipenDevice.set_channel_resolution(found[1], found.post_match.to_i, "ppi")
    when /\A\.([XY])_POINTS_PER_MM /
      # NOTE(review): the value is points per millimetre but the unit is
      # written as "ppc"; if that stands for points per centimetre the
      # value needs scaling -- confirm against the InkML specification.
      found = Regexp.last_match
      unipenDevice.set_channel_resolution(found[1], found.post_match.to_i, "ppc")
    when /\A\.POINTS_PER_SECOND /
      unipenDevice.points_per_second = Regexp.last_match.post_match.to_i
    when /\A\.PEN_DOWN/
      writeTrace("penDown", Regexp.last_match.post_match)
    when /\A\.PEN_UP/
      writeTrace("penUp", Regexp.last_match.post_match)
    when /\A\.SEGMENT/
      readSegment(Regexp.last_match.post_match)
    end
    # Note: .PAD is just plain text, so doesn't match <captureDevice>
  end

  # Prints a <captureDevice> element with one <channelDef> per recorded
  # channel. Prints nothing when no channel resolution was recorded.
  def spit_out
    channels = @unipenDevice.channels
    return if channels.nil?

    puts "<captureDevice>"
    channels.each do |name, data|
      puts "  <channelDef name='#{xml_escape(name)}'>"
      unless data.resolution_value.nil?
        puts "    <resolution value='#{xml_escape(data.resolution_value)}'" \
             " units='#{xml_escape(data.resolution_unit)}'/>"
      end
      puts "  </channelDef>"
    end
    puts "</captureDevice>"
  end

  private

  # Replaces the characters listed in XML_ESCAPES with their entities.
  def xml_escape(text)
    text.to_s.gsub(/[&<>']/, XML_ESCAPES)
  end

  # Prints a <traceRef> for the text following ".SEGMENT". The fields are
  # read as: type, delineation ("from-to"), quality, label. The label is
  # everything after the third field, so a label containing spaces is kept
  # whole. Attributes whose field is missing are left out.
  def readSegment(string)
    type, delineation, quality, label = string.strip.split(/\s+/, 4)
    first, last = delineation.to_s.split("-")
    attributes = [
      ["unipen:type", type],
      ["unipen:quality", quality],
      ["unipen:label", label],
      ["from", first],
      ["to", last]
    ]
    rendered = attributes.reject { |_name, value| value.nil? }
                         .map { |name, value| " #{name}='#{xml_escape(value)}'" }
                         .join
    puts "  <traceRef#{rendered}/>"
  end

  # Prints the sample text of a pen-down or pen-up chunk as one <trace>.
  # updown is the value of the type attribute ("penDown" or "penUp").
  def writeTrace(updown, string)
    puts "<trace type='#{updown}'>#{xml_escape(string)}</trace>"
  end

  # Prints a <traceFormat> with one <channel> per whitespace-separated
  # name in string.
  def processCoords(string)
    puts "<traceFormat>"
    puts "  <regularChannels>"
    string.split.each do |channel_name|
      puts "    <channel name='#{xml_escape(channel_name)}'/>"
    end
    puts "  </regularChannels>"
    puts "</traceFormat>"
  end

  # Prints string as an XML comment. "--" is not allowed inside an XML
  # comment and the text may not end with "-", so every dash that is
  # followed by another dash gets a "*" after it, and a trailing dash is
  # followed by a space.
  def processComment(string)
    safe = string.gsub(/-(?=-)/, "-*").sub(/-\z/, "- ")
    puts "<!--#{safe}-->"
  end

end

##############################################################################
# Main

# Read chunks until the input is exhausted, converting each one as it
# arrives, then print the collected device description and close the
# document.
chunker = UnipenInputChunker.new
processor = UnipenInputProcessor.new

puts "<ink xmlns='http://www.w3.org/2003/InkML' xmlns:unipen='http://www.unipen.org/ns'>"
processor.eat_chunk(chunker.next_chunk) until chunker.eof?
processor.spit_out
puts "</ink>"
