#!/usr/bin/env ruby
#
# $Id: uaprofbot.rb,v 1.2 2003/04/21 15:56:36 danbri Exp $
# A UAProf RDF harvesting indexer
# try also: 
# ./uaprofbot.rb http://www.w3.org/2001/sw/Europe/200304/uaprof/profiles-toc.rdf
# for some UAProf CC/PP data.
# writeup: http://esw.w3.org/topic/UAProfIndex

$LOAD_PATH.unshift '../../lib/' # use local dev't copy of library

require 'RDF4R/scutter'
require 'squish'
require 'basicrdf'
require 'RDF4R/pastiche'
include RDF4R::Pastiche::SWAP

# Seed URL for the harvest: the first command-line argument when one is
# given, otherwise the UAProf profiles table of contents.
startpage = ARGV.shift ||
            'http://www.w3.org/2001/sw/Europe/200304/uaprof/profiles-toc.rdf'

# Local SQL stash that the page handler stores graphs into.
sink = SimpleSQLGraph.new('dbi_driver' => 'DBI:Pg:scutter1',
                          'dbi_user'   => 'danbri')

# Scutter started from the seed URL.
bot = RDF4R::SimpleScutter.new(startpage)

# Stores one harvested UAProf graph in +sink+, together with extra
# "de-bagged" triples derived from its rdf:Bag containers.
#
# For every node typed rdf:Bag, the statement that points at the bag
# (thing --p--> bag) is combined with each rdf:_n membership statement of
# the bag (bag --rdf:_n--> item) into a direct triple
# (thing, p with "#" replaced by "#rel", item).  If several statements
# point at the same bag, the last one visited wins.
#
# s    :: the scutter; only s.uri is read (logged, and passed along as the
#         second argument of sink.store_graph).
# data :: RDF graph of the fetched profile; wrapped in a Formula for
#         querying, stored as-is, and asked for its size.
# sink :: local SQL stash; must respond to store_graph(graph, uri).
#
# Bags that nothing refers to, or whose referring property has no "#" to
# derive a relation name from, are reported and skipped instead of
# producing triples with a nil subject/predicate.
#
# Returns nil (the result of the final puts).
def extend_and_store(s,data,sink)
  profiles = Formula.new data
  debagged = [] # statements for the de-bagged triples

  rdf = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
  # NOTE(review): prf is not used below; the call is kept in case
  # Namespace() has side effects -- confirm and remove if it does not.
  prf = Namespace("http://www.wapforum.org/UAPROF/ccppschema-20010430#")

  # Matches rdf:_1, rdf:_2, ... membership properties.  The namespace is
  # escaped so that "." in the URI is literal, and the pattern is built once
  # instead of on every statement.
  member_pattern = /#{Regexp.escape(rdf.to_s)}_/

  puts "## loading: #{s.uri}"

  # Each rdf:Bag in the profile...
  profiles.each('pred' => rdf + 'type', 'obj' => rdf + 'Bag') do |bag|
    bag_node = bag.subject.to_s
    rel = nil
    thing = nil

    # Each thing related to that bag...
    # re api: one() or the() would've worked better. or return []
    profiles.each('obj' => bag_node) do |r|
      pred = r.predicate.to_s
      # Non-destructive gsub: gsub! would return nil when there is no "#"
      # and would also edit the predicate string in place.
      rel = pred.include?('#') ? pred.gsub('#', '#rel') : nil # a new relation...
      thing = r.subject.to_s
    end

    if thing.nil? || rel.nil?
      puts "SKIP: bag #{bag_node} has no usable referring property"
      next
    end

    # Each thing that's the subject of an rdf:_n property in that bag...
    # re api: what does Cwm do for containers?
    profiles.each('subj' => bag_node) do |i|
      next unless i.predicate.to_s =~ member_pattern

      item = i.object.to_s
      debagged.push Statement.new(thing, rel, item)
      # should we add rdf:member properties too?
      puts "NEW: #{thing} #{rel} #{item} "
    end
  end

  # add the found and additional triples to db
  sink.store_graph(data, s.uri)                 # using old RubyRdf API...
  sink.store_graph(Graph.new(debagged), s.uri)  # can we use Cwm stuff here?
                                                # should we retag prov'ance?
  puts "Added #{data.size} raw triples, #{debagged.size} debagged triples.\n\n"
end


# Page handler: hand each fetched graph to extend_and_store with the SQL sink.
store_page = lambda { |s, data| extend_and_store(s, data, sink) }
# Error handler: print the error on stdout.
report_error = lambda { |e| puts "error: #{e}" }

bot.pagehandlers.push(store_page)
bot.errorhandlers.push(report_error)

bot.run


# wishlist:
# API lacks container and list constructs. borrow Cwm's?
#
# note: the .subjects() method doesn't work yet. TODO: write test for that
