#!/usr/bin/env ruby
#
# $Id: uaprofbot.rb,v 1.2 2003/04/21 15:56:36 danbri Exp $
# An UAProf RDF harvesting indexer
# try also:
#   ./uaprofbot.rb http://www.w3.org/2001/sw/Europe/200304/uaprof/profiles-toc.rdf
# for some UAProf CC/PP data.
# writeup: http://esw.w3.org/topic/UAProfIndex

$LOAD_PATH.unshift '../../lib/' # use local dev't copy of library
require 'RDF4R/scutter'
require 'squish'
require 'basicrdf'
require 'RDF4R/pastiche'
include RDF4R::Pastiche::SWAP

# Start from the URL given on the command line, else the default profile index.
startpage = ARGV.shift ||
            'http://www.w3.org/2001/sw/Europe/200304/uaprof/profiles-toc.rdf'

sink = SimpleSQLGraph.new('dbi_driver' => 'DBI:Pg:scutter1', 'dbi_user' => 'danbri')
bot = RDF4R::SimpleScutter.new startpage

# Derives the URI of the 'de-Bagged' relation from a predicate by inserting
# "rel" after each '#' (e.g. "...schema#Foo" becomes "...schema#relFoo").
#
# predicate_uri - anything whose to_s gives the predicate URI.
#
# Returns a new String, or nil when the URI has no '#' and so no derived
# relation can be formed. The argument is never modified: the non-destructive
# gsub is used because gsub! returns nil when nothing matched and would also
# edit the caller's string in place.
def debagged_relation(predicate_uri)
  uri = predicate_uri.to_s
  return nil unless uri.include?('#')

  uri.gsub('#', '#rel')
end

# Builds a Regexp matching exactly the container membership properties
# (namespace + "_" + digits, i.e. rdf:_1, rdf:_2, ...) of namespace_uri.
# The namespace is escaped so characters such as '.' match literally, and the
# pattern is anchored so it cannot match in the middle of an unrelated URI.
def membership_pattern(namespace_uri)
  /\A#{Regexp.escape(namespace_uri)}_\d+\z/
end

# Page handler: stores the raw triples of one fetched document in the SQL
# stash, plus one extra 'de-Bagged' triple per rdf:Bag member, so that
#   thing --pred--> bag --rdf:_n--> item
# is also recorded directly as
#   thing --pred-with-#rel--> item
#
# s    - the scutter; only s.uri is read here (used as the storage tag).
# data - the RDF graph parsed from a UAProf document; must respond to size
#        and be acceptable to Formula.new and sink.store_graph.
# sink - the local SQL stash; must respond to store_graph(graph, uri).
#
# Bags that nothing points at, or whose referring predicate has no '#', are
# skipped rather than stored with a nil subject or predicate.
def extend_and_store(s, data, sink)
  profiles = Formula.new data
  debagged = [] # statements for 'de-Bagged' triples
  rdf_uri = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
  rdf = Namespace(rdf_uri)
  member_re = membership_pattern(rdf_uri) # built once, not per statement

  puts "## loading: #{s.uri}"

  # Each rdf:Bag in the profile...
  profiles.each('pred' => rdf + 'type', 'obj' => rdf + 'Bag') do |bag|
    bag_id = bag.subject.to_s
    rel = nil
    thing = nil

    # Each thing related to that bag (if there are several, the last wins)...
    # re api: one() or the() would've worked better. or return []
    profiles.each('obj' => bag_id) do |r|
      rel = debagged_relation(r.predicate) # a new relation...
      thing = r.subject.to_s
    end
    next if thing.nil? || rel.nil?

    # Each thing that's the object of an rdf:_n property of that bag...
    # re api: what does Cwm do for containers?
    profiles.each('subj' => bag_id) do |i|
      next unless i.predicate.to_s =~ member_re

      item = i.object.to_s
      debagged.push Statement.new(thing, rel, item) # should we add rdf:member properties too?
      puts "NEW: #{thing} #{rel} #{item} "
    end
  end

  # add the found and additional triples to db
  sink.store_graph(data, s.uri) # using old RubyRdf API...
  sink.store_graph(Graph.new(debagged), s.uri) # can we use Cwm stuff here?
  # should we retag prov'ance?
  puts "Added #{data.size} raw triples, #{debagged.size} debagged triples.\n\n"
end

bot.pagehandlers.push lambda { |s, data| extend_and_store(s, data, sink) }
bot.errorhandlers.push lambda { |e| puts "error: #{e}" }
bot.run

# wishlist:
#   API lacks container and list constructs. borrow Cwm's?
#
# note: the .subjects() method doesn't work yet. TODO: write test for that