#!/usr/local/bin/ruby
#
# Ruby RDF stuff
# danbri@w3.org
# http://esw.w3.org/topic/RubyRdf
#
# Overview:
#   we use the classes 'Graph', 'Node', and 'Statement'
#
# todo:
#  - associate Nodes with Graphs                                DONE
#  - implement ask(template_statement) method (slog through the variations!)
#  - implement ntriples-based i/o                               INPROGRESS
#  - use method_missing to catch property queries on nodes      DONE
#  - figure out how to test this and Perl version at same time  /?
#  - document the stuff it doesn't do w.r.t. RDF specs          /?
#  - find out about various Ruby features I'm unclear on (see 'todo:' notes)
#  - continue migrating ask() to return graphs not nodes        DONE
#  - add Mozilla-based graph API
#  - fix the worst inefficiencies (eg. all those new graphs / statements)
#  - make basicrdf into a module, http://www.rubycentral.com/book/tut_modules.html
#
# Currently: Nodes and Graphs
#
# A node has a graph; this relationship may be transient, fleeting etc.
# The default is for a node to use the Graph.default graph.
# We only ever have one node per URI, so this could be a source of
# confusion if the graph is seen as dominating the node's state, since
# nodes may be in several graphs, and graphs may mention nodes that
# appear elsewhere. Is this just a matter of better documentation needed?

require 'digest/sha1'

### utilities

# Derive a signed 32-bit integer hash code from the SHA1 digest of +data+.
#
# The first four digest bytes are read as a little-endian unsigned integer
# and then folded into the signed 32-bit range (the comment in the original
# says the Java version of this code did not need that last step).
def hashcodeIntFromString(data)
  b0, b1, b2, b3 = Digest::SHA1.digest(data).bytes.first(4)
  r = b0 | (b1 << 8) | (b2 << 16) | (b3 << 24)
  r > (1 << 31) - 1 ? r - (1 << 32) : r
end

##############################################################################
#
# A Node is one term of a statement: a resource (URI), a blank node
# (content starts with '_:') or a literal (content starts with '"').
class Node
  # Process-wide cache, content string => Node, used by the get* factories.
  @@nodes = {}

  attr_accessor :content, :graph

  # constructor (make this private? use get* instead)
  # The N-Triples parser (Loader.parseline) still calls this directly, so
  # nodes built there are not recycled through the cache.
  def initialize(content)
    @content = content
    @graph = nil
    @@db = nil
  end

  def inspect
    "#{@content} "
  end

  # Get a Node given its URI, recycling a cached node where available.
  # A recycled node keeps whatever graph it was first created with.
  def self.getResource(content, graph = Graph.default)
    cached = @@nodes[content]
    return cached if cached

    node = Node.new(content)
    node.graph = graph
    @@nodes[content] = node
    node
  end

  # Get a blank node. With no +id+ a fresh random '_:bnodeid:N' label is
  # generated; with an +id+ a new Node carrying that label is created and
  # replaces any cached node of the same label.
  def self.getBlank(graph = Graph.default, id = nil)
    content = id.nil? ? '_:bnodeid:' + rand(100000000).to_s : id
    node = Node.new(content)
    node.graph = graph
    @@nodes[content] = node
    node
  end

  # Get a literal node. Literals are stored with a leading '"' so they can
  # be told apart from resources. The cache is consulted under that same
  # quoted key: the original looked up the bare content, so it never reused
  # a literal and could hand back a resource with the same text.
  def self.getLiteral(content, graph = Graph.default)
    key = "\"" + content
    cached = @@nodes[key]
    return cached if cached

    node = Node.new(key)
    node.graph = graph
    @@nodes[key] = node
    node
  end

  # A stringified node doesn't include '"' around literals (genids? @@ISSUE).
  # Always returns a new String; @content itself is left untouched.
  def to_s
    # todo: lang codes from ntriples (only a trailing "-ja is handled)
    "#{@content}".gsub(/^\"/, "").gsub(/\"$/, "").gsub(/\"-ja$/, "")
  end

  # unicode encoding for XML output: \uXXXX escapes become &#xXXXX;
  def to_utf8_xml
    to_s.gsub(/\\u(....)/) { "&#x#{$1};" }
  end

  # false for literals (content starting with '"'), true otherwise
  def is_resource
    !(@content =~ /^"/)
  end

  def to_nt_term
    Node.wrap(@content)
  end

  def sha1
    hashcodeIntFromString(@content)
  end

  # Wrap a term for NTriples/N3 export: literals and blank nodes are
  # returned as they are, anything else is wrapped in <>.
  # todo: multiline, XML content, charset etc...
  def self.wrap(data)
    return "#{data}" if data =~ /^"/ || data =~ /^_:/

    "<#{data}>"
  end

  # Catch missing method calls of the form prefix_localname and treat them
  # as RDF property queries: the prefix is resolved through the node's
  # graph (see Graph#reg_xmlns) and the objects of all matching statements
  # are returned as an Array of Nodes.
  #
  # Anything that does not resolve (arguments given, no graph, no
  # underscore, unregistered prefix) is passed to super and raises the
  # normal NoMethodError.
  # todo: scalar or list?
  def method_missing(methid, *args)
    propuri = args.empty? ? property_uri(methid) : nil
    return super unless propuri

    graph.ask(Statement.new(self, Node.getResource(propuri), nil)).objects
  end

  def respond_to_missing?(methid, include_private = false)
    property_uri(methid) ? true : super
  end

  private

  # Full property URI for a prefix_localname method name, or nil.
  def property_uri(methid)
    prefix, local = methid.to_s.split(/_+/, 2)
    return nil unless local && graph

    base = graph.ns(prefix)
    base ? base + local : nil
  end
end

#############################################################################
#
# A Graph is a collection of Statements with two string-keyed indices:
#   @fp  "subject | predicate"  => [object nodes]   (forward pointers)
#   @bp  "predicate | object"   => [subject nodes]  (back pointers)
# Index keys are built from Node#to_s, so matching is by text.
class Graph
  RCS_ID = '$Id: basicrdf.rb,v 1.19 2003/04/21 19:51:55 danbri Exp $'
  RDF_NS = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'

  # reference to default database, made on demand in default()
  @@defaultdb = nil

  # +statements+ may be nil or any collection of Statements.
  def initialize(statements)
    @db = {} # counter (not yet, just a flag) for each statement we've met
    @fp = {}
    @bp = {}
    @ns = {}
    tell_all(statements) if statements
  end

  # The shared default graph, created on first use.
  # (The original initialised the slot to 0 and tested it against nil, so
  # it always returned nil.)
  def self.default
    ## should put the basic RDF/RDFS stuff here
    @@defaultdb ||= Graph.new([])
  end

  # Starting to ape the Cwm API (well, method name anyway)
  def self.load(uri, base_uri = '', error_handlers = [])
    Loader.get_rdf_from_uri(uri, base_uri, error_handlers)
  end

  # XML Namespace registration: remember +uri+ under +prefix+.
  def reg_xmlns(uri, prefix)
    @ns[prefix] = uri
    uri
  end

  # Namespace URI registered for +prefix+, or nil.
  def ns(prefix)
    @ns[prefix]
  end

  # Add a bunch of statements (merge into tell() maybe?)
  def tell_all(statements)
    statements.each { |s| tell(s) }
  end

  # tell the graph something (add and index a statement)
  def tell(statement)
    @db[statement] = 1
    # todo: we should order these lists
    (@fp[index_key(statement.subject, statement.predicate)] ||= []).push(statement.object)
    (@bp[index_key(statement.predicate, statement.object)] ||= []).push(statement.subject)
  end

  # Serialise the forward index as N-Triples text.
  def toNtriples
    lines = []
    each_forward_entry do |s, p, objects|
      stem = s =~ /^_:/ ? "#{s} <#{p}>" : "<#{s}> <#{p}>"
      objects.each { |value| lines << "#{stem} #{value.to_nt_term} .\n" }
    end
    "\n# Ruby-RDF NTriple serializer #{RCS_ID} \n#\n\n" + lines.join
  end

  # A simple dump to the Dot format used by GraphViz, http://www.graphviz.org/
  #
  # Every distinct subject, predicate and object text gets a number n in
  # first-seen order. Texts never used as a predicate are declared as
  # nodes "rN"; each statement becomes an edge labelled with its predicate
  # (minus the RDF syntax namespace).
  def toDotGraph
    ids = {}        # text => n
    labels = {}     # n => text
    predicates = {} # predicate text => 1
    arcs = []
    intern = lambda do |text|
      unless ids.key?(text)
        ids[text] = ids.size + 1
        labels[ids[text]] = text
      end
      ids[text]
    end

    each_forward_entry do |s, p, objects|
      objects.each do |value|
        s_id = intern.call(s)
        predicates[p] = 1 unless ids.key?(p)
        p_id = intern.call(p)
        o_id = intern.call(value.to_s)
        arcs.push [s_id, p_id, o_id]
      end
    end

    out = ['digraph G { size="25,25"; ratio=auto; node [shape=ellipse,fontsize=11]; rankdir=LR;']
    # NOTE(review): SOURCE had a truncated, result-discarding "l.gsub(/..."
    # ("trim nodes!") at this point; it had no effect and is left out.
    ids.each_value do |n|
      label = labels[n].delete('"')
      out << "r#{n} [label=\"#{label}\"]; \n" unless predicates[label]
    end
    arcs.each do |s_id, p_id, o_id|
      arclabel = labels[p_id].delete('"').gsub(RDF_NS, "")
      out << "r#{s_id} -> r#{o_id} [label=\"#{arclabel}\"]; \n"
    end
    out << "\n\n}\n\n"
    out.join
  end

  # Generate an SQL script based on SHA1 hash codes (see Node#sha1), same
  # as the java code. Returns one String holding the triples rows followed
  # by the resources rows.
  #
  # The original wrote the resources rows to stdout with puts instead of
  # returning them; they are now part of the returned text.
  # NOTE(review): values are spliced into SQL text; only "'" in object
  # texts is removed. Do not feed untrusted data to a live database.
  def toSQL
    rdfsha1 = {}
    rows = []
    # columns: sub pred obj src person isresource
    each_forward_entry do |s, p, objects|
      sub = Node.getResource(s, self)
      pred = Node.getResource(p, self)
      rdfsha1[s] = sub.sha1
      rdfsha1[p] = pred.sha1
      stem = "insert into triples values ('#{sub.sha1}', '#{pred.sha1}', "
      objects.each do |value|
        isres = value.is_resource ? 't' : 'f'
        rows << "#{stem} '#{value.sha1}','assertid-src-notyet:ruby-rdf:#{RCS_ID}','personidid:notyet','#{isres}'); \n"
        rdfsha1[sql_text(value)] = value.sha1
      end
    end
    rdfsha1.each do |thing, code|
      rows << "insert into resources values ( '#{code}', '#{thing}' );\n"
    end
    " --- SQL-RDF dump of RDF database #{RCS_ID} \n" + rows.join
  end

  # Like toSQL, but returns an Array of single insert statements and lets
  # the caller name the assertion source.
  def toSQLInserts(src = "assertid-src-notyet:ruby-rdf:#{RCS_ID}")
    rdfsha1 = {}
    out = []
    each_forward_entry do |s, p, objects|
      sub = Node.getResource(s, self)
      pred = Node.getResource(p, self)
      rdfsha1[s] = sub.sha1
      rdfsha1[p] = pred.sha1
      stem = "insert into triples values ('#{sub.sha1}', '#{pred.sha1}', "
      objects.each do |value|
        isres = value.is_resource ? 't' : 'f'
        out.push "#{stem} '#{value.sha1}', '#{src}','' ,'#{isres}'); \n"
        object = sql_text(value).gsub(/\\/, '\&\&') # double every backslash
        # FIXME: this is a stinky hack. When the same text is about to be
        # re-mapped to a different hash, emit the old mapping first.
        previous = rdfsha1[object]
        if previous && previous != value.sha1
          out.push "insert into resources values ( '#{previous}', '#{object}' );\n"
        end
        rdfsha1[object] = value.sha1
      end
    end
    rdfsha1.each do |thing, code|
      out.push "insert into resources values ( '#{code}', '#{thing}' );\n"
    end
    out
  end

  # return all the blunt ends of the arcs in this graph
  def subjects
    @db.keys.map { |statement| statement.subject }
  end

  # return all the sharp ends of the arcs in this graph
  def objects
    @db.keys.map { |statement| statement.object }
  end

  # return all the label nodes for the arcs in this graph
  def predicates
    @db.keys.map { |statement| statement.predicate }
  end

  def size
    @db.length
  end

  def statements
    @db.keys
  end

  # Basic query method for our RDF graph. Takes a (template) Statement in
  # which nil means "anything" and returns a new Graph of the matches,
  # which can be probed with subjects(), predicates(), objects() etc.
  # Terms are compared by their to_s text.
  #
  #   s p o   exact test: result holds +query+ itself, or nothing
  #   s p -   answered from the forward index
  #   - p o   answered from the back index
  #   others  linear scan over all statements
  #
  # Notes: this is all pretty inefficient, creating new graphs and
  # statements all over the shop.
  # todo: change @fp and @bp to store refs to statements not nodes
  def ask(query)
    subj = query.subject
    pred = query.predicate
    obj = query.object
    return ask_exact(query) if subj && pred && obj
    return ask_objects(query) if subj && pred
    return ask_subjects(query) if pred && obj

    # The original iterated a Graph with .each (which Graph does not have)
    # for subject-only and object-only templates, and returned nil for
    # subject+object templates.
    Graph.new(@db.keys.select do |st|
      same_term?(subj, st.subject) && same_term?(pred, st.predicate) && same_term?(obj, st.object)
    end)
  end

  # nodes that know... (about a graph)
  def getResource(content)
    Node.getResource(content, self)
  end

  def getBlank
    Node.getBlank(self)
  end

  def getLiteral(content)
    Node.getLiteral(content, self)
  end

  private

  # Key used in @fp (subject, predicate) and @bp (predicate, object).
  def index_key(first, second)
    "#{first} | #{second}"
  end

  # Yield subject text, predicate text and the object list for every
  # forward-index entry. The key is cut at the first ' | '.
  def each_forward_entry
    @fp.each do |key, objects|
      s, p = key.split(' | ', 2)
      yield s, p, objects
    end
  end

  # Node text as used in the resources table: quotes zapped.
  # (zapping "'" is not ideal, but "\'" and "\\'" failed)
  def sql_text(value)
    value.to_s.gsub(/'/, "").gsub(/^\"/, "").gsub(/\"$/, "")
  end

  def same_term?(wanted, actual)
    wanted.nil? || wanted.to_s == actual.to_s
  end

  # s p o: is this statement in the graph? Matches the very same Statement
  # object, or any told statement with the same three texts.
  def ask_exact(query)
    objs = @fp[index_key(query.subject, query.predicate)]
    found = @db.key?(query) ||
            (objs && objs.any? { |node| node.to_s == query.object.to_s })
    Graph.new(found ? [query] : [])
  end

  # s p -: get value(s) given subject and predicate. The nodes involved
  # are re-pointed at this graph ("Is this wrong?" asked the original).
  def ask_objects(query)
    query.subject.graph = self
    query.predicate.graph = self
    found = (@fp[index_key(query.subject, query.predicate)] || []).uniq { |node| node.to_s }
    response = found.map do |node|
      node.graph = self
      Statement.new(query.subject, query.predicate, node)
    end
    Graph.new(response)
  end

  # - p o: get subject(s) given predicate and object.
  def ask_subjects(query)
    query.predicate.graph = self
    query.object.graph = self
    found = (@bp[index_key(query.predicate, query.object)] || []).uniq { |node| node.to_s }
    response = found.map do |node|
      node.graph = self
      Statement.new(node, query.predicate, query.object)
    end
    Graph.new(response)
  end
end

###########################################################################
#
# One subject / predicate / object triple. A nil term makes the statement
# a query template for Graph#ask.
class Statement
  attr_accessor :predicate, :subject, :object

  # String arguments are turned into resource nodes via Node.getResource
  # (objects too - todo).
  def initialize(subject, predicate, object)
    @subject, @predicate, @object = [subject, predicate, object].map do |term|
      term.class == String ? Node.getResource(term) : term
    end
  end

  # NOTE(review): the leading '<' before the subject looks lost; the text
  # is kept exactly as found.
  def to_s
    " #@subject> <#@predicate> <#@object> .\n"
  end
end

############################################################################
# A rather basic NTriples parser, cut down from:
#   http://www.w3.org/2000/10/swap/n-triples2kif.pl
#   http://www.w3.org/TR/rdf-testcases/#ntriples
# seeAlso:
#   http://www.rubycentral.com/book/tut_stdtypes.html
class Loader
  attr_accessor :base, :files

  # Clean up a term +t+ and return it: a surrounding <...> is removed,
  # everything else (literals, blank node labels) is returned unchanged.
  # litOK: flag whether literals are acceptable or not (not used yet).
  #
  # NOTE(review): the pattern in SOURCE had no angle-bracket part left and
  # was a no-op; bracket stripping is restored because Node.wrap and
  # Graph#toNtriples add the brackets back on output.
  def self.term(t, litOK)
    t.chomp.sub(/\A\s*<(.*)>\s*\z/m) { $1 }
  end

  # call out to an RDF2Ntriples parser
  def self.fn2nt(fn)
    print "Running external parser on filename #{fn} \n"
    ## TODO!
    nil
  end

  # (The original ignored +files+ and always stored '../..'.)
  def initialize(files = '../..')
    @files = files
    @@files = @files
  end

  ## FIXME: hacks to get RDF from nice parser
  # Parse +filename+ with the RDF4R drivers. Returns whatever the driver's
  # process call returns, or nil when parsing fails.
  def self.parse_rdf(filename, base_uri, parser = "rubyexpat")
    require 'RDF4R/Consumer/Standard'
    require 'RDF4R/Driver/XMLParser'
    require 'RDF4R/Driver/RDFParser'
    require 'RDF4R/Driver/SimpleData'
    consumer = RDF4R::Consumer::Standard.new
    raise "No file specified." if filename.nil? || filename == ''

    File.open(filename) do |file|
      if parser == 'rexml'
        begin
          require 'RDF4R/Driver/REXRDFParser'
          return RDF4R::Driver::REXRDFParser.process(file, base_uri, consumer)
        rescue StandardError, LoadError
          STDERR.puts "# FAILED TO LOAD Parser: #{parser}, Error: #{$!}."
          puts "Trying fallback parser. "
        end
      end
      begin
        return RDF4R::Driver::XMLParser.process(file, base_uri, consumer)
      rescue StandardError, LoadError
        STDERR.puts "Clumsy recover from XML parsing error: #{$!} "
        return nil
      end
    end
  end

  # FIXME: this takes a filename not a URI
  # it seems to get confused if filename contains '_' character, presume
  # other chars problematic too.
  # FIXME: this is a bit cluttered and confused. tidy up needed. see test(s):
  #   http://www.w3.org/2001/12/rubyrdf/pack/tests/net/tc_baseuri.rb
  #
  # Parse the RDF/XML file +filename+ into a new Graph. +realbaseuri+
  # overrides the file://filename base. When +uri+ is given no statements
  # are loaded (selecting one resource was never implemented).
  def self.get_rdf(filename, uri = nil, realbaseuri = nil)
    raise "nil filename in Loader.get_rdf" if filename.nil?

    parser = 'rubyexpat' # default
    require 'RDF4R/Consumer/Standard'
    require 'RDF4R/Driver/XMLParser'
    require 'RDF4R/Driver/RDFParser'
    require 'RDF4R/Driver/SimpleData'

    if realbaseuri
      models = Loader.parse_rdf(filename, realbaseuri, parser)
    else
      base = %Q{file://#{filename}}
      STDERR.puts "Working with filename=#{filename} base=#{base}"
      models = Loader.parse_rdf(filename, base, parser)
    end

    data = Graph.new([])
    return data if models.nil?

    model = models.shift
    # (the original had a bare "next" here, which is not valid outside a loop)
    return data if model.nil? || uri

    cache = {} # bnode cache
    model.statements.each do |s|
      s.each do |bit|
        next unless bit.class.to_s =~ /RDF4R/

        nt = bit.to_ntriple
        # FIXME: in the parser, not here: skip triples whose value is just
        # a newline surrounded by whitespace.
        next if bit.object.to_s =~ /^\s*\n\s*$/

        # FIXME: This is a real hack. TODO: integrate via APIs
        Loader.parseline nt.to_s, data, cache
      end
    end
    data
  end

  # Very ugly, but at least its in the library now
  # Fetch RDF from a URI and return it as a Graph. On any fetch error each
  # handler in +errorhandlers+ is called with a message and an empty Graph
  # is returned.
  # FIXME:
  #  - find out how to feed text to Expat / RDFParser directly
  #  - grab other HTTP code from scutter (gz etc?)
  def self.get_rdf_from_uri(uri, base_uri = uri, errorhandlers = [])
    require 'net/http'
    require 'tempfile'
    require 'uri'

    uri = uri.to_s.chomp
    my_headers = { 'Accept' => 'application/rdf+xml',
                   'User-agent' => 'RubyRDF;http://www.w3.org/2001/12/rubyrdf/intro.html' }
    begin
      target = URI.parse(uri)
      h = Net::HTTP.new(target.host, target.port)
      h.use_ssl = (target.scheme == 'https')
      h.open_timeout = 10
      h.read_timeout = 60
      body = h.get(target.request_uri, my_headers).body
    rescue StandardError
      error_msg = "rdfget: HTTP GET on '#{uri}'. Returning empty graph. error:#{$!}"
      errorhandlers.each { |handler| handler.call error_msg }
      return Graph.new([])
    end

    # The parser wants a file name, so go through a scratch file; it is
    # removed again when the block ends.
    Tempfile.create(['rubyrdf', '.rdf']) do |scratch|
      scratch.write body
      scratch.close
      Loader.get_rdf(scratch.path, nil, base_uri)
    end
  end

  # filename, returning ntriples as text
  def self.xsltrdf(fn, baseuri = 'file:/dev/null/nobaseuri/')
    Loader.xsltstring2rdf(File.read(fn), baseuri)
  end

  # RDF/XML text, returning ntriples as text (via the Sablotron binding)
  def self.xsltstring2rdf(xml, baseuri)
    require 'sablot' # exception handling if this is missing?
    sab = Sablot.new
    arg = { "a" => RDFParser.xslt, "b" => xml }
    param = { "base-uri" => baseuri }
    begin
      sab.runProcessor("arg:/a", "arg:/b", "arg:/c", param, arg)
    rescue StandardError
      puts "Error: #{$!}"
    end
    sab.resultArg("arg:/c")
  end

  # pull NTriples from somewhere (standard input for now), return them in
  # +data+
  def self.rdf2nt(input, data = Graph.new([]))
    Loader.nt2graph(input, data)
  end

  # URI (just a filename currently), returns a data graph (somehow)
  # this is all broke: shouldn't need to pass in the RDF parser
  def self.rdf2graph(uri, base, data = Graph.new([]))
    load_ntriples_text(Loader.xsltrdf(uri, base), data) # we'll use XSLT
  end

  def self.rdfdata2graph(rdfdata, base, data = Graph.new([]))
    load_ntriples_text(Loader.xsltstring2rdf(rdfdata, base), data)
  end

  # default to STDIN (via Kernel#gets); should allow for files too
  def self.nt2graph(input = 'stdin', data = Graph.new([]))
    bNodeIDCache = {}
    while (line = gets)
      Loader.parseline(line, data, bNodeIDCache)
    end
    data
  end

  # Load an N-Triples file into +data+. An unreadable file is reported on
  # stdout and leaves +data+ as it was.
  def self.ntfile2graph(file = '_default.nt', data = Graph.new([]))
    begin
      File.open(file) do |f|
        bNodeIDCache = {}
        f.each_line { |line| Loader.parseline line, data, bNodeIDCache }
      end
    rescue SystemCallError, IOError
      puts "Error opening file '#{file}', no data loaded." # stderr?
    end
    data
  end

  # Parse one N-Triples line and tell the resulting statement to +data+.
  # Comment lines, blank lines and lines with fewer than three terms are
  # ignored. +bNodeIDCache+ maps blank node labels seen in the input to
  # the labels generated for them, so pass the same Hash for every line of
  # one document.
  #
  # this is the wrong way to parse ntriples: the line is simply cut at the
  # first two runs of whitespace, and the remainder (kept verbatim, so
  # spaces inside a literal survive) is the object.
  def self.parseline(line, data, bNodeIDCache = {})
    return if line =~ /^\s*#/
    return unless line =~ /\S/

    s_term, p_term, o_term = line.strip.sub(/\s*\.\s*$/, '').split(/\s+/, 3)
    return unless o_term

    s_node = node_for(Loader.term(s_term, 0), data, bNodeIDCache)
    p_node = node_for(Loader.term(p_term, 0), data, bNodeIDCache)
    o_node = node_for(Loader.term(o_term, 1), data, bNodeIDCache)
    data.tell(Statement.new(s_node, p_node, o_node))
  end

  # Every line of +text+ goes through parseline with one shared bnode cache.
  def self.load_ntriples_text(text, data)
    cache = {}
    "#{text}".each_line { |l| Loader.parseline(l, data, cache) }
    data
  end
  private_class_method :load_ntriples_text

  # Node for one cleaned term: blank node labels go through the cache,
  # anything else becomes a plain Node.new.
  def self.node_for(term, data, cache)
    return Node.new(term) unless term =~ /^_:/

    node = Node.getBlank(data, cache[term])
    cache[term] = node.to_s
    node
  end
  private_class_method :node_for
end

class RDFParser
  # by MaxF, copied here so we can find it easily!
  # todo: add link to home URI for this...
  # NOTE(review): the stylesheet text is empty in this copy of the file.
  def self.xslt
    return %{ }
  end
end