#!/usr/local/bin/perl
#
# $Id: nt2dot.pl,v 1.7 2001/09/08 10:40:25 danbri Exp $
# Input: ntriple Web-data graphs (see http://www.w3.org/2001/sw/RDFCore/ntriples/)
# Output: .dot digraph files for GraphViz graphing tool 
#
# author: danbri@w3.org
#
# example usage: ./nt2dot.pl foo.nt > foo.dot; dot -Tpng foo.dot
#
# todo: use a real (perl) .nt parser instead of my one-line hack parser 

use strict;

# keep track of graph data:
my %resources;   # term text -> index of the "rN" node printed for it
my %arcs;        # statements (arcs) that have already been drawn
my %literals;    # literal text -> index of the "lN" node printed for it
my $ri=1;        # next free resource-node index
my $ai=1;        # arc index; not referenced by the code in this file
my $li=1;        # next free literal-node index

# Slurp the input: the file named by the first argument if there is one,
# otherwise standard input.
my @webdata;
my $file = shift;
if (defined $file && length $file) {
  # Three-arg open with a lexical handle: the filename is never interpreted
  # as a mode or a pipe, and a failure is reported with its reason instead of
  # silently reading nothing.
  if (open(my $in, '<', $file)) {
    @webdata = <$in>;
    close $in;
  }
  else {
    print STDERR "$0: No data file $file available: $!\n";
  }
}
else {
  @webdata = <STDIN>;
}
# Keep going on empty input (an empty graph is still emitted), but say so.
if (!scalar @webdata) {
  print STDERR "$0: No data found.\n";
}

# Split one N-Triples statement into its terms.
#
# Takes the text of a single line (without its newline) and returns the list
# (subject, predicate, object).  Fewer than three elements come back when the
# line does not contain three whitespace-separated terms.
#
# This is still a hack parser, not a real N-Triples parser: terms are simply
# cut at whitespace.  The split is limited to three fields so that an object
# literal containing spaces stays in one piece.
sub nt2s {
  my $t=shift;
  $t =~ s/^\s+//;            # leading whitespace only (anchored on purpose)
  $t =~ s/\s*\.\s*$//;       # statement terminator, with or without a space before it
  $t =~ s/\s+$//;            # trailing whitespace when there was no terminator
  return (split (/\s+/, $t, 3));
}

print header();

# Three types of output are produced: resource nodes (rN), literal nodes (lN)
# and the labelled arcs that connect them.

foreach my $triple (@webdata) {
  next unless ($triple =~ m/\S/);    # blank line
  next if ($triple =~ m/^\s*#/);     # N-Triples comment line, not a statement
  chomp $triple;
  my ($subj,$pred,$obj) = nt2s($triple);
  # print STDERR "Subj= $subj  pred= $pred obj=$obj data=$triple\n";

  # A line that does not yield all three terms cannot be drawn; report it
  # instead of printing nodes and arcs with empty labels.
  unless (defined $subj && defined $pred && defined $obj) {
    print STDERR "$0: skipping malformed line: $triple\n";
    next;
  }

  # Each distinct statement is drawn once.  The check comes before any node
  # is printed so that a repeated statement cannot leave an unconnected
  # literal node behind.  The terms are joined with a separator so that two
  # different statements cannot produce the same key.
  next if $arcs{join("\0", $pred, $subj, $obj)}++;

  if (! $resources{$subj}) {
    print "  r$ri [label=\"" .squash($subj). "\"];\n";
    $resources{$subj}=$ri;
    $ri++;
  }

  # Work out the node the arc points at, creating it if necessary.
  my $to;
  if ($resources{$obj}) {
    $to = 'r'.$resources{$obj};
  }
  elsif ($obj =~ m/^"/) {
    # Literal object: printed as its own plaintext node.  Double quotes are
    # swapped for single quotes because the label itself is double-quoted.
    $obj =~ s/"/'/g;
    print "  l$li [label=\" '".squash($obj)."'\" shape=plaintext];\n";
    $literals{$obj}=$li;
    $to = 'l'.$li;
    $li++;
  }
  else {
    print "  r$ri [label=\"".squash($obj)."\"];\n";
    $resources{$obj}=$ri;
    $to = 'r'.$ri;
    $ri++;
  }

  my $from = 'r'.$resources{$subj};
  print " $from -> $to [label=\"".squash($pred)."\" fontsize=12];\n";
}

print "\n\n}\n";


# Prepare a term for use as a GraphViz label.
#
# Familiar namespace URIs are shortened to a prefix, bracketed genid
# references are abbreviated, and the angle brackets that survived the
# ntriple hack-parser are stripped.
#
# Takes the term text and returns the shortened text; the argument itself is
# not modified.  (No prototype: every call in this file is compiled before
# this definition, so a prototype would never be applied to them.)
sub squash {
  my $data = shift;
  # \Q...\E so that the dots in the URIs match literal dots only.
  $data =~ s%\Qhttp://www.w3.org/1999/02/22-rdf-syntax-ns#\E%rdf::%g;
  $data =~ s%\Qhttp://www.w3.org/2000/01/rdf-schema#\E%rdfs::%g;
  # todo: Add XML Schema, datatypes, SOAP namespace URIs here?

  # hack: shorten genids (guessing!) -- should use ntriples properly.
  # The match is confined to a single [...] pair.
  $data =~ s/\[[^\]]*genid([^\]]*)\]/[?GENID_$1]/g;

  # Strip angle brackets only when they enclose the whole term, so that a
  # term merely ending in <...> keeps a balanced pair.
  $data =~ s/^\s*<(.*?)\s*>$/$1/s;
  return $data;
}


# Opening lines of the GraphViz document: the digraph declaration followed by
# the graph-wide size, ratio, node and rank-direction settings.  The returned
# string has no trailing newline and leaves the digraph's brace open.
sub header {
  my @preamble = (
    'digraph G {',
    '  size="25,25";',
    '  ratio=auto;',
    '  node [shape=ellipse,fontsize=11];',
    '  rankdir=LR;',
  );
  return join("\n", @preamble);
}
