#!/bin/perl -w

# script that decomposes AMICO XML into RDF
# $Id: amico2rdf.pl,v 1.3 2002/09/03 18:58:24 em Exp $

use strict;
use XML::DOM;
use LWP::UserAgent;
use Getopt::Long;

# Command-line variables.
#
#   --xml  path of the AMICO XML record to read  (required)
#   --rdf  path of the RDF/XML file to write     (required)

my $xmlfile = '';
my $rdffile = '';

# Base URIs used to build the identifiers written into the RDF output.

my $laf = "http://www.amico.org/laf/entities/";
my $datadir = "http://www.w3.org/2002/04/12-amico/data/";
my $origdir = "http://www.w3.org/2002/04/12-amico/orig/";
my $imagedir = $origdir . "tiffs/";
my $thumbdir = $origdir . "thumbs/";

my $usage = "Usage: amico2rdf.pl --xml xmlfile --rdf rdffile\n";

# GetOptions returns false (after warning) on unknown or malformed options.
if (!GetOptions('xml=s' => \$xmlfile,
                'rdf=s' => \$rdffile)) {
    print STDERR $usage;
    exit 1;
}

# Both files are needed: without --rdf the open below would be handed an
# empty file name and fail with a far less helpful message.
if ($xmlfile eq '' || $rdffile eq '') {
    print STDERR $usage;
    exit 1;
}

# Three-argument open keeps the file name from being interpreted as a mode.
# The UTF-8 layer makes the bytes written agree with the encoding declared in
# the XML header; without it, parsed character data below U+0100 would be
# written as Latin-1.
open(OUTFILE, '>:encoding(UTF-8)', $rdffile)
    || die "sorry, system can\'t open outfile for writing: $rdffile: $!\n";

# RDF/XML wrapper written around the generated descriptions.
#
# The header is built from interpolating (qq) strings so that "\n" becomes a
# real line break; in a single-quoted string the two characters backslash-n
# would be written literally, corrupting the XML declaration.

my $header = qq{<?xml version="1.0" encoding="utf-8"?>\n}
           . qq{<rdf:RDF\n}
           . qq{  xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"\n}
           . qq{   xmlns="http://www.amico.org/vocab#">};
my $footer = '</rdf:RDF>';

# create parser and load the record (parsefile dies on malformed XML)

my $parser = XML::DOM::Parser->new();

my $doc = $parser->parsefile($xmlfile);

# get relevant information and start building record

my $wid = getNodeAttValue($doc, "AID", "id");

# uid: entry identifiers are "<local date>-<counter>"

my $idx = "0";

# Local date as YYYY-MM-DD, computed in Perl rather than by shelling out to
# the GNU-only `date --iso-8601`.
my @now = localtime;
my $logdate = sprintf("%04d-%02d-%02d", $now[5] + 1900, $now[4] + 1, $now[3]);


print OUTFILE $header;

# Catalog entry: written only when the record names a cataloguer (DCB).

my $cataloger = getNodeValue($doc, "DCB");

if ($cataloger) {

    # Entity URI for the cataloguer: base URI plus the name, lower-cased
    # (ASCII letters only) with every whitespace character turned into "_".
    (my $cid = $laf . $cataloger) =~ tr/A-Z/a-z/;
    $cid =~ s/\s/_/g;

    # Each entry consumes one value of the running counter.
    my $entry_id = $logdate . "-" . $idx++;
    my $cataloged_on = getNodeAttValue($doc, "DCD", "date");

    print OUTFILE
        "\n\n<Entry rdf:ID = \"$entry_id\">\n",
        "  <describes rdf:resource = \"$datadir$wid\" />\n",
        "  <catalogedBy>\n",
        "    <Person rdf:about = \"$cid\">\n",
        "      <name>$cataloger</name>\n",
        "    </Person>\n",
        "  </catalogedBy>\n",
        "  <catalogedDate>$cataloged_on</catalogedDate>\n",
        "</Entry>\n";
}

# Open the descriptive work entry.
#
# The element name is derived from the object type (OTY) when the record has
# one; otherwise the generic rdf:Description is used.  The same name is needed
# again later to close the element, so $wtype stays in file scope.

my $wtype = "rdf:Description";

if (getNodeValue($doc, "OTY")) {
    my $otype = getNodeValue($doc, "OTY");
    $otype =~ s/\s//g;			# remove spaces (if any)
    $otype =~ s/(\w.+)/\u\L$1/;		# force first char upper case, rest lower

    # A whitespace-only OTY would leave an empty element name; keep the
    # generic default in that case.
    $wtype = $otype if $otype ne '';
}

print OUTFILE "\n\n<", $wtype, " rdf:about = \"", $datadir . $wid, "\">\n";

# Titles of the work: one property per title group (OTG).
#
# The property name comes from the title type (OTT): "title" is used as is,
# anything else becomes "<type>Title" (e.g. "Former Title" -> "formerTitle").

foreach my $title ($doc->getElementsByTagName('OTG')) {

    my $ttype = getNodeValue($title, "OTT");
    $ttype =~ tr/A-Z/a-z/;	# normalization
    $ttype =~ s/\s//g;		# all whitespace: element names cannot contain any

    if ($ttype ne 'title') {
        $ttype =~ s/title//;
        $ttype = $ttype . "Title";
    }

    print OUTFILE "  <", $ttype, ">", getNodeValue($title, "OTN"), "</", $ttype, ">\n";
}

# Simple text properties of the work, as (property name, source element) pairs
# written in this order.
foreach my $pair (["measurementText",    "MET"],
                  ["matTechDescription", "OMD"],
                  ["inscriptionMark",    "OIN"]) {

    my ($property, $tag) = @$pair;

    print OUTFILE "  <$property>" . getNodeValue($doc, $tag) . "</$property>\n";
}


# Entities associated with a work: one property per creator group (CRG).
#
# The property name is the lower-cased role (CRR), defaulting to "artist".
# When a sort name (CRN) is present the Person is identified by an entity URI
# built from it; otherwise an anonymous Person is written.

foreach my $entity ($doc->getElementsByTagName('CRG')) {

    my $role = getNodeValue($entity, "CRR");
    $role =~ tr/A-Z/a-z/;

    if ($role eq '') {
        $role = "artist";		# set default ??
    }

    my $sortname = getNodeValue($entity, "CRN");
    my $eid = '';

    if ($sortname) {
        $eid = $sortname;
        $eid =~ tr/A-Z/a-z/;
        # Replace every whitespace character, as is done for cataloguer and
        # owner names; replacing only the first left spaces inside the URI.
        $eid =~ s/\s/_/g;
    }

    print OUTFILE "  <", $role, ">\n";

    if ($eid eq '') {
        print OUTFILE "    <Person>\n";
    } else {
        print OUTFILE "    <Person rdf:about = \"", $laf . $eid, "\">\n";
    }

    print OUTFILE "      <name>", getNodeValue($entity, "CRT"), "</name>\n";
    print OUTFILE "      <sortName>", $sortname, "</sortName>\n";
    print OUTFILE "      <nationality>", getNodeValue($entity, "CRC"), "</nationality>\n";
    print OUTFILE "      <dateLocation>", getNodeValue($entity, "CDT"), "</dateLocation>\n";
    print OUTFILE "   </Person>\n";

    print OUTFILE "  </", $role, ">\n";

}

# Creation dates and place.
#
# The start (OCS) and end (OCE) dates are read through getNodeAttValue, which
# yields '' for a missing element instead of dying on an undefined node.

my $creation_start = getNodeAttValue($doc, "OCS", "date");
my $creation_end   = getNodeAttValue($doc, "OCE", "date");

print OUTFILE "  <creationDate>", $creation_start, " - ", $creation_end, "</creationDate>\n";

print OUTFILE "  <creationPlace>", getNodeValue($doc, "OCP"), "</creationPlace>\n";

# Styles: one <style> property per style group (STG).

foreach my $style ($doc->getElementsByTagName('STG')) {

    # Read the description from this group, not from the whole document;
    # otherwise every style would repeat the first STD of the record.
    my $stype = getNodeValue($style, "STD");

    print OUTFILE "  <style>\n";
    print OUTFILE "    <Period>\n";
    print OUTFILE "      <description>", $stype, "</description>\n";
    print OUTFILE "    </Period>\n";
    print OUTFILE "  </style>\n";

}

# Owners: one <owner> property per owner group (OOG).

foreach my $holder ($doc->getElementsByTagName('OOG')) {

    my $owner_name = getNodeValue($holder, "OON");

    # Identifier derived from the owner name: ASCII lower case, whitespace
    # replaced by "_".  Stays empty when there is no usable name.
    my $oid = '';
    if ($owner_name) {
        ($oid = $owner_name) =~ tr/A-Z/a-z/;
        $oid =~ s/\s/_/g;
    }

    # A named owner is identified by its entity URI, others stay anonymous.
    my $organization_tag = $oid eq ''
        ? "    <Organization>\n"
        : "    <Organization rdf:about = \"" . $laf . $oid . "\">\n";

    my $place     = getNodeValue($holder, "OOP");
    my $credit    = getNodeValue($holder, "OOC");
    my $accession = getNodeValue($holder, "OOA");

    print OUTFILE
        "  <owner>\n",
        $organization_tag,
        "      <name>$owner_name</name>\n",
        "      <place>$place</place>\n",
        "      <credit>$credit</credit>\n",
        "      <accessionNumber rdf:resource = \"$datadir$accession\" />\n",
        "    </Organization>\n",
        "  </owner>\n";
}

# Rights: each rights group (ORG) contributes a copyright link taken from the
# href attribute of its ORL element.

for my $rights_group ($doc->getElementsByTagName('ORG')) {
    my $license = getNodeAttValue($rights_group, "ORL", "href");
    print OUTFILE "  <copyright rdf:resource = \"$license\" />\n";
}

# Related works: each group (RWG) that carries a link id (RWL) becomes a
# property pointing at that work.  The property is named after the
# lower-cased relationship (RWR), or "related" when none is given.

for my $group ($doc->getElementsByTagName('RWG')) {

    my $relation = getNodeValue($group, "RWR");

    if ($relation) {
        $relation =~ tr/A-Z/a-z/;	# normalization
    } else {
        $relation = "related";
    }

    my $target = getNodeAttValue($group, "RWL", "id");
    next unless $target;		# nothing to point at

    print OUTFILE "  <$relation rdf:resource = \"$datadir$target\" />\n";
}


# Related images: one property per image group (RIG) that has a link id (RIL).
#
# Preferred images (RIP preferred="Y") are written as <preferred> with the
# image URI, an optional description (RID) and, when one is available, a
# thumbnail.  Other images are written as <related>, and only when they have
# a description.

foreach my $relatedimg ($doc->getElementsByTagName('RIG')) {

    my $imgid = getNodeAttValue($relatedimg, "RIL", "id");
    next unless $imgid;

    my $status      = getNodeAttValue($relatedimg, "RIP", "preferred");
    my $description = getNodeValue($relatedimg, "RID");

    if ($status ne 'Y') {

        if ($description) {
            print OUTFILE "  <related>\n";
            print OUTFILE "    <Image>\n";
            print OUTFILE "      <description>", $description, "</description>\n";
            print OUTFILE "    </Image>\n";
            print OUTFILE "  </related>\n";
        }

        next;
    }

    # Derive the thumbnail name from the image name: "name.tif" -> "name.jpg".
    # The dot is matched literally and put back in front of the new extension
    # (the result used to be "namejpg").  A lexical is matched instead of
    # assigning to the global $_.

    my $thumbnail = '';

    if ($imgid =~ /(.*)\.tif/i) {

        $thumbnail = $1 . ".jpg";

        # Keep the thumbnail only if the jpg is available.  The test is made
        # against the local file system, relative to the current directory.
        $thumbnail = '' unless -e $thumbnail;
    }

    print OUTFILE "  <preferred>\n";

    if ($description) {

        print OUTFILE "    <Image rdf:about = \"", $imagedir . $imgid, "\">\n";
        print OUTFILE "      <description>", $description, "</description>\n";

        if ($thumbnail ne '') {
            # associate thumbnail image with image
            print OUTFILE "      <thumbnailImage rdf:resource = \"", $thumbdir, $thumbnail, "\" />\n";
        }

        print OUTFILE "    </Image>\n";

    } elsif ($thumbnail ne '') {

        # associate thumbnail image with image
        # (the attribute value was previously left without its closing quote)
        # NOTE(review): a description-bearing Image above uses rdf:about while
        # this one uses rdf:resource although it has child content -- confirm
        # which form is intended.
        print OUTFILE "    <Image rdf:resource = \"", $imagedir . $imgid, "\">\n";
        print OUTFILE "      <thumbnailImage rdf:resource = \"", $thumbdir, $thumbnail, "\" />\n";
        print OUTFILE "    </Image>\n";

    } else {

        print OUTFILE "    <Image rdf:resource = \"", $imagedir . $imgid, "\" />\n";
    }

    print OUTFILE "  </preferred>\n";
}


# Related documents: one <related> property per document group (RDG).
#
#   link id (RDL) and description (RDD) -> identified Document with description
#   link id only                        -> reference to the document
#   description only                    -> anonymous Document with description
#   neither                             -> nothing is written

for my $docgroup ($doc->getElementsByTagName('RDG')) {

    my $docid   = getNodeAttValue($docgroup, "RDL", "id");
    my $summary = getNodeValue($docgroup, "RDD");

    next unless $docid || $summary;

    print OUTFILE "  <related>\n";

    if (!$summary) {

        # Only the link is known: a plain reference is enough.
        print OUTFILE "    <Document rdf:resource = \"$datadir$docid\" />\n";

    } else {

        print OUTFILE $docid
            ? "    <Document rdf:about = \"$datadir$docid\">\n"
            : "    <Document>\n";
        print OUTFILE "      <description>$summary</description>\n";
        print OUTFILE "    </Document>\n";
    }

    print OUTFILE "  </related>\n";
}



# Close the work element, using the element name it was opened with.

print OUTFILE "</$wtype>\n";


# Begin instance description.
#
# The element is identified by the instance id (XID) and named after the XAM
# value when the record has one, otherwise the generic rdf:Description is
# used.  The same name is needed again later to close the element, so $itype
# stays in file scope.

my $iid = getNodeAttValue($doc, "XID", "id");

my $itype = "rdf:Description";

if (getNodeValue($doc, "XAM")) {
    my $xam = getNodeValue($doc, "XAM");
    # Strip whitespace exactly as is done for the work type: an element name
    # cannot contain spaces, so a multi-word value would break the output.
    $xam =~ s/\s//g;
    $xam =~ s/(\w.+)/\u\L$1/;	# force first char upper case, rest lower
    $itype = $xam if $xam ne '';
}

print OUTFILE "\n\n<", $itype, " rdf:about = \"", $datadir . $iid, "\">\n";


# Simple text properties of the instance, as (property name, source element)
# pairs written in this order.
foreach my $pair (["description", "XDE"],
                  ["publisher",   "XPU"],
                  ["type",        "XRT"],
                  ["encoding",    "XFE"],
                  ["dimensions",  "XFD"],
                  ["fileSize",    "XFF"],
                  ["compression", "XFC"]) {

    my ($property, $tag) = @$pair;

    print OUTFILE "  <", $property, ">", getNodeValue($doc, $tag), "</", $property, ">\n";
}

# Relations of the instance: each XRE group that carries a link id (XRI)
# becomes a property pointing at that resource.  The property is named after
# the lower-cased relation type (XRY), or "related" when none is given.

for my $link ($doc->getElementsByTagName('XRE')) {

    my $relation = getNodeValue($link, "XRY");

    if ($relation) {
        $relation =~ tr/A-Z/a-z/;	# normalization
    } else {
        $relation = "related";
    }

    my $target = getNodeAttValue($link, "XRI", "id");
    next unless $target;		# nothing to point at

    print OUTFILE "  <$relation rdf:resource = \"$datadir$target\" />\n";
}

# Rights statement of the instance.
my $rights_statement = getNodeValue($doc, "XRS");
print OUTFILE "  <copyright>$rights_statement</copyright>\n";

# end instance description

print OUTFILE "</", $itype, ">\n\n";


# @@ todo: transform amico validation data into RDF @@

# print footer

print OUTFILE $footer;

# Buffered write errors (disk full, quota, ...) only surface when the handle
# is closed, so the result of close must be checked.
close(OUTFILE) || die "error while writing $rdffile: $!\n";

exit 0;



# getNodeValue($node, $element)
#
# Simple value extractor: returns the node value of the first child of the
# first $element found below $node.
#
#   $node     object providing getElementsByTagName (document or element)
#   $element  tag name to look for
#
# Returns '' when there is no such element, when the element has no children
# (e.g. <OIN/>), or when its first child has no node value.  A first
# occurrence of "&#233 " in the value is rewritten to "e ".
sub getNodeValue {

    my ($node, $element) = @_;

    my $value = '';

    my $first = $node->getElementsByTagName($element)->item(0);

    if ($first) {
        # An empty element has no first child; calling a method on the
        # undefined result would abort the whole conversion.
        my $child = $first->getFirstChild;

        if ($child) {
            my $text = $child->getNodeValue;
            $value = $text if defined $text;
        }
    }

    $value =~ s/&#233 /e /;
    return $value;
}

# getNodeAttValue($node, $element, $att)
#
# Simple attribute extractor: returns the value of attribute $att on the
# first $element found below $node.
#
#   $node     object providing getElementsByTagName (document or element)
#   $element  tag name to look for
#   $att      attribute name
#
# Returns '' when there is no such element or getAttribute yields no value.
# A value of "0" is returned as is; it used to be turned into '' by a
# truthiness test.
sub getNodeAttValue {

    my ($node, $element, $att) = @_;

    # Look the element up once instead of once per use.
    my $first = $node->getElementsByTagName($element)->item(0);
    return '' unless $first;

    my $value = $first->getAttribute($att);

    return defined $value ? $value : '';
}




