#!/usr/bin/python
"""
recover legacy metadata from symlinks in our AFS
filesystem
$Id: groktrleg.py,v 1.3 2001/08/31 01:52:21 connolly Exp $
"""

import os, re, sys
import urlparse

def main():
	"""Write RDF/XML describing the symlinks of the TR directory to stdout.

	Every entry of the TR directory that is a symlink is treated as
	a generic name pointing at a dated target.  For each one this
	emits:

	  - a linkedFrom statement from the target address to the
	    generic address (and, when the generic address has no
	    trailing slash, to the slash-terminated form as well);
	  - a date statement, when the target address contains a run
	    of 6 digits (yymmdd, taken to be 19yymmdd) or 8 digits
	    (yyyymmdd);
	  - an rdf:type statement, when the target address contains
	    one of the /NOTE-, /WD-, /CR-, /PR-, /REC- status prefixes.

	Entries that are not symlinks are skipped.  Targets without a
	usable date or status are reported on stderr.
	"""
	startDoc("http://www.w3.org/2000/10/swap/pim/doc#")
	trNS = "http://www.w3.org/2001/02pd/rec54#"
	trdir = "/afs/w3.org/pub/WWW/TR/"
	traddr = "http://www.w3.org/TR/"
	trfnames = os.listdir(trdir)

	yymmdd = re.compile(r"\d\d\d\d\d+")
	statusRE = re.compile(r"/(NOTE|WD|CR|PR|REC)-")

	for name in trfnames:
		try:
			lnk = os.readlink(os.path.join(trdir, name))
		except OSError:
			# readlink fails for anything that is not a symlink;
			# such entries carry no legacy metadata.
			continue

		# mediaBase may itself emit mediaTypeVariantOf statements,
		# so keep the generic-then-dated call order.
		generic = mediaBase(traddr, name)
		dated = mediaBase(traddr, lnk)
		makeStatement(dated, 'linkedFrom', generic)
		# KLUDGE around smil20 vs smil20/ and the like
		if not generic.endswith("/"):
			makeStatement(dated, 'linkedFrom', generic + "/")

		m = yymmdd.search(dated)
		digits = ""
		if m:
			digits = m.group(0)
		if len(digits) == 6:
			# two-digit year: these all predate 2000
			digits = "19" + digits
		if len(digits) == 8:
			date = "%s-%s-%s" % (digits[:4], digits[4:6], digits[6:])
			makeStatement(dated, 'date', None, date)
		else:
			# No digit run, or one whose length cannot be read as
			# yymmdd / yyyymmdd: report it rather than emit a
			# malformed date.
			sys.stderr.write("@@ no date for %s <- %s\n" % (dated, generic))

		m = statusRE.search(dated)
		if m:
			status = m.group(1)
			makeStatement(dated, 'rdf:type', '%s%s' % (trNS, status))
		else:
			sys.stderr.write("@@ no status for %s <- %s \n" % (dated, generic))

	endDoc()

def startDoc(ns):
	"""Print the opening rdf:RDF tag.

	ns is declared as the default XML namespace of the document;
	the rdf: prefix is bound to the RDF syntax namespace.
	"""
	rdfNS = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
	openTag = "<rdf:RDF xmlns='%s' xmlns:rdf='%s'>" % (ns, rdfNS)
	print(openTag)

def endDoc():
	"""Print the closing rdf:RDF tag that matches startDoc's opening tag."""
	closeTag = "</rdf:RDF>"
	print(closeTag)


def mediaBase(base, path):
	"""Return the address of path resolved against base, minus any .html suffix.

	When path ends in ".html", the suffix is dropped to obtain the
	generic address, and a mediaTypeVariantOf statement linking the
	.html address to the generic one is emitted as a side effect.
	Any other path is simply resolved against base.
	"""
	suffix = ".html"
	if not path.endswith(suffix):
		return urlparse.urljoin(base, path)

	variant = urlparse.urljoin(base, path)
	generic = urlparse.urljoin(base, path[:-len(suffix)])
	makeStatement(variant, 'mediaTypeVariantOf', generic)
	return generic


def makeStatement(s, plocal, oref, olit=None):
	"""Print one RDF statement as an rdf:Description element.

	s      -- subject address, written as the rdf:about attribute
	plocal -- property element name (e.g. 'date' or 'rdf:type');
	          used verbatim, so it must already be a valid XML name
	oref   -- object address; when true, the property is written as
	          an empty element with an rdf:resource attribute
	olit   -- literal object, written as the property element's
	          text content when oref is false

	Subject, object address and literal are XML-escaped, so
	addresses containing &, <, > or ' still produce well-formed
	output.
	"""
	from xml.sax.saxutils import escape

	def attr(value):
		# Attribute values are delimited by single quotes below, so
		# ' must be escaped in addition to & < >.  The "%s"
		# formatting keeps the tolerance for non-string values.
		return escape("%s" % (value,), {"'": "&apos;"})

	print("<rdf:Description rdf:about='%s'>" % (attr(s),))
	if oref:
		print("  <%s rdf:resource='%s'/>" % (plocal, attr(oref)))
	else:
		print("  <%s>%s</%s>" % (plocal, escape("%s" % (olit,)), plocal))
	print("</rdf:Description>")

# Generate the RDF only when run as a script, not when imported.
if __name__ == "__main__":
	main()
