#!/bin/env python
"""
dbview -- view an SQL DB thru RDF glasses.
*** NOTE WELL ***
As of May 2006, active development has moved to
http://dig.csail.mit.edu/2006/dbview/dbview.py
an implementation of...
[SWDB]
Relational Databases on the Semantic Web
Id: RDB-RDF.html,v 1.17 2002/03/06 20:43:13 timbl
http://www.w3.org/DesignIssues/RDB-RDF.html
Share and Enjoy. Open Source license:
Copyright (c) 2001 W3C (MIT, INRIA, Keio)
http://www.w3.org/Consortium/Legal/copyright-software-19980720
consider:
select person.name, phone.room from persons,phones
where phone.owner=person.id;
we can look at persons, phones as RDF Classes...
name as a property whose domain is person,
and room as a property whose domain is phone.
then... what's the subject of the stuff in the
result of the query?
What we really want is something like...
{ ?phone :room ?r;
:owner ?who.
?who :name ?n}
log:implies
{ [ :room ?r;
:owner [:name ?n]]}.
but how do we know where to put the []'s in the implies, when
converting an SQL rule to an N3 rule?
The solution implemented here is to include primary key info in the
query, and to treat (database, table, primary key value)s as
URIs.
A query regards a number of tables in a database; for
each table, we're given
- the table's name
- the fields we want selected from the table (if any)
- the table's primary key (if relevant)
- the fields we want to join with other table's primary keys
- an optional/supplimentary WHERE clause
TODO
- show tables, describe _table_ ==> RDF schema. (in progress)
REFERENCES
Python Database API v2.0
http://www.python.org/topics/database/DatabaseAPI-2.0.html
MySQL Reference Manual for version 4.0.2-alpha.
generated on 7 March 2002
http://www.mysql.com/documentation/mysql/bychapter/manual_toc.html
SEE ALSO
earlier dev notes, links, ...
http://ilrt.org/discovery/chatlogs/rdfig/2002-02-27#T18-29-01
http://rdfig.xmlhack.com/2002/02/27/2002-02-27.html#1014821419.001175
"""
__version__ = "$Id: dbview.py,v 1.21 2007/06/26 02:36:16 syosi Exp $" #@@consult python style guide
from string import join, split
import codecs
import MySQLdb # MySQL for Python
# http://sourceforge.net/projects/mysql-python
# any Python Database API-happy implementation will do.
from RDFSink import SYMBOL, LITERAL
import toXML # RDF/XML sink
import BaseHTTPServer
import cgi # for URL-encoded query parsing
RDF_MediaType = "text/xml" #@@ cf. RDF Core
#"what mime type to use for RDF?" issue...
class DBViewServer(BaseHTTPServer.HTTPServer):
"""Export an SQL database, read-only, into HTTP/RDF.
@@integration with http://www.w3.org/DesignIssues/RDB-RDF.html
in progress.
databasename is
'http://%s:%s%s%s' % (addr[0], addr[1], home, dbName)
e.g.
http://example:9003/dbview/inventory
table names are, e.g.
http://example:9003/dbview/inventory/products
@@hmm... conflate the table-class name with an HTTP document?
field names are, e.g.
http://example:9003/dbview/inventory/products#description
object names are, e.g.
http://example:9003/dbview/inventory/products/1432
where 1432 is the value of the primary key in the products table.
"""
def __init__(self, addr, handlerClass, db, home, dbName):
BaseHTTPServer.HTTPServer.__init__(self, addr, handlerClass)
self._db = db
self._home = home
self._dbName = dbName
self._base = 'http://%s:%s%s%s' % (addr[0], addr[1], home, dbName)
class DBViewHandler(BaseHTTPServer.BaseHTTPRequestHandler):
QPath = '/.dbq' # not an SQL name
UIPath = '/.ui'
def do_GET(self):
s = self.server
#DEBUG "which get?", self.path, s._home + s._dbName + self.UIPath
dbdocpath = s._home + s._dbName
if self.path[:len(dbdocpath)] == dbdocpath:
rest = self.path[len(dbdocpath):]
if rest == '':
self.describeDB()
elif rest == self.UIPath:
self.getUI()
elif rest[:len(self.QPath)+1] == self.QPath + '?':
self.getView()
elif rest[0] == '/':
self.describeTable(rest[1:])
else:
self.notFound()
else:
self.notFound()
def notFound(self):
s = self.server
pui = s._home + s._dbName + self.UIPath
self.send_response(404)
self.send_header("Content-type", "text/html")
self.end_headers()
self.wfile.write("""
DBViewHandler: 404: Not Found
Not Found
try: %s database UI.
cf 10.4.5 404 Not Found from the HTTP specification.
""" % (pui, s._dbName) )
def getUI(self):
self.send_response(200)
self.send_header("Content-type", "text/html")
self.end_headers()
self.wfile.write("""
RDF query interface@@
""")
queryUI(self.wfile.write,
6, #@@ more tables?
self.server._home + self.server._dbName + self.QPath,
self.server._dbName)
self.wfile.write("""
@@TODO: link to form with more than 3 tables
""")
self.wfile.write("""
Descriptions/Schemas
database: %s.
tables:
""" % (self.server._dbName, self.server._dbName))
for tbl in tableNames(self.server._db):
self.wfile.write("- %s
\n" % \
(tbl, tbl))
self.wfile.write("
")
self.wfile.write("""
Dan C@@ $Revision: 1.21 $ $Date: 2007/06/26 02:36:16 $
""")
def describeDB(self):
self.send_response(200, "@@not sure you're winning yet, actually")
self.send_header("Content-type", RDF_MediaType)
self.end_headers()
s = self.server
dbdocaddr = s._base
sink = toXML.ToRDF(self.wfile, dbdocaddr)
aboutDB(self.server._db, dbdocaddr, sink)
sink.endDoc()
def describeTable(self, tbl):
self.send_response(200, "@@not sure you're winning yet, actually")
self.send_header("Content-type", RDF_MediaType)
self.end_headers()
s = self.server
dbdocaddr = s._base
sink = toXML.ToRDF(self.wfile, dbdocaddr)
aboutTable(self.server._db, dbdocaddr, sink, tbl)
sink.endDoc()
def getView(self):
self.send_response(200, "@@not sure you're winning yet, actually")
self.send_header("Content-type", RDF_MediaType)
self.end_headers()
s = self.server
dbaddr = s._base
sink = toXML.ToRDF(self.wfile, dbaddr)
path, qs = split(self.path, '?')
fields, tables, keys, joins, cond = parseQuery(qs)
askdb(self.server._db, dbaddr, sink,
fields, tables, keys, joins, cond)
sink.endDoc()
def asSQL(fields, tables, keys, joins, condextra = ''):
"""format query as SQL.
rougly: select fields from tables where keyJoins and condextra
details:
tables is a list of table names
fields is a list of lists of fieldnames, one list per table
keys is a dictionary that maps tables to given primary key fields
and keyJoins is a list of lists... the bottom half
of a matrix... keyJoins[i][j] is None or the
name of a field in table i to join with the primary
key of table j.
condextra is an SQL expression
"""
fldnames = []
for ti in range(len(tables)):
for f in fields[ti]:
fldnames.append("%s.%s" % (tables[ti], f))
cond = ''
for i in range(len(tables)):
for j in range(i):
if joins[i][j]:
jexpr = "%s.%s = %s.%s" % (tables[i], joins[i][j],
tables[j], keys[tables[j]])
if cond: cond = cond + ' AND '
cond = cond + jexpr
if condextra:
if cond:
cond = cond + ' AND ' + condextra
else:
cond = condextra
q = 'select %s from %s' % (join(fldnames, ','), join(tables, ','))
if cond: q = q + ' where ' + cond
return q + ';'
def askdb(db, dbaddr, sink, fields, tables, keys, joins, condextra):
"""ask a query of a database; output results as RDF formula to sink.
db is a python DB API database object
dbaddr is a URI for the database (see DBViewServer above for details)
sink is an RDF serializer (cf RDFSink module)
fields/tables/keys/joins/condextra per asSQL above.
note that fields really needs to be a list of lists, not
a list of tuples, since tuples don't grok .index(). Odd.
We assume table names are also
XML names (e.g. no colons) and
URI path segments (e.g. no non-ascii chars, no /?#).
"""
fmla = something(sink, None, 'F')
c = db.cursor()
q = asSQL(fields, tables, keys, joins, condextra)
c.execute(q)
# using table names as namespace names seems to be aesthetic...
for n in tables:
sink.bind(n, "%s/%s#" % (dbaddr, n))
# and suggest a prefix for the table classes...
sink.bind('tables', "%s#" % (dbaddr,))
while 1:
row=c.fetchone()
if not row:
break
things = [None] * len(tables)
col = 0
for ti in range(len(tables)):
if fields[ti]:
tbl = tables[ti]
# figure out the subject of these cells...
# all the property-fields from the same table-class
# share a subject...
# do we know a unique name for it?
# i.e. ... is the table/class keyed?
uri = None
try:
kn = keys[tbl]
# yes...
# does this query return the key value?
try:
kc = fields[ti].index(kn)
except ValueError:
# no...
pass
else:
kv = row[col+kc]
uri = "%s/%s/%s#item" % (dbaddr, tbl, kv) # cf [SWDB]
except KeyError:
# no, no key supplied for this class/table
pass
if uri: subj = (SYMBOL, uri)
else: subj = something(sink, fmla, tbl)
# remember this for joins...
things[ti] = subj
# subj rdf:type _tableClass_.
sink.makeStatement((fmla,
(SYMBOL, RDF.type),
subj,
(SYMBOL, "%s#%s" % (dbaddr, tbl))
))
for fld in fields[ti]:
cell = row[col]
if cell is None: continue
# @@our mysql implementation happens to use iso8859-1
ol = (LITERAL, latin1(str(cell)))
# ok... now we have subj, pred and obj...
prop = (SYMBOL, "%s/%s#%s" % (dbaddr, tbl, fld))
sink.makeStatement((fmla, prop, subj, ol))
col = col + 1
# now relate it to other things...
for j in range(ti):
if joins[ti][j] and things[j]:
prop = (SYMBOL, "%s/%s#%s" %
(dbaddr, tables[ti], joins[ti][j]))
sink.makeStatement((fmla, prop, subj, things[j]))
# cf
# 4.8 codecs -- Codec registry and base classes
# http://www.python.org/doc/2.1.2/lib/module-codecs.html
latin1_decode = codecs.lookup('iso8859-1')[1]
def latin1(bytes):
return latin1_decode(bytes)[0]
g=0
def something(sink, scope=None, hint='gensym'):
global g
g=g+1
term = (SYMBOL, '#%s%s' % (hint, g))
#if scope: sink.makeStatement((scope,
#(SYMBOL, RDFSink.forSomeSym),
#scope, term))
return term
class Namespace:
"""A collection of URIs witha common prefix.
rape-and-pasted from 2000/04/maillog2rdf/mid_proxy.py
ACK: AaronSw / #rdfig
http://cvs.plexdev.org/viewcvs/viewcvs.cgi/plex/plex/plexrdf/rdfapi.py?rev=1.6&content-type=text/vnd.viewcvs-markup
"""
def __init__(self, nsname): self.nsname = nsname
def __getattr__(self, lname): return self.nsname + lname
def __str__(self): return self.nsname
def sym(self, lname): return self.nsname + lname
SwDB = Namespace("http://www.w3.org/2000/10/swap/db#")
# in particular: db.n3,v 1.5 2002/03/06 23:12:00
RDFS = Namespace("http://www.w3.org/2000/01/rdf-schema#")
RDF = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
DAML = Namespace("http://www.daml.org/2001/03/daml+oil#")
def aboutDB(db, dbdocaddr, sink):
"""describe a database into a sink.
We have a need for arbitrary names that don't clash
with table and column names. We chose names with '.'
at the end (or beginning?), taking advantage of:
You cannot use the `.' character in names because
it is used to extend the format by which you can refer
to columns.
-- 6.1.2 Database, Table, Index, Column, and Alias Names
http://www.mysql.com/documentation/mysql/bychapter/manual_Reference.html#Legal_names
"""
fmla = something(sink, None, 'aboutDB')
sink.bind('db', str(SwDB))
sink.bind('rdfs', str(RDFS))
sink.bind('rdf', str(RDF))
dbaddr = "%s#.database" % dbdocaddr
# db:databaseSchema .
sink.makeStatement((fmla,
(SYMBOL, SwDB.databaseSchema),
(SYMBOL, dbaddr),
(SYMBOL, dbdocaddr),
))
sink.makeStatement((fmla,
(SYMBOL, SwDB.formService),
(SYMBOL, dbdocaddr),
(SYMBOL, "%s%s" % (dbdocaddr, DBViewHandler.QPath)),
))
for tbl in tableNames(db):
tblI = "%s#%s" % (dbdocaddr, tbl)
tbldocI = "%s/%s" % (dbdocaddr, tbl)
sink.makeStatement((fmla,
(SYMBOL, SwDB.table),
(SYMBOL, dbaddr),
(SYMBOL, tblI),
))
sink.makeStatement((fmla,
(SYMBOL, SwDB.tableSchema),
(SYMBOL, tblI),
(SYMBOL, tbldocI),
))
sink.makeStatement((fmla,
(SYMBOL, RDFS.label),
(SYMBOL, tblI),
(LITERAL, tbl)
))
def tableNames(db):
c = db.cursor()
c.execute("show tables") # mysql-specific?
# http://www.mysql.com/documentation/mysql/bychapter/manual_MySQL_Database_Administration.html#SHOW
res = []
while 1:
row = c.fetchone()
if not row: break
res.append(row[0])
return res
def aboutTable(db, dbaddr, sink, tbl):
fmla = something(sink, None, 'aboutTable')
sink.bind('db', str(SwDB))
sink.bind('rdfs', str(RDFS))
sink.bind('rdf', str(RDF))
c = db.cursor()
c.execute("show columns from %s" % tbl)
while 1:
row = c.fetchone()
if not row: break
colName, ty, nullable, isKey, dunno, dunno = row
tblI = "%s#%s" % (dbaddr, tbl)
colI = "%s/%s#%s" % (dbaddr, tbl, colName)
sink.makeStatement((fmla,
(SYMBOL, SwDB.column),
(SYMBOL, tblI),
(SYMBOL, colI),
))
if isKey == 'PRI':
sink.makeStatement((fmla,
(SYMBOL, SwDB.primaryKey),
(SYMBOL, tblI),
(SYMBOL, colI),
))
sink.makeStatement((fmla,
(SYMBOL, RDFS.domain),
(SYMBOL, colI),
(SYMBOL, tblI),
))
sink.makeStatement((fmla,
(SYMBOL, RDFS.label),
(SYMBOL, colI),
(LITERAL, colName)
))
# could expose nullable as DAML.minCardinality
# could expose type as RDFS.range
###############
# Forms UI
def queryUI(write, n, qpath, dbName):
write("\n")
def parseQuery(qs):
"""Parse url-encoded SQL query string
qs is an URL-encoded query string, ala the form above
return fields, tables, keys, keyJoins, condition ala asSQL() above.
"""
form = cgi.parse_qs(qs)
try:
condition = form['condition'][0]
except KeyError:
condition = None
i = 1
tables = []
fields = []
keys = {}
while 1:
nameN = 'name%s' % i
if not form.has_key(nameN): break
tname = form[nameN][0]
tables.append(tname)
try:
fieldsI = split(form['fields%s' % i][0], ',')
except KeyError:
fields.append(())
else:
fields.append(fieldsI)
try:
key = form['key%s' % i][0]
except:
pass
else:
keys[tname] = key
i = i + 1
keyJoins = []
for i in range(1, len(tables)+1):
tjoins = []
for j in range(1, i):
kjN = 'kj%s_%s' % (i, j)
try:
f = form[kjN][0]
except KeyError:
tjoins.append(None)
else:
tjoins.append(f)
keyJoins.append(tjoins)
return fields, tables, keys, keyJoins, condition
###############################
# Test harness...
def testSvc():
import sys
host, port, user, passwd, httpHost, httpPort = sys.argv[2:8]
port = int(port)
dbName = 'administration' #@@
db=MySQLdb.connect(host=host, port=port,
user=user, passwd=passwd,
db=dbName)
hostPort = (httpHost, int(httpPort))
httpd = DBViewServer(hostPort, DBViewHandler, db, '/', dbName)
print "base:", httpd._base
print "Serving HTTP on port", httpPort, "..."
httpd.serve_forever()
def testShow():
import sys
host, port, user, passwd = sys.argv[2:6]
port = int(port)
dbName = 'administration' #@@
db=MySQLdb.connect(host=host, port=port,
user=user, passwd=passwd,
db=dbName)
dbaddr = 'http://example/administration'
sink = toXML.ToRDF(sys.stdout, dbaddr)
aboutDB(db, dbaddr, sink)
aboutTable(db, dbaddr, sink, 'users')
sink.endDoc()
def testUI():
import sys
queryUI(sys.stdout.write, 3, '/', "niftydb")
def testSQL():
cases = ('/administration/.dbq?name1=users&fields1=family%2Cemail%2Ccity%2Cid&key1=id&name2=techplenary2002&fields2=plenary%2Cmeal_choice&key2=&kj2_1=id&name3=&fields3=&key3=&kj3_1=&kj3_2=&name4=&fields4=&key4=&kj4_1=&kj4_2=&kj4_3=',
'/manage/.dbq?name1=Resources&fields1=ResourceName%2CResourceID&key1=ResourceID&name2=Activities&fields2=ActivityName%2CActivityID&key2=ActivityID&kj2_1=ResourceID&name3=Assignments&fields3=Percent_of_person%2CAssignment_ID&key3=Assignment_ID&kj3_1=MyResourceID&kj3_2=MyActivityID&name4=&fields4=&key4=&kj4_1=&kj4_2=&kj4_3=&condition=',
'/w3c/.dbq?name1=uris&fields1=uri&key1=id&name2=groupDetails&fields2=id%2Cname&key2=id&kj2_1=sub&name3=ids&fields3=&key3=id&kj3_1=&kj3_2=sponsor&name4=userDetails&fields4=id%2Cfamily%2Cgiven%2Cphone&key4=&kj4_1=emailUrisId&kj4_2=&kj4_3=id&name5=hierarchy&fields5=&key5=id&kj5_1=&kj5_2=&kj5_3=sub&kj5_4=&name6=&fields6=&key6=&kj6_1=&kj6_2=&kj6_3=&kj6_4=&kj6_5=&name7=&fields7=&key7=&kj7_1=&kj7_2=&kj7_3=&kj7_4=&kj7_5=&kj7_6=&condition=hierarchy.type%3D%27U%27+and+hierarchy.super%3D30310',
'/w3c/.dbq?name1=uris&fields1=uri&key1=id'
)
if sys.argv[2:]: cases = sys.argv[2:]
for path in cases:
path, fields = split(path, '?')
print "CGI parse:", cgi.parse_qs(fields)
fields, tables, keys, joins, cond = parseQuery(fields)
print "SQL parse:", fields, tables, keys, joins, cond
print "as SQL:", asSQL(fields, tables, keys, joins, cond)
def testQ():
import sys
host, port, user, passwd = sys.argv[2:6]
port = int(port)
path='/administration/.dbq?name1=users&fields1=family%2Cemail%2Ccity%2Cid&key1=id&name2=techplenary2002&fields2=plenary%2Cmeal_choice&key2=&kj2_1=id&name3=&fields3=&key3=&kj3_1=&kj3_2=&name4=&fields4=&key4=&kj4_1=&kj4_2=&kj4_3='
path, fields = split(path, '?')
print "CGI parse:", cgi.parse_qs(fields)
fields, tables, keys, joins, cond = parseQuery(fields)
print "SQL parse:", fields, tables, keys, joins, cond
print "as SQL:", asSQL(fields, tables, keys, joins, cond)
sink = toXML.ToRDF(sys.stdout, 'stdout:')
db=MySQLdb.connect(host=host, port=port,
user=user, passwd=passwd,
db='administration')
dbaddr = 'http://example/administration'
askdb(db, dbaddr, sink,
fields, tables, keys, joins, cond)
sink.endDoc()
def main(argv):
host, port, user, passwd, httpHost, httpPort, dbName = argv[1:8]
port = int(port)
db=MySQLdb.connect(host=host, port=port,
user=user, passwd=passwd,
db=dbName)
hostPort = (httpHost, int(httpPort))
httpd = DBViewServer(hostPort, DBViewHandler, db, '/', dbName)
print "base:", httpd._base
print "Serving database %s HTTP on port %s ..." % (dbName, httpPort)
httpd.serve_forever()
if __name__ == '__main__':
import sys
if sys.argv[1] == '--testSvc': testSvc()
elif sys.argv[1] == '--testShow': testShow()
elif sys.argv[1] == '--testQ': testQ()
elif sys.argv[1] == '--testUI': testUI()
elif sys.argv[1] == '--testSQL': testSQL()
else:
main(sys.argv)
# $Log: dbview.py,v $
# Revision 1.21 2007/06/26 02:36:16 syosi
# fix tabs
#
# Revision 1.20 2006/05/18 19:53:54 connolly
# note development has moved
#
# Revision 1.19 2003/03/02 06:07:46 connolly
# updated per bind API change, XML serializer move to toXML module
#
# Revision 1.18 2002/03/16 06:14:53 connolly
# allow command-line test cases for --testSQL
#
# Revision 1.17 2002/03/16 05:59:43 connolly
# xml mime type more convenient; fixed buggy namespace bindings
#
# Revision 1.16 2002/03/08 06:45:24 connolly
# fixed bug with empty where clause
# added --testSQL
# handle NULL values by not asserting anything. Hmm...
#
# Revision 1.15 2002/03/08 00:11:36 connolly
# HTTP export of schemas working.
# UI fleshed out a bit.
#
# Revision 1.14 2002/03/07 23:25:01 connolly
# moved ui path to not conflict with table names
#
# Revision 1.13 2002/03/07 00:24:43 connolly
# stop conflating stuff, per designissues thingy.
# lightly tested...
#
# Revision 1.12 2002/03/06 17:24:39 connolly
# add pointer to msql doc for mysql-specific schema-interrogation stuff
#
# Revision 1.11 2002/03/06 17:20:18 timbl
# (timbl) Changed through Jigsaw.
#
# Revision 1.10 2002/03/06 06:37:32 connolly
# structure browsing is starting to work:
# listing tables in a database,
# listing columns in a table.
# Converting to RDF/RDFS works.
# SwDB namespace name needs deciding.
# HTTP export is TODO.
#
# Revision 1.9 2002/03/06 05:41:32 connolly
# OK! basic query interface, including joins,
# seems to work.
#
# Revision 1.8 2002/03/06 03:44:05 connolly
# the multi-table case is starting to work...
# I haven't captured the joined relationships yet...
# I'm thinking about turning the fields structure inside out...
#
# Revision 1.7 2002/03/06 00:03:16 connolly
# starting to work out the forms UI...
#
# Revision 1.6 2002/03/05 22:16:24 connolly
# per-table namespace of columns starting to work...
#
# Revision 1.5 2002/03/05 22:04:18 connolly
# HTTP interface starting to work...
#