; $Id: wn.lisp,v 1.5 2006/12/28 16:23:37 swick Exp $
;
; Computes CBDs (Concise Bounded Descriptions [1]) of WordNet
; synsets, wordsenses, and words. URIs are as per
; RDF/OWL Representation of WordNet [2].
;
; Runs under Wilbur [3] on Allegro Common Lisp.
;
; Author: Ralph R. Swick
; Copyright 2006 World Wide Web Consortium
;
; W3C software notice and license terms [4] apply.
;
; [1] http://www.w3.org/Submission/2005/SUBM-CBD-20050603/
; [2] http://www.w3.org/2001/sw/BestPractices/WNET/wn-conversion-20062304
; [3] http://wilbur-rdf.sourceforge.net/
; [4] http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231
; Load the regexp2 module; dump-wn below calls split-re
; (presumably provided by this module -- confirm).
(require :regexp2)
; All definitions in this file are made in the package named W.
(in-package :w)
; Presumably turns on the !"uri" reader shorthand for nodes that
; dump-types uses below -- confirm against the Wilbur documentation.
(enable-node-shorthand)
(defun dump-wn (class)
  "Write one file per WordNet instance node of the kind named by CLASS.

CLASS names the kind of instance to dump: wordsense, synset, or word.
It may be a symbol from any package, a keyword, or a string; it is
compared case-insensitively by name.  Any other value fails the
assertion below.

The nodes are those returned by dictionary-apropos-list on *nodes* for
the pattern instances/<kind>-.  For each node the result of db-find-cbd
on *db* is written to a file named after the second colon-separated
field of the node name, with .rdf appended.  Nodes whose CBD is empty
are reported on standard output and skipped.  File errors raised while
writing are printed and the dump continues with the next node.

Returns T."
  ; Compare by name rather than with CASE on quoted symbols: a clause
  ; key written as 'wordsense is really the key list (quote wordsense),
  ; which also matches the symbol QUOTE, and symbols interned in other
  ; packages would never match.
  (let ((kind (and (or (symbolp class) (stringp class))
                   (find class '("wordsense" "synset" "word")
                         :test #'string-equal))))
    (assert kind nil "arg must be one of 'wordsense, 'synset, or 'word")
    (let ((nodes (dictionary-apropos-list *nodes*
                                          (format nil "instances/~A-" kind))))
      (format t "~A ~As" (length nodes) kind)
      (dolist (node nodes)
        (let* ((cbd (db-find-cbd *db* node))
               ; Bound per node so that no value leaks between
               ; iterations (or into a global variable).
               (subject (and cbd (triple-subject (first cbd))))
               (fname (concatenate 'string
                                   (second (split-re ":" (node-name node)))
                                   ".rdf")))
          (cond
            ((null cbd)
             ; Without any triple there is no subject to describe, so
             ; do not touch the file system for this node.
             (format t "~&~A has an empty CBD; skipped" (node-name node)))
            (t
             ; WinXP has case-sensitive filenames on write but considers
             ; two names that differ only in case to be the same file.
             ; So we have to take care not to overwrite any data.
             (when (probe-file fname)
               (format t "~&~A already exists" fname)
               (let ((newfname (concatenate 'string fname "x")))
                 ; Keep appending "x" until the name is unused.
                 (loop while (probe-file newfname)
                       do (setf newfname (concatenate 'string newfname "x")))
                 (rename-file fname newfname)))
             (handler-case
                 ; (let ((stream *standard-output*))
                 (with-open-file (stream fname :direction :output :if-exists :rename)
                   ; NOTE(review): this control string contains no
                   ; directive, so (node-uri subject) is not written.
                   ; The opening markup looks like it has been lost --
                   ; confirm the intended header text.
                   (format stream "
" (node-uri subject))
                   (dolist (triple cbd)
                     (unless (eq subject (triple-subject triple))
                       (warn "cbd has multiple subjects"))
                     (let ((predicate-n (node-name (triple-predicate triple)))
                           (object (triple-object triple)))
                       (format stream "
<~A" predicate-n)
                       (typecase object
                         ((or string literal)
                          ; Close the element with a proper end tag.
                          ; NOTE(review): the literal text is written
                          ; without XML escaping.
                          (format stream ">~A</~A>"
                                  (literal-string object)
                                  predicate-n))
                         (otherwise
                          (format stream
                                  " rdf:resource='~A'/>"
                                  (node-uri object))))))
                   ; NOTE(review): only a newline is written here; the
                   ; closing markup may be missing -- confirm.
                   (princ "
" stream))
               (file-error (err)
                 (format t "~&~A" err)))))))))
  t)
(defun dump-types ()
  "Write one element per rdf:type triple in *db* to the file index.rdf.

Queries *db* for every triple whose predicate is rdf:type, prints the
number found on standard output, and writes one line per triple of the
form <TYPE rdf:about='SUBJECT-URI'/>, where TYPE is the node name of
the triple's object.  A note is printed whenever a triple has the same
subject as the triple immediately before it.  An existing index.rdf is
handled by the :rename option of the file open.

Returns NIL."
  ; Bind the working variables locally; assigning to them with SETF
  ; alone would create or clobber global variables.
  (let ((triples (db-query *db*
                           nil
                           !"http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
                           nil))
        (prev-subj nil))
    (format t "~A typed objects" (length triples))
    ; (let ((stream *standard-output*))
    (with-open-file
        (stream "index.rdf" :direction :output :if-exists :rename)
      ; NOTE(review): only a newline is written here; the opening
      ; markup of the document may be missing -- confirm.
      (format stream "
")
      (dolist (triple triples)
        (let ((subj (triple-subject triple))
              (type (triple-object triple)))
          ; Only consecutive repeats are detected.
          (when (eq subj prev-subj)
            (format t "~&repeated subj"))
          (setf prev-subj subj)
          (format stream "~&<~A rdf:about='~A'/>"
                  (node-name type)
                  (node-uri subj))))
      ; NOTE(review): only a newline is written here; the closing
      ; markup of the document may be missing -- confirm.
      (format stream "
"))))