; $Id: wn.lisp,v 1.5 2006/12/28 16:23:37 swick Exp $
;
; Computes CBDs (Concise Bounded Descriptions [1]) of WordNet
; synsets, wordsenses, and words.  URIs are as per
; RDF/OWL Representation of WordNet [2].
;
; Runs under Wilbur [3] on Allegro Common Lisp.
;
; Author: Ralph R. Swick
; Copyright 2006 World Wide Web Consortium
;
; W3C software notice and license terms [4] apply.
;
; [1] http://www.w3.org/Submission/2005/SUBM-CBD-20050603/
; [2] http://www.w3.org/2001/sw/BestPractices/WNET/wn-conversion-20062304
; [3] http://wilbur-rdf.sourceforge.net/
; [4] http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231

(require :regexp2)

(in-package :w)

(enable-node-shorthand)

(defun dump-wn (class)
  "Write one <name>.rdf file per node of the given WordNet CLASS.

CLASS must be one of the symbols WORDSENSE, SYNSET, or WORD; anything
else signals an error via ASSERT.  The nodes are those returned by
DICTIONARY-APROPOS-LIST on *NODES* for the pattern instances/<class>-.
For each node the triples returned by DB-FIND-CBD on *DB* are written
to a file named after the part of the node name following the first
colon, with an .rdf suffix.  A FILE-ERROR while writing one file is
printed and the loop continues with the next node.  Returns T."
  ;; The CASE keys must not be quoted: a clause head such as 'WORDSENSE
  ;; reads as the key list (QUOTE WORDSENSE), which would also accept the
  ;; symbol QUOTE as a valid class.
  (let ((kind (case class
                (wordsense "wordsense")
                (synset "synset")
                (word "word"))))
    (assert kind nil "arg must be one of 'wordsense, 'synset, or 'word")
    (let ((nodes (dictionary-apropos-list *nodes*
                                          (format nil "instances/~A-" kind))))
      (format t "~A ~As" (length nodes) kind)
      (dolist (node nodes)
        (let* ((cbd (db-find-cbd *db* node))
               (fname (concatenate 'string
                                   (second (split-re ":" (node-name node)))
                                   ".rdf"))
               ;; SUBJECT is bound per node.  When the CBD is empty there is
               ;; no first triple to take it from, so fall back to the node
               ;; itself rather than reading an unbound or stale value.
               (subject (if cbd
                            (triple-subject (first cbd))
                            node)))
          ; WinXP has case-sensitive filenames on write but considers
          ; two names that differ only in case to be the same file.
          ; So we have to take care not to overwrite any data.
          (when (probe-file fname)
            (format t "~&~A already exists" fname)
            ;; Move the existing file aside under the first unused name
            ;; obtained by appending "x" characters.
            (let ((newfname (concatenate 'string fname "x")))
              (loop while (probe-file newfname)
                    do (setf newfname (concatenate 'string newfname "x")))
              (rename-file fname newfname)))
          (handler-case
              ; (let ((stream *standard-output*))
              (with-open-file (stream fname
                                      :direction :output
                                      :if-exists :rename)
                ;; NOTE(review): this control string contains no directive,
                ;; so the NODE-URI argument is ignored.  It looks as if the
                ;; opening markup was lost from the string -- confirm against
                ;; the upstream file before relying on the output.
                (format stream " " (node-uri subject))
                (dolist (triple cbd)
                  (unless (eq subject (triple-subject triple))
                    (warn "cbd has multiple subjects"))
                  (let ((predicate-n (node-name (triple-predicate triple)))
                        (object (triple-object triple)))
                    (format stream " <~A" predicate-n)
                    (typecase object
                      ((or string literal)
                       ;; Close the element that was opened above; the
                       ;; predicate name is the second format argument.
                       (format stream ">~A</~A>"
                               (literal-string object)
                               predicate-n))
                      (otherwise
                       (format stream " rdf:resource='~A'/>"
                               (node-uri object))))))
                ;; NOTE(review): presumably the closing markup belonged here;
                ;; only a single space is written -- confirm.
                (princ " " stream))
            (file-error (err)
              (format t "~&~A" err)))))))
  t)

(defun dump-types ()
  "Write index.rdf with one element per rdf:type triple found in *DB*.

Queries *DB* for every triple whose predicate is rdf:type, prints the
number found, and writes one line of the form
<type-name rdf:about='subject-uri'/> per triple.  Prints a notice on
standard output whenever a triple has the same subject as the triple
immediately before it.  An existing index.rdf is handled by
:IF-EXISTS :RENAME."
  ;; TRIPLES and PREV-SUBJ are local bindings; assigning them with SETF
  ;; without a binding would create undeclared global variables.
  (let ((triples (db-query *db*
                           nil
                           !"http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
                           nil))
        (prev-subj nil))
    (format t "~A typed objects" (length triples))
    ; (let ((stream *standard-output*))
    (with-open-file (stream "index.rdf" :direction :output :if-exists :rename)
      ;; NOTE(review): the header and footer strings below are a bare space;
      ;; they look like markup was lost from them -- confirm upstream.
      (format stream " ")
      (dolist (triple triples)
        (let ((subj (triple-subject triple))
              (type (triple-object triple)))
          (when (eq subj prev-subj)
            (format t "~&repeated subj"))
          (setf prev-subj subj)
          (format stream "~&<~A rdf:about='~A'/>"
                  (node-name type)
                  (node-uri subj))))
      (format stream " "))))