Decentralized (Biomedical) Data Access with SPARQL tools.
Resources: <SWObjects sparql binary> and <sample queries>
(contains <goProt.map> <goProt.rq>.
<goProt2.map> <goProt2.rq> <goProt2-bug.rq>.)
SELECT (f(?foo) AS ?bar)
addresses some identifier divergence.
sparql version 1.0 . Revision 1352 modified 2010-12-06 00:04:25 -0500 (Mon, 06 Dec 2010) by ericprud. https://swobjects.svn.sourceforge.net/svnroot/swobjects/branches/sparql11
sparql -D
{ <> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://usefulinc.com/ns/doap#Project> . <> <http://usefulinc.com/ns/doap#homepage> <http://swobj.org/sparql/v1> . <> <http://usefulinc.com/ns/doap#shortdesc> "a semantic web query toolbox" . }
sparql -D --serve http://localhost:8888/SPARQL
SELECT * WHERE { ?s ?p ?o }
sparql -G graph1 --serve http://localhost:8888/SPARQL
SELECT * WHERE { ?s ?p ?o }
SELECT * WHERE { GRAPH <graph1> { ?s ?p ?o } }
... typically used to materialize transformations:
PREFIX mydb: <http://cityhospital.example/dbs> CONSTRUCT { ?o a study:SubjectObservation . ?o study:subject ?p . ?o study:clinician ?d . ?d foaf:name ?dName }
WHERE { ?o mydb:patient ?p . ?o mydb:doctor ?d . ?d mydb:name ?dName }
sparql -D -e "CONSTRUCT { ?s a <hobby> } WHERE { ?s a <http://usefulinc.com/ns/doap#Project> }"
<> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <hobby> .
sparql -D -e "PREFIX doap: <http://usefulinc.com/ns/doap#> CONSTRUCT { ?page <hasText> ?text } WHERE { ?s doap:homepage ?page ; doap:shortdesc ?text }"
PREFIX doap: <http://usefulinc.com/ns/doap#> CONSTRUCT { ?page <hasText> ?text } WHERE { ?s doap:homepage ?page . ?s doap:shortdesc ?text }
<http://swobj.org/sparql/v1> <hasText> "a semantic web query toolbox" .
CREATE VIEW foo ( SELECT genes.id AS gene, labels.text AS label FROM genes JOIN labels ON genes.label = labels.id )
CONSTRUCT { ?gene uniprot:id ?id ; skos:prefLabel ?gene_symbol } WHERE { _:gene Ugene:acc ?id ; Ugene:val ?gene_symbol }
SELECT ?symbol WHERE { _:prot uniprot:id 'P04637' . _:prot skos:prefLabel ?symbol . }
SELECT ?symbol WHERE { _:p <uniProt/gene#acc> "P04637" . _:p <uniProt/gene#val> ?symbol . }
CONSTRUCT { ?g uniprot:id ?id . ?g skos:prefLabel ?gene_symbol . } WHERE { ?g <uniProt/gene#acc> ?id . ?g <uniProt/gene#val> ?gene_symbol . }
sparql -m goProt0.map -np goProt0.rq
sparql -m goProt0.map -d goProt.ttl goProt0.rq
goProt0.rq
SELECT ?symbol WHERE { _:prot uniprot:id 'P04637' . _:prot skos:prefLabel ?symbol . }
transformed query
SELECT ?symbol WHERE { _:p <uniProt/gene#acc> "P04637" . _:p <uniProt/gene#val> ?symbol . }
goProt0.map
CONSTRUCT { ?g uniprot:id ?id . ?g skos:prefLabel ?gene_symbol . } WHERE { ?g <uniProt/gene#acc> ?id . ?g <uniProt/gene#val> ?gene_symbol . }
goProt.ttl
<gene#_1> <uniProt/gene#acc> 'P04637' . <gene#_1> <uniProt/gene#val> 'P53' . <gene#_1> <uniProt/gene#val> 'TP53' .
results
?symbol "P53" "TP53"
sparql -m goProt0.map -d goProt.ttl goProt0.rq
goProt0.rq
SELECT ?symbol WHERE { _:prot uniprot:id 'P04637' . _:prot skos:prefLabel ?symbol . }
transformed query
SELECT ?symbol WHERE { _:p <uniProt/gene#acc> "P04637" . _:p <uniProt/gene#val> ?symbol . }
goProt0.map
CONSTRUCT { ?g uniprot:id ?id . ?g skos:prefLabel ?gene_symbol . } WHERE { ?g <uniProt/gene#acc> ?id . ?g <uniProt/gene#val> ?gene_symbol . }
goProt.ttl
<gene#_1> <uniProt/gene#acc> 'P04637' . <gene#_1> <uniProt/gene#val> 'P53' . <gene#_1> <uniProt/gene#val> 'TP53' .
results
?symbol "P53" "TP53"
What makes data sharable?
How do we make this happen?
CONSTRUCT {
?gene uniprot:id ?id ; skos:prefLabel ?gene_symbol
} WHERE {
SELECT (fn:concat(<http://purl.org/shared-names/protein/>, ?id) AS ?gene)
(fn:lower-case(?u_gene_symbol) AS ?gene_symbol)
{
_:gene Ugene:acc ?id ; Ugene:val ?u_gene_symbol
}
}
If it's not computable, you need a lookup.
sparql -d goProt.ttl goProt1.map
sparql -m goProt0.map -d goProt.ttl goProt0.rq
goProt.ttl
<gene#_1> <uniProt/gene#acc> 'P04637' . <gene#_1> <uniProt/gene#val> 'P53' . <gene#_1> <uniProt/gene#val> 'TP53' .
result
<http…/P04637> skos:prefLabel "p53" . <http…/P04637> skos:prefLabel "tp53" .
goProt1.map
CONSTRUCT { ?gene uniprot:id ?id . ?gene skos:prefLabel ?sym . } WHERE { SELECT (IRI(fn:concat("http…/", ?id)) AS ?gene) (fn:lower-case(?u_sym) AS ?sym) { _:x <uniProt/gene#acc> ?id . _:x <uniProt/gene#val> ?u_sym . } }
goProt.ttl
<gene#_1> <uniProt/gene#acc> 'P04637' . <gene#_1> <uniProt/gene#val> 'P53' . <gene#_1> <uniProt/gene#val> 'TP53' .
results
?symbol "P53" "TP53"
sparql -d goProt.ttl goProt1.map
sparql -m goProt1-cheat.map -d goProt.ttl goProt1.rq
goProt1.rq
SELECT ?symbol WHERE { <http…/P04637> skos:prefLabel ?symbol . }
goProt1.map
CONSTRUCT { ?gene uniprot:id ?id . ?gene skos:prefLabel ?sym . } WHERE { SELECT (fn:concat(<http…/>, ?id) AS ?gene) (fn:lower-case(?u_sym) AS ?sym) { _:x <uniProt/gene#acc> ?id . _:x <uniProt/gene#val> ?u_sym . } }
transformed query
SELECT ?symbol WHERE { SELECT (<http…/P04637> AS ?gene) (fn:lower-case(?_r1_0_u_sym) AS ?symbol) WHERE { _:_r1_0_x <uniProt/gene#acc> "P04637" . _:_r1_0_x <uniProt/gene#val> ?_r1_0_u_sym . } }
goProt.ttl
<gene#_1> <uniProt/gene#acc> 'P04637' . <gene#_1> <uniProt/gene#val> 'P53' . <gene#_1> <uniProt/gene#val> 'TP53' .
results
?symbol "P53" "TP53"
SERVICE
constraints around rule body:
CONSTRUCT { ?gene uniprot:id ?id ; skos:prefLabel ?gene_symbol } WHERE { SERVICE <http://localhost:8001/uniProt> { ?gene uniprot:id ?id ; skos:prefLabel ?gene_symbol } }
SELECT ?id ?gene_symbol WHERE { ?gene uniprot:id ?id ; skos:prefLabel ?gene_symbol } to be directed to appropriate endpoints.
sparql -m goProt2.map -np goProt2.rq
SELECT ?symbol ?label WHERE { { SELECT (<http://www.uniprot.org/uniprot/P04637> AS ?gene) (fn:lower-case(?_uniProt_0_u_gene_symbol) AS ?symbol) WHERE SERVICE <http://localhost:8001/uniProt> { _:_uniProt_0_gene <http://ucsc.example/uniProt/gene#acc> "P04637" . _:_uniProt_0_gene <http://ucsc.example/uniProt/gene#val> ?_uniProt_0_u_gene_symbol . } } SERVICE <http://localhost:8003/go> { ?_go_0_gp <http://ucsc.example/go/gene_product#Symbol> ?symbol . ?_go_0_association <http://ucsc.example/go/association#gene_product_id> ?_go_0_gp . ?_go_0_association <http://ucsc.example/go/association#term_id> ?_go_0_t . ?_go_0_t <http://ucsc.example/go/term#name> ?label . } }
sparql -m goProt2.map -np goProt2-bug.rq
failed to match triples prefixed by "!" in SELECT ?symbol ?label WHERE { <http://www.uniprot.org/uniprot/P04637> skos:prefLabel ?symbol . ! ?product <http://yetanothergenevocabulary.org999/#symbol> ?symbol . ! ?id <http://yetanothergenevocabulary.org999/#product> ?product . ?id <http://www.geneontology.org/dtd/go.dtdterm> ?goterm . ?goterm <http://www.w3.org/2000/01/rdf-schema#label> ?label . }
sparql --debug 1 -m goProt2.map -np goProt2-bug.rq
PK | → Address(ID) | |
---|---|---|
ID | fname | addr |
7 | Bob | 18 |
8 | Sue | NULL |
PK | ||
---|---|---|
ID | city | state |
18 | Cambridge | MA |
<People/ID=7#_> <People#ID> 7 . <People/ID=7#_> <People#fname> "Bob" . <People/ID=7#_> <People#addr> <Addresses/ID=18#_> . <People/ID=8#_> <People#ID> 8 . <People/ID=8#_> <People#fname> "Sue" . <Addresses/ID=18#_> <Addresses#ID> 18 . <Addresses/ID=18#_> <Addresses#city> "Cambridge" . <Addresses/ID=18#_> <Addresses#state> "MA" .
<People#fname>
vs. foaf:name
.<People/ID=7#_> <People#ID> 7 . <People/ID=7#_> <People#fname> "Bob" . <People/ID=7#_> <People#addr> <Addresses/ID=18#_> . <People/ID=8#_> <People#ID> 8 . <People/ID=8#_> <People#fname> "Sue" . <Addresses/ID=18#_> <Addresses#ID> 18 . <Addresses/ID=18#_> <Addresses#city> "Cambridge" . <Addresses/ID=18#_> <Addresses#state> "MA" .
sparql --stem http://ucsc.example/uniProt/ -S mysql://genome@genome-mysql.cse.ucsc.edu/uniProt --serve http://localhost:8001/uniProt
sparql --stem http://ucsc.example/go/ -S mysql://genome@genome-mysql.cse.ucsc.edu/go --serve http://localhost:8003/go
sparql -m goProt2.map -8d goProt.ttl goProt2.rq
?symbol | ?label |
---|---|
"tp53" | "DNA binding" |
"tp53" | "transcription factor activity" |
… | … +156 rows … |
SELECT ?symbol WHERE {
_:prot uniprot:id 'P04637' .
_:prot skos:prefLabel ?symbol .
...
}
SELECT (<http://www.uniprot.org/uniprot/P04637> AS ?gene) (fn:lower-case(?_uniProt_0_u_gene_symbol) AS ?symbol) WHERE SERVICE <http://localhost:8001/uniProt> { _:_uniProt_0_gene gene:acc "P04637" . _:_uniProt_0_gene gene:val ?_uniProt_0_u_gene_symbol } }
SELECT DISTINCT _uniProt_0_gene.val AS _uniProt_0_u_gene_symbol
FROM gene AS _uniProt_0_gene
WHERE _uniProt_0_gene.acc="P04637"
...
?product gene:symbol ?symbol .
?id gene:product ?product .
?id go:term ?goterm .
?goterm rdfs:label ?label .
SERVICE <http://localhost:8003/go>
{
?_go_0_gp <http://ucsc.example/go/gene_product#Symbol> ?symbol .
?_go_0_association <http://ucsc.example/go/association#gene_product_id> ?_go_0_gp .
?_go_0_association <http://ucsc.example/go/association#term_id> ?_go_0_t .
?_go_0_t <http://ucsc.example/go/term#name> ?label .
}
SELECT DISTINCT _go_0_gp.ID AS _go_0_gp, _go_0_gp.Symbol AS symbol, _go_0_association.ID AS _go_0_association, _go_0_association.term_id AS _go_0_t, _go_0_t.name AS label FROM gene_product AS _go_0_gp INNER JOIN association AS _go_0_association ON _go_0_association.gene_product_id=_go_0_gp.ID INNER JOIN term AS _go_0_t ON _go_0_t.ID=_go_0_association.term_id WHERE (_go_0_gp.Symbol = "tp53" OR _go_0_gp.Symbol = "p53")
The semantics align well, but implementations focus on different use case optimizations.
A foaf:knows B. B foaf:knows C.
requires 2 X ?x foaf:knows ?y
Resolve as much as possible;
huge impact on performance.
<http://www.uniprot.org/uniprot/P04637>
skos:prefLabel ?symbol .
=>
SELECT (fn:lower-case(?_uniProt_0_u_gene_symbol) AS ?symbol)
WHERE {
_:_uniProt_0_gene <http://ucsc.example/uniProt/gene#acc> "P04637" .
_:_uniProt_0_gene <http://ucsc.example/uniProt/gene#val> ?_uniProt_0_u_gene_symbol .
}
Resolve as deeply as possible.
CONCAT("a", ?b, "c", ?d) = "a123cxyz"
: → regexp("a(.*?)c(.*?)")
?a + ?b = 3
fn:upper-case(?s) = "ASDF"