
Decentralized (Biomedical) Data Access with SPARQL tools.
Resources: <SWObjects sparql binary> and <sample queries>
(contains <goProt.map>, <goProt.rq>, <goProt2.map>, <goProt2.rq>, and <goProt2-bug.rq>.)
This work is licensed under a
Creative Commons Attribution 3.0 License,
with attribution to W3C.
SELECT (f(?foo) AS ?bar) addresses some identifier divergence.
sparql version 1.0 . Revision 1352 modified 2010-12-06 00:04:25 -0500 (Mon, 06 Dec 2010) by ericprud. https://swobjects.svn.sourceforge.net/svnroot/swobjects/branches/sparql11
sparql -D
{
<> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://usefulinc.com/ns/doap#Project> .
<> <http://usefulinc.com/ns/doap#homepage> <http://swobj.org/sparql/v1> .
<> <http://usefulinc.com/ns/doap#shortdesc> "a semantic web query toolbox" .
}
sparql -D --serve http://localhost:8888/SPARQL
SELECT * WHERE { ?s ?p ?o }
sparql -G graph1 --serve http://localhost:8888/SPARQL
SELECT * WHERE { ?s ?p ?o }
SELECT * WHERE { GRAPH <graph1> { ?s ?p ?o } }
... typically used to materialize transformations:
PREFIX mydb: <http://cityhospital.example/dbs>
CONSTRUCT { ?o a study:SubjectObservation .
?o study:subject ?p .
?o study:clinician ?d .
?d foaf:name ?dName }
WHERE { ?o mydb:patient ?p .
?o mydb:doctor ?d .
?d mydb:name ?dName }
sparql -D -e "CONSTRUCT { ?s a <hobby> } WHERE { ?s a <http://usefulinc.com/ns/doap#Project> }"
<> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <hobby> .
sparql -D -e "PREFIX doap: <http://usefulinc.com/ns/doap#> CONSTRUCT { ?page <hasText> ?text } WHERE { ?s doap:homepage ?page ; doap:shortdesc ?text }"
PREFIX doap: <http://usefulinc.com/ns/doap#>
CONSTRUCT { ?page <hasText> ?text }
WHERE { ?s doap:homepage ?page .
?s doap:shortdesc ?text }
<http://swobj.org/sparql/v1> <hasText> "a semantic web query toolbox" .
CREATE VIEW foo (
SELECT genes.id AS gene, labels.text AS label
FROM genes
JOIN labels ON genes.label = labels.id
)
CONSTRUCT {
?gene uniprot:id ?id ; skos:prefLabel ?gene_symbol
} WHERE {
_:gene Ugene:acc ?id ; Ugene:val ?gene_symbol
}
SELECT ?symbol WHERE {
_:prot uniprot:id 'P04637' .
_:prot skos:prefLabel ?symbol .
}
SELECT ?symbol
WHERE {
_:p <uniProt/gene#acc> "P04637" .
_:p <uniProt/gene#val> ?symbol .
}
CONSTRUCT
{
?g uniprot:id ?id .
?g skos:prefLabel ?gene_symbol .
}
WHERE
{
?g <uniProt/gene#acc> ?id .
?g <uniProt/gene#val> ?gene_symbol .
}
sparql -m goProt0.map -np goProt0.rq
sparql -m goProt0.map -d goProt.ttl goProt0.rq
goProt0.rq
SELECT ?symbol WHERE {
_:prot uniprot:id 'P04637' .
_:prot skos:prefLabel ?symbol .
}
transformed query
SELECT ?symbol
WHERE {
_:p <uniProt/gene#acc> "P04637" .
_:p <uniProt/gene#val> ?symbol .
}
goProt0.map
CONSTRUCT
{
?g uniprot:id ?id .
?g skos:prefLabel ?gene_symbol .
}
WHERE
{
?g <uniProt/gene#acc> ?id .
?g <uniProt/gene#val> ?gene_symbol .
}
goProt.ttl
<gene#_1> <uniProt/gene#acc> 'P04637' . <gene#_1> <uniProt/gene#val> 'P53' . <gene#_1> <uniProt/gene#val> 'TP53' .
results
?symbol "P53" "TP53"
sparql -m goProt0.map -d goProt.ttl goProt0.rq
goProt0.rq
SELECT ?symbol WHERE {
_:prot uniprot:id 'P04637' .
_:prot skos:prefLabel ?symbol .
}
transformed query
SELECT ?symbol
WHERE {
_:p <uniProt/gene#acc> "P04637" .
_:p <uniProt/gene#val> ?symbol .
}
goProt0.map
CONSTRUCT
{
?g uniprot:id ?id .
?g skos:prefLabel ?gene_symbol .
}
WHERE
{
?g <uniProt/gene#acc> ?id .
?g <uniProt/gene#val> ?gene_symbol .
}
goProt.ttl
<gene#_1> <uniProt/gene#acc> 'P04637' . <gene#_1> <uniProt/gene#val> 'P53' . <gene#_1> <uniProt/gene#val> 'TP53' .
results
?symbol "P53" "TP53"
What makes data sharable?
How do we make this happen?
CONSTRUCT {
?gene uniprot:id ?id ; skos:prefLabel ?gene_symbol
} WHERE {
SELECT (fn:concat(<http://purl.org/shared-names/protein/>, ?id) AS ?gene)
(fn:lower-case(?u_gene_symbol) AS ?gene_symbol)
{
_:gene Ugene:acc ?id ; Ugene:val ?u_gene_symbol
}
}
If it's not computable, you need a lookup.
sparql -d goProt.ttl goProt1.map
sparql -m goProt0.map -d goProt.ttl goProt0.rq
goProt.ttl
<gene#_1> <uniProt/gene#acc> 'P04637' . <gene#_1> <uniProt/gene#val> 'P53' . <gene#_1> <uniProt/gene#val> 'TP53' .
result
<http…/P04637> skos:prefLabel "p53" . <http…/P04637> skos:prefLabel "tp53" .
goProt1.map
CONSTRUCT
{
?gene uniprot:id ?id .
?gene skos:prefLabel ?sym .
}
WHERE
{
SELECT (IRI(fn:concat("http…/", ?id)) AS ?gene)
(fn:lower-case(?u_sym) AS ?sym) {
_:x <uniProt/gene#acc> ?id .
_:x <uniProt/gene#val> ?u_sym .
}
}
goProt.ttl
<gene#_1> <uniProt/gene#acc> 'P04637' . <gene#_1> <uniProt/gene#val> 'P53' . <gene#_1> <uniProt/gene#val> 'TP53' .
results
?symbol "P53" "TP53"
sparql -d goProt.ttl goProt1.map
sparql -m goProt1-cheat.map -d goProt.ttl goProt1.rq
goProt1.rq
SELECT ?symbol WHERE {
<http…/P04637>
skos:prefLabel ?symbol .
}
goProt1.map
CONSTRUCT
{
?gene uniprot:id ?id .
?gene skos:prefLabel ?sym .
}
WHERE
{
SELECT (fn:concat(<http…/>, ?id) AS ?gene)
(fn:lower-case(?u_sym) AS ?sym) {
_:x <uniProt/gene#acc> ?id .
_:x <uniProt/gene#val> ?u_sym .
}
}
transformed query
SELECT ?symbol WHERE {
SELECT (<http…/P04637> AS ?gene)
(fn:lower-case(?_r1_0_u_sym) AS ?symbol)
WHERE {
_:_r1_0_x <uniProt/gene#acc> "P04637" .
_:_r1_0_x <uniProt/gene#val> ?_r1_0_u_sym .
}
}
goProt.ttl
<gene#_1> <uniProt/gene#acc> 'P04637' . <gene#_1> <uniProt/gene#val> 'P53' . <gene#_1> <uniProt/gene#val> 'TP53' .
results
?symbol "P53" "TP53"
SERVICE constraints around rule body:
CONSTRUCT {
?gene uniprot:id ?id ; skos:prefLabel ?gene_symbol
} WHERE { SERVICE <http://localhost:8001/uniProt> {
?gene uniprot:id ?id ; skos:prefLabel ?gene_symbol
}
}
SELECT ?id ?gene_symbol WHERE {
?gene uniprot:id ?id ; skos:prefLabel ?gene_symbol
}
to be directed to appropriate endpoints.
sparql -m goProt2.map -np goProt2.rq
SELECT ?symbol ?label
WHERE
{
{
SELECT (<http://www.uniprot.org/uniprot/P04637> AS ?gene)
(fn:lower-case(?_uniProt_0_u_gene_symbol) AS ?symbol)
WHERE
SERVICE <http://localhost:8001/uniProt>
{
_:_uniProt_0_gene <http://ucsc.example/uniProt/gene#acc> "P04637" .
_:_uniProt_0_gene <http://ucsc.example/uniProt/gene#val> ?_uniProt_0_u_gene_symbol .
}
}
SERVICE <http://localhost:8003/go>
{
?_go_0_gp <http://ucsc.example/go/gene_product#Symbol> ?symbol .
?_go_0_association <http://ucsc.example/go/association#gene_product_id> ?_go_0_gp .
?_go_0_association <http://ucsc.example/go/association#term_id> ?_go_0_t .
?_go_0_t <http://ucsc.example/go/term#name> ?label .
}
}
sparql -m goProt2.map -np goProt2-bug.rq
failed to match triples prefixed by "!" in
SELECT ?symbol ?label
WHERE
{
<http://www.uniprot.org/uniprot/P04637> skos:prefLabel ?symbol .
! ?product <http://yetanothergenevocabulary.org999/#symbol> ?symbol .
! ?id <http://yetanothergenevocabulary.org999/#product> ?product .
?id <http://www.geneontology.org/dtd/go.dtdterm> ?goterm .
?goterm <http://www.w3.org/2000/01/rdf-schema#label> ?label .
}
sparql --debug 1 -m goProt2.map -np goProt2-bug.rq
| PK | | → Address(ID) |
|---|---|---|
| ID | fname | addr |
| 7 | Bob | 18 |
| 8 | Sue | NULL |
| PK | | |
|---|---|---|
| ID | city | state |
| 18 | Cambridge | MA |
<People/ID=7#_> <People#ID> 7 . <People/ID=7#_> <People#fname> "Bob" . <People/ID=7#_> <People#addr> <Addresses/ID=18#_> . <People/ID=8#_> <People#ID> 8 . <People/ID=8#_> <People#fname> "Sue" . <Addresses/ID=18#_> <Addresses#ID> 18 . <Addresses/ID=18#_> <Addresses#city> "Cambridge" . <Addresses/ID=18#_> <Addresses#state> "MA" .
<People#fname> vs. foaf:name.
<People/ID=7#_> <People#ID> 7 . <People/ID=7#_> <People#fname> "Bob" . <People/ID=7#_> <People#addr> <Addresses/ID=18#_> . <People/ID=8#_> <People#ID> 8 . <People/ID=8#_> <People#fname> "Sue" . <Addresses/ID=18#_> <Addresses#ID> 18 . <Addresses/ID=18#_> <Addresses#city> "Cambridge" . <Addresses/ID=18#_> <Addresses#state> "MA" .
sparql --stem http://ucsc.example/uniProt/ -S mysql://genome@genome-mysql.cse.ucsc.edu/uniProt --serve http://localhost:8001/uniProt
sparql --stem http://ucsc.example/go/ -S mysql://genome@genome-mysql.cse.ucsc.edu/go --serve http://localhost:8003/go
sparql -m goProt2.map -8d goProt.ttl goProt2.rq
| ?symbol | ?label |
|---|---|
| "tp53" | "DNA binding" |
| "tp53" | "transcription factor activity" |
| … | … +156 rows … |
SELECT ?symbol WHERE {
_:prot uniprot:id 'P04637' .
_:prot skos:prefLabel ?symbol .
...
}
SELECT (<http://www.uniprot.org/uniprot/P04637> AS ?gene)
(fn:lower-case(?_uniProt_0_u_gene_symbol) AS ?symbol)
WHERE SERVICE <http://localhost:8001/uniProt> {
_:_uniProt_0_gene gene:acc "P04637" .
_:_uniProt_0_gene gene:val ?_uniProt_0_u_gene_symbol }
}
SELECT DISTINCT _uniProt_0_gene.val AS _uniProt_0_u_gene_symbol
FROM gene AS _uniProt_0_gene
WHERE _uniProt_0_gene.acc="P04637"
...
?product gene:symbol ?symbol .
?id gene:product ?product .
?id go:term ?goterm .
?goterm rdfs:label ?label .
SERVICE <http://localhost:8003/go>
{
?_go_0_gp <http://ucsc.example/go/gene_product#Symbol> ?symbol .
?_go_0_association <http://ucsc.example/go/association#gene_product_id> ?_go_0_gp .
?_go_0_association <http://ucsc.example/go/association#term_id> ?_go_0_t .
?_go_0_t <http://ucsc.example/go/term#name> ?label .
}
SELECT DISTINCT _go_0_gp.ID AS _go_0_gp, _go_0_gp.Symbol AS symbol, _go_0_association.ID AS _go_0_association, _go_0_association.term_id AS _go_0_t, _go_0_t.name AS label
FROM gene_product AS _go_0_gp
INNER JOIN association AS _go_0_association ON _go_0_association.gene_product_id=_go_0_gp.ID
INNER JOIN term AS _go_0_t ON _go_0_t.ID=_go_0_association.term_id
WHERE (_go_0_gp.Symbol = "tp53" OR _go_0_gp.Symbol = "p53")
The semantics align well, but implementations focus on different use case optimizations.
A foaf:knows B. B foaf:knows C. requires 2 X ?x foaf:knows ?y
Resolve as much as possible;
huge impact on performance.
<http://www.uniprot.org/uniprot/P04637>
skos:prefLabel ?symbol .
=>
SELECT (fn:lower-case(?_uniProt_0_u_gene_symbol) AS ?symbol)
WHERE {
_:_uniProt_0_gene <http://ucsc.example/uniProt/gene#acc> "P04637" .
_:_uniProt_0_gene <http://ucsc.example/uniProt/gene#val> ?_uniProt_0_u_gene_symbol .
}
Resolve as deeply as possible.
CONCAT("a", ?b, "c", ?d) = "a123bxyz": → regexp("a(.*?)c(.*?)")
?a + ?b = 3
fn:upper-case(?s) = "ASDF"
