query-72d4b86c56cf9a7419926ddfb61a541b
TODO
Use at the following SPARQL endpoint:
- https://query.wikidata.org/sparql
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX bd: <http://www.bigdata.com/rdf#>
# Lists people with German citizenship that have a date of birth on or after
# 1875-01-01 and no family-name statement (P734), whose German label ends in a
# word that is the German label of exactly one German-language family-name item.
#
# Result columns:
#   ?item          - the person
#   ?label_de      - the person's German label
#   ?lastname      - last space-separated word of ?label_de
#   ?lastname_item - the single family-name item labelled ?lastname
#
# NOTE(review): earlier comments on this query mentioned top-to-bottom
# execution and a hash join, but no query-hint statements are present in this
# text -- confirm whether they were dropped on the way here.
SELECT ?item ?label_de ?lastname ?lastname_item
WHERE {
  # Look at a bounded window of citizenship statements through the slice service.
  SERVICE bd:slice {
    # country of citizenship (P27) is Germany (Q183)
    ?item wdt:P27 wd:Q183 .
    # start of the window
    bd:serviceParam bd:slice.offset 0 .
    # size of the window, i.e. how many items are scanned
    bd:serviceParam bd:slice.limit 10000 .
  }

  # Drop everyone who already has a family name (P734).
  FILTER NOT EXISTS { ?item wdt:P734 [] }

  # Keep only items with a date of birth (P569) from 1875 onwards.
  FILTER EXISTS {
    ?item wdt:P569 ?date_of_birth .
    FILTER (?date_of_birth >= "1875-01-01T00:00:00Z"^^xsd:dateTime)
  }

  # German label made of two or more capitalised words, e.g. "Aaaa Bbbb Cccc".
  ?item rdfs:label ?label_de .
  FILTER (LANG(?label_de) = 'de')
  FILTER (REGEX(STR(?label_de), "^\\p{Lu}[\\p{Ll}-]+( \\p{Lu}[\\p{Ll}-]+)+$"))

  # Instance of (P31) human (Q5).
  FILTER EXISTS { ?item wdt:P31 wd:Q5 }

  # Take the last word of the label as the candidate family name and require
  # it to contain at least three characters.
  BIND (REPLACE(?label_de, "^.* ([^\\s]+)$", "$1") AS ?lastname)
  FILTER (REGEX(STR(?lastname), "..."))

  # Family-name items (Q101352) in German (Q188), keyed by their German label.
  # Labels shared by more than one item are discarded as ambiguous, so SAMPLE
  # always picks the only candidate.
  {
    SELECT ?lastname (SAMPLE(?item) AS ?lastname_item) {
      ?item wdt:P31 wd:Q101352 ;
            wdt:P407 wd:Q188 ;
            rdfs:label ?lastname .
      FILTER (LANG(?lastname) = "de")
    }
    GROUP BY ?lastname
    HAVING (COUNT(?item) = 1)
  }
}
Query found at
graph TD
%% Flowchart view of the SPARQL query earlier in this file: variables (v*),
%% anonymous nodes (a*), constants (c*), filters (f*), binds (bind*) and the
%% slice service call (s1).
%% NOTE(review): this looks machine-generated -- confirm before hand-editing.
%% Style classes for projected variables, literal constants and IRI constants.
classDef projected fill:lightgreen;
classDef literal fill:orange;
classDef iri fill:yellow;
%% Variable nodes.
%% NOTE(review): id v5 is declared twice below (an anonymous variable and
%% ?lastname) -- verify which label the renderer ends up showing.
v5("?_anon_42b5416610844a34809f33a099ae9de198291")
v4("?date_of_birth")
v2("?item"):::projected
v3("?label_de"):::projected
v5("?lastname"):::projected
v6("?lastname_item"):::projected
%% Anonymous (blank) nodes.
a1((" "))
a2((" "))
%% Constant nodes.
%% NOTE(review): c2, c7 and c8 are referenced by f1, f4 and f6 further down
%% but are not declared in this list.
c19(["wd:Q101352"]):::iri
c14(["0^^xsd:integer"]):::literal
c12(["bd:serviceParam"]):::iri
c3(["wd:Q5"]):::iri
c16(["10000^^xsd:integer"]):::literal
c11(["wd:Q183"]):::iri
c21(["wd:Q188"]):::iri
%% Filter on ?lastname: regex of three arbitrary characters.
f0[["regex(str(?lastname),'...')"]]
f0 --> v5
%% EXISTS filter: ?item wdt:P31 wd:Q5. The pattern is drawn inside the
%% subgraph and again as a plain edge after it.
f1[[" "]]
subgraph f1e0["Exists Clause"]
e0v1 --"wdt:P31"--> e0c2
e0v1("?item"):::projected
e0c2(["wd:Q5"]):::iri
end
f1--EXISTS--> f1e0
f1 --> v2
f1 --> c2
f1 --> c3
v2 --"wdt:P31"--> c3
%% Regex filter on the shape of ?label_de.
f2[["regex(str(?label_de),'^\p{Lu}#91;\p{Ll}-#93;+( \p{Lu}#91;\p{Ll}-#93;+)+$')"]]
f2 --> v3
%% Language filter on ?label_de; the label omits the LANG() call that the
%% query uses.
f3[["?label_de = 'de'"]]
f3 --> v3
%% EXISTS filter on the date of birth (wdt:P569) with its nested comparison.
f4[[" "]]
subgraph f4e1["Exists Clause"]
e1f0[["?date_of_birth >= '1875-01-01T00:00:00Z^^xsd:dateTime'"]]
e1f0 --> e1v1
e1v2 --"wdt:P569"--> e1v1
e1v1("?date_of_birth"):::projected
e1v2("?item"):::projected
end
f4--EXISTS--> f4e1
f4 --> v4
f4 --> v2
f4 --> c7
%% The same comparison and wdt:P569 edge, repeated outside the subgraph.
f5[["?date_of_birth >= '1875-01-01T00:00:00Z^^xsd:dateTime'"]]
f5 --> v4
v2 --"wdt:P569"--> v4
%% NOT EXISTS filter: ?item has no wdt:P734 value.
f6[["not "]]
subgraph f6e2["Exists Clause"]
e2v1 --"wdt:P734"--> e2a1
e2v1("?item"):::projected
e2a1((" ")):::projected
end
f6--EXISTS--> f6e2
f6 --> v2
f6 --> c8
f6 --> a1
v2 --"wdt:P734"--> a1
%% Slice service call: the wdt:P27 pattern plus its offset and limit parameters.
subgraph s1["http://www.bigdata.com/rdf#slice"]
style s1 stroke-width:4px;
v2 --"wdt:P27"--> c11
c12 --"bd:slice.offset"--> c14
c12 --"bd:slice.limit"--> c16
end
%% Label triple and the bind that derives ?lastname from ?label_de.
v2 --"rdfs:label"--> v3
bind7[/"replace(?label_de,'^.* (#91;^\s#93;+)$','$1')"/]
v3 --o bind7
bind7 --as--o v5
%% Comparison with 1 whose left-hand side is blank in the label; presumably
%% the HAVING(COUNT(?item) = 1) clause of the subquery -- verify.
f8[[" = '1^^xsd:integer'"]]
f8 --> a2
%% Language filter on ?lastname, again shown without LANG().
f9[["?lastname = 'de'"]]
f9 --> v5
%% Family-name patterns from the subquery. They reuse v2, the same node as
%% the outer ?item, so the two variable scopes are not distinguished here.
v2 --"wdt:P31"--> c19
v2 --"wdt:P407"--> c21
v2 --"rdfs:label"--> v5
%% Aggregates from the subquery.
%% NOTE(review): count(?item) is drawn as bound to v5 (?lastname); the query
%% contains no such binding -- verify against the generator.
bind12[/"count(?item)"/]
v2 --o bind12
bind12 --as--o v5
bind13[/"sample(?item)"/]
v2 --o bind13
bind13 --as--o v6