query-72d4b86c56cf9a7419926ddfb61a541b

rq turtle/ttl

TODO

Use at

PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX bd: <http://www.bigdata.com/rdf#>
# Purpose: within a slice of items that have German citizenship, find humans
# born on or after 1875-01-01 that have no family-name statement and whose
# German label looks like "Firstname [Middle ...] Lastname". The last word of
# that label is then joined against German family-name items whose German
# label is identical and unambiguous.
#
# Result columns:
#   ?item           - the person item lacking a family-name statement
#   ?label_de       - the person's German label
#   ?lastname       - the last space-separated word of ?label_de
#   ?lastname_item  - the single family-name item carrying that German label
SELECT ?item ?label_de ?lastname ?lastname_item
{
               # Enable top-to-bottom execution
               # NOTE(review): no optimizer hint statement follows this comment
               # in this file - confirm whether one was meant to be here.

  SERVICE bd:slice {
    ?item wdt:P27 wd:Q183 .                      # items with German citizenship
    bd:serviceParam bd:slice.offset 0 .          # start of the scanned window
    bd:serviceParam bd:slice.limit 10000 .       # number of items to scan
  }

  FILTER NOT EXISTS { ?item wdt:P734 [] } .      # keep only items with no family name statement at all

  FILTER EXISTS {                                # keep only people born on or after 1875-01-01
    ?item wdt:P569 ?date_of_birth .              # date of birth (only used inside this EXISTS)

    FILTER (?date_of_birth >= "1875-01-01T00:00:00Z"^^xsd:dateTime)
  }.

  ?item rdfs:label ?label_de FILTER(LANG(?label_de) = 'de') .                   # German label only
  FILTER(REGEX(STR(?label_de), "^\\p{Lu}[\\p{Ll}-]+( \\p{Lu}[\\p{Ll}-]+)+$")) . # two or more words, each an uppercase letter followed by lowercase letters or hyphens

  FILTER EXISTS { ?item wdt:P31 wd:Q5 } .                       # item is a human

  BIND(REPLACE(?label_de, "^.* ([^\\s]+)$", "$1") AS ?lastname) # last word of the label, taken as the family name
  FILTER(REGEX(STR(?lastname), "...")) .                        # at least 3 characters (any characters, not only letters)

  # Sub-select: one row per German family-name label that belongs to exactly
  # one item. It joins with the outer pattern on ?lastname.
  # NOTE(review): the join only matches if the outer ?lastname carries the same
  # language tag as the label below; presumably REPLACE keeps the tag of
  # ?label_de - confirm.
   {
  SELECT ?lastname (SAMPLE(?item) AS ?lastname_item) {
    ?item wdt:P31 wd:Q101352 ;                   # family name items (this ?item is local to the sub-select)
          wdt:P407 wd:Q188 ;                     # in German
          rdfs:label ?lastname FILTER(LANG(?lastname) = "de")
  } GROUP BY ?lastname HAVING(COUNT(?item) = 1)  # drop labels shared by several family-name items
}                                            # hashjoin! (author's note; no join hint appears in this file)
}

Query found at

graph TD
    %% Flowchart of the SPARQL query shown earlier in this file.
    %% Restored to one statement per line; ids, labels and edges are unchanged.
    %% NOTE(review): id v5 is declared with two labels and is also the target of
    %% the count(?item) bind; c2, c7 and c8 appear in edges but are never
    %% declared. These look like generator artifacts and are kept as found.

    %% Colour classes: projected variables, literals, IRIs.
    classDef projected fill:lightgreen;
    classDef literal fill:orange;
    classDef iri fill:yellow;

    %% Variable nodes (v*) and blank nodes (a*).
    v5("?_anon_42b5416610844a34809f33a099ae9de198291")
    v4("?date_of_birth")
    v2("?item"):::projected
    v3("?label_de"):::projected
    v5("?lastname"):::projected
    v6("?lastname_item"):::projected
    a1((" "))
    a2((" "))

    %% Constant nodes (c*).
    c19(["wd:Q101352"]):::iri
    c14(["0^^xsd:integer"]):::literal
    c12(["bd:serviceParam"]):::iri
    c3(["wd:Q5"]):::iri
    c16(["10000^^xsd:integer"]):::literal
    c11(["wd:Q183"]):::iri
    c21(["wd:Q188"]):::iri

    %% Filter on the length of ?lastname.
    f0[["regex(str(?lastname),'...')"]]
    f0 --> v5

    %% FILTER EXISTS: ?item wdt:P31 wd:Q5.
    f1[[" "]]
    subgraph f1e0["Exists Clause"]
        e0v1 --"wdt:P31"--> e0c2
        e0v1("?item"):::projected
        e0c2(["wd:Q5"]):::iri
    end
    f1--EXISTS--> f1e0
    f1 --> v2
    f1 --> c2
    f1 --> c3
    v2 --"wdt:P31"--> c3

    %% Filters on the German label.
    f2[["regex(str(?label_de),'^\p{Lu}#91;\p{Ll}-#93;+( \p{Lu}#91;\p{Ll}-#93;+)+$')"]]
    f2 --> v3
    f3[["?label_de = 'de'"]]
    f3 --> v3

    %% FILTER EXISTS: date of birth on or after 1875-01-01.
    f4[[" "]]
    subgraph f4e1["Exists Clause"]
        e1f0[["?date_of_birth >= '1875-01-01T00:00:00Z^^xsd:dateTime'"]]
        e1f0 --> e1v1
        e1v2 --"wdt:P569"--> e1v1
        e1v1("?date_of_birth"):::projected
        e1v2("?item"):::projected
    end
    f4--EXISTS--> f4e1
    f4 --> v4
    f4 --> v2
    f4 --> c7
    f5[["?date_of_birth >= '1875-01-01T00:00:00Z^^xsd:dateTime'"]]
    f5 --> v4
    v2 --"wdt:P569"--> v4

    %% FILTER NOT EXISTS: ?item wdt:P734 with any value.
    f6[["not "]]
    subgraph f6e2["Exists Clause"]
        e2v1 --"wdt:P734"--> e2a1
        e2v1("?item"):::projected
        e2a1((" ")):::projected
    end
    f6--EXISTS--> f6e2
    f6 --> v2
    f6 --> c8
    f6 --> a1
    v2 --"wdt:P734"--> a1

    %% SERVICE bd:slice block.
    subgraph s1["http://www.bigdata.com/rdf#slice"]
        style s1 stroke-width:4px;
        v2 --"wdt:P27"--> c11
        c12 --"bd:slice.offset"--> c14
        c12 --"bd:slice.limit"--> c16
    end

    %% Label triple and the BIND that derives ?lastname from it.
    v2 --"rdfs:label"--> v3
    bind7[/"replace(?label_de,'^.* (#91;^\s#93;+)$','$1')"/]
    v3 --o bind7
    bind7 --as--o v5

    %% Sub-select over family-name items: filters, triples and aggregates.
    f8[[" = '1^^xsd:integer'"]]
    f8 --> a2
    f9[["?lastname = 'de'"]]
    f9 --> v5
    v2 --"wdt:P31"--> c19
    v2 --"wdt:P407"--> c21
    v2 --"rdfs:label"--> v5
    bind12[/"count(?item)"/]
    v2 --o bind12
    bind12 --as--o v5
    bind13[/"sample(?item)"/]
    v2 --o bind13
    bind13 --as--o v6