query-9cc1c545674a9067f7ddd69a8e8dbfaa

rq turtle/ttl

Originally posted on Mastodon and on Twitter.

Use at

# Settlements in ?state (default: wd:Q142) whose name (wdt:P1448) contains more than one word.
# Results are shown on a map, with one layer per word-count bucket: "2", "3", "4" or "5+".
#
# Only wd: and wdt: are used by the main query; wikibase:, ps:, pq: and p: are referenced
# solely by the ?state lookup query embedded in the #TEMPLATE directive below.
PREFIX wikibase: <http://wikiba.se/ontology#>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
PREFIX pq: <http://www.wikidata.org/prop/qualifier/>
PREFIX p: <http://www.wikidata.org/prop/>
#TEMPLATE={ "template": { "en": "settlements in ?state with more than one word in the name" }, "variables": { "?state": { "query": "SELECT DISTINCT ?id WHERE { ?id wdt:P31/wdt:P279* wd:Q3624078; p:P463 ?memberOfStatement. ?memberOfStatement a wikibase:BestRank; ps:P463 wd:Q1065. MINUS { ?memberOfStatement pq:P582 ?endTime. } MINUS { ?id wdt:P576|wdt:P582 ?end. } }" } } }
#defaultView:Map{ "layer": "?words", "hide": ["?words", "?coordinates"] }
# DISTINCT: the sequence path wdt:P31/wdt:P279* yields one solution per wdt:P31 value that
# reaches wd:Q486972, so an item with several such values would otherwise be returned
# (and drawn on the map) several times with identical bindings.
SELECT DISTINCT ?city ?cityLabel ?words ?coordinates WHERE {
  # Template variable: the query UI replaces wd:Q142 with the state chosen by the user.
  BIND(wd:Q142 AS ?state)
  ?city wdt:P31/wdt:P279* wd:Q486972;
        wdt:P17 ?state;
        wdt:P625 ?coordinates;
        wdt:P1448 ?cityLabel.
  # According to https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html, a word character (\w) is equivalent to the following character class, assuming the UNICODE_CHARACTER_CLASS flag is defined:
  # [\p{Alpha}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{Digit}\p{gc=Pc}]
  # BlazeGraph does not define this flag, therefore we have to emulate word characters using the following variant:
  # [\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}]
  # That is, a word character is alphabetic, any kind of mark (nonspacing, enclosing, or spacing combining), a digit, or connector punctuation.
  # Non-word characters are the inverse of this character class,
  # and words are runs of one or more word characters separated by one or more non-word characters.
  # As an optimization, at the beginning and end of the pattern we can replace "one or more" with "one",
  # so that the extra characters (if they exist) are not part of the match.
  # As a further optimization, we pre-filter the names using the much simpler regex \w\W+\w,
  # which has some false positives (e.g. Zürich, due to the umlaut) but no false negatives.
  FILTER(REGEX(?cityLabel, "\\w\\W+\\w"))
  # Exact check: at least two words (word char, non-word run, word char).
  FILTER(REGEX(?cityLabel, "[\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}][^\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}]+[\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}]"))
  # Bucket by word count, testing the longest pattern first: five or more words, then four, then three.
  # Anything that matches none of them must have exactly two words, because the filters above
  # already guarantee at least two.
  BIND(IF(REGEX(?cityLabel, "[\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}][^\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}]+[\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}]+[^\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}]+[\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}]+[^\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}]+[\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}]+[^\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}]+[\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}]"), "5+"@en,
          IF(REGEX(?cityLabel, "[\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}][^\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}]+[\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}]+[^\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}]+[\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}]+[^\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}]+[\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}]"), "4"@en,
             IF(REGEX(?cityLabel, "[\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}][^\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}]+[\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}]+[^\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}]+[\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}]"), "3"@en,
                "2"@en)))
       AS ?words)
}
# The bucket labels sort as strings, so descending order lists "5+" first and "2" last.
ORDER BY DESC(?words)

Query found at

graph TD classDef projected fill:lightgreen; classDef literal fill:orange; classDef iri fill:yellow; v4("?city"):::projected v2("?cityLabel"):::projected v5("?coordinates"):::projected v3("?state") v6("?words"):::projected a1((" ")) c5(["wd:Q486972"]):::iri f0[["regex(?cityLabel,'#91;\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;#91;^\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;+#91;\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;')"]] f0 --> v2 f1[["regex(?cityLabel,'\w\W+\w')"]] f1 --> v2 bind2[/"'wd:Q142'"/] bind2 --as--o v3 v4 --"p:direct/P31"--> a1 a1 --"p:direct/P279"--> c5 v4 --"p:direct/P17"--> v3 v4 --"p:direct/P625"--> v5 v4 --"p:direct/P1448"--> v2 bind3[/"if(regex(?cityLabel,'#91;\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;#91;^\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;+#91;\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;+#91;^\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;+#91;\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;+#91;^\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;+#91;\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;+#91;^\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;+#91;\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;'),s5+^^<http://www.w3.org/1999/02/22-rdf-syntax-ns#langString>',if(regex(?cityLabel,'#91;\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;#91;^\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;+#91;\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;+#91;^\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;+#91;\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;+#91;^\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;+#91;\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;
'),s4^^<http://www.w3.org/1999/02/22-rdf-syntax-ns#langString>',if(regex(?cityLabel,'#91;\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;#91;^\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;+#91;\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;+#91;^\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;+#91;\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;'),s3^^<http://www.w3.org/1999/02/22-rdf-syntax-ns#langString>',s2^^<http://www.w3.org/1999/02/22-rdf-syntax-ns#langString>')))"/] v2 --o bind3 bind3 --as--o v6