query-9cc1c545674a9067f7ddd69a8e8dbfaa
Originally posted on Mastodon and on Twitter.
Use at
- https://query.wikidata.org/sparql
PREFIX wikibase: <http://wikiba.se/ontology#>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
PREFIX pq: <http://www.wikidata.org/prop/qualifier/>
PREFIX p: <http://www.wikidata.org/prop/>
#TEMPLATE={ "template": { "en": "settlements in ?state with more than one word in the name" }, "variables": { "?state": { "query": "SELECT DISTINCT ?id WHERE { ?id wdt:P31/wdt:P279* wd:Q3624078; p:P463 ?memberOfStatement. ?memberOfStatement a wikibase:BestRank; ps:P463 wd:Q1065. MINUS { ?memberOfStatement pq:P582 ?endTime. } MINUS { ?id wdt:P576|wdt:P582 ?end. } }" } } }
#defaultView:Map{ "layer": "?words", "hide": ["?words", "?coordinates"] }
# Lists settlements of ?state whose name (?cityLabel) consists of more than one word,
# together with their coordinates and a word-count bucket ?words ("2", "3", "4" or "5+").
SELECT ?city ?cityLabel ?words ?coordinates WHERE {
  # Default value of the ?state variable declared in the #TEMPLATE directive.
  BIND(wd:Q142 AS ?state)
  # ?city is an instance of (a subclass of) wd:Q486972, belongs to ?state,
  # and has both coordinates and a name statement (wdt:P1448).
  ?city wdt:P31/wdt:P279* wd:Q486972;
        wdt:P17 ?state;
        wdt:P625 ?coordinates;
        wdt:P1448 ?cityLabel.
  # According to https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html, a word character (\w) is equivalent to the following character class, assuming the UNICODE_CHARACTER_CLASS flag is defined:
  # [\p{Alpha}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{Digit}\p{gc=Pc}]
  # BlazeGraph does not define this flag, therefore we have to emulate word characters using the following variant:
  # [\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}]
  # That is, a word character is alphabetic, any kind of mark (nonspacing, enclosing, or spacing combining), a digit, or connector punctuation.
  # Non-word characters are the inverse of this character class,
  # and words are runs of one or more word characters separated by one or more non-word characters.
  # As an optimization, at the beginning and end of the pattern we can replace “one or more” with “one”,
  # so that the extra characters (if they exist) are not part of the match.
  # As a further optimization, we pre-filter the names using the much simpler regex \S\W+\S.
  # Without the UNICODE_CHARACTER_CLASS flag, \w only matches [a-zA-Z_0-9], so the more obvious pre-filter \w\W+\w
  # would wrongly reject multi-word names that have no such characters around the separator
  # (e. g. names written entirely in Cyrillic).
  # \S\W+\S has some false positives (e. g. Zürich, due to the umlaut) but no false negatives:
  # every word character as defined above is a non-whitespace character (\S),
  # and every non-word character as defined above is also matched by \W.
  FILTER(REGEX(?cityLabel, "\\S\\W+\\S"))
  # Precise check: at least two words (word, separator, word).
  FILTER(REGEX(?cityLabel, "[\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}][^\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}]+[\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}]"))
  # Bucket the name by word count, testing the longest pattern first:
  # five or more words, then four, then three; everything that is left has exactly two.
  BIND(IF(REGEX(?cityLabel, "[\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}][^\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}]+[\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}]+[^\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}]+[\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}]+[^\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}]+[\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}]+[^\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}]+[\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}]"), "5+"@en,
          IF(REGEX(?cityLabel, "[\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}][^\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}]+[\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}]+[^\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}]+[\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}]+[^\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}]+[\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}]"), "4"@en,
          IF(REGEX(?cityLabel, "[\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}][^\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}]+[\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}]+[^\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}]+[\\p{IsAlphabetic}\\p{gc=Mn}\\p{gc=Me}\\p{gc=Mc}\\p{IsDigit}\\p{gc=Pc}]"), "3"@en,
          "2"@en)))
       AS ?words)
}
# Names with the most words come first.
ORDER BY DESC(?words)
Query found at
graph TD
%% Top-down flowchart of the SPARQL query above: variables, constants, filters and binds are nodes,
%% triple patterns and data dependencies are edges.
%% Fill colours that tell the node kinds apart.
classDef projected fill:lightgreen;
classDef literal fill:orange;
classDef iri fill:yellow;
%% Query variables; the ones tagged as projected are listed in the SELECT clause.
v4("?city"):::projected
v2("?cityLabel"):::projected
v5("?coordinates"):::projected
v3("?state")
v6("?words"):::projected
%% Unlabelled intermediate node between the P31 and P279 steps of the property path.
a1((" "))
%% IRI constant that the property path ends at.
c5(["wd:Q486972"]):::iri
%% FILTER nodes, each pointing at the variable it tests.
%% In the labels, #91; and #93; are entity codes standing for the opening and closing square bracket.
f0[["regex(?cityLabel,'#91;\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;#91;^\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;+#91;\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;')"]]
f0 --> v2
f1[["regex(?cityLabel,'\w\W+\w')"]]
f1 --> v2
%% BIND of the constant wd:Q142 to ?state.
bind2[/"'wd:Q142'"/]
bind2 --as--o v3
%% Triple patterns with ?city as the subject.
%% NOTE(review): the query uses wdt:P279* but the star is not shown on the P279 edge - confirm this is intended.
v4 --"p:direct/P31"--> a1
a1 --"p:direct/P279"--> c5
v4 --"p:direct/P17"--> v3
v4 --"p:direct/P625"--> v5
v4 --"p:direct/P1448"--> v2
%% BIND of the nested if/regex expression that reads ?cityLabel and produces ?words.
%% NOTE(review): the literals appear as s5+, s4, s3 and s2 with a trailing quote only, whereas the query has "5+", "4", "3" and "2" tagged en - presumably an artifact of the diagram generator; verify.
bind3[/"if(regex(?cityLabel,'#91;\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;#91;^\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;+#91;\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;+#91;^\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;+#91;\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;+#91;^\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;+#91;\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;+#91;^\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;+#91;\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;'),s5+^^<http://www.w3.org/1999/02/22-rdf-syntax-ns#langString>',if(regex(?cityLabel,'#91;\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;#91;^\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;+#91;\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;+#91;^\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;+#91;\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;+#91;^\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;+#91;\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;'),s4^^<http://www.w3.org/1999/02/22-rdf-syntax-ns#langString>',if(regex(?cityLabel,'#91;\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;#91;^\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;+#91;\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;+#91;^\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;+#91;\p{IsAlphabetic}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{IsDigit}\p{gc=Pc}#93;'),s3^^<http://www.w3.org/1999/02/22-rdf-syntax-ns#langString>',s2^^<http://www.w3.org/1999/02/22-rdf-syntax-ns#langString>')))"/]
v2 --o bind3
bind3 --as--o v6