query-6b7549a1071c305733df20b1a65a90f7
TODO
Use at
- https://query.wikidata.org/sparql
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wd: <http://www.wikidata.org/entity/>

# Splits every ?title at its first two plain space characters and returns the
# pieces together with their lengths. Every *length variable counts characters
# after all plain spaces (" ") have been removed.
#
# Titles come from two sources combined with UNION:
#   1. a fixed list of test strings (VALUES), and
#   2. up to 10 wdt:P1476 values of items that have wdt:P31 wd:Q13442814.
#
# Result variables (SELECT * projects all of them):
#   ?title                   the input string
#   ?titlelength             length of ?title
#   ?substring1              text before the first space
#   ?postfix                 text after the first space
#   ?substring2              text before the first space of ?postfix
#   ?postsubstring2          text of ?postfix that follows ?substring2
#   ?substring1length, ?postfixlength, ?substring2length, ?postsubstring2length
#                            lengths of the corresponding pieces
#
# Per SPARQL 1.1, STRBEFORE/STRAFTER yield an empty string when the separator is
# not found, so a title without any space gets empty ?substring1 and ?postfix,
# and a title that starts with a space gets an empty ?substring1.
SELECT * WHERE {
  # Source 1: hand-written test titles.
  {
    VALUES (?title) {
      ("a test string to find a way to find the longest three (or so) substrings in a given string, though I would be fine with something like MAX(?substring) as well")
      ("being able to extract n-grams for n > 1 would be great too")
      (" another test string that starts and ends with space characters ")
      ("writing documentation is useful")
      ("OneWordTitleInCamelCase")
      ("Thanks for your help!")
    }
  }
  UNION
  # Source 2: a small sample of titles from Wikidata. The sub-select keeps the
  # LIMIT local to this branch and exposes only ?title (?work stays internal).
  {
    {
      SELECT ?title WHERE {
        # All language variants are kept for now; choosing the desired
        # languages is deferred.
        ?work wdt:P1476 ?title .
        ?work wdt:P31 wd:Q13442814 .
        # The splitting below assumes the title contains whitespace.
        # One-word titles still need a way in; deferred as well.
        FILTER(REGEX(?title, "\\s+"))
      }
      LIMIT 10
    }
  }

  # Whole title.
  BIND(STRLEN(REPLACE(?title, " ", "")) AS ?titlelength)

  # First piece: everything before the first space.
  BIND(STRBEFORE(?title, " ") AS ?substring1)
  BIND(STRLEN(REPLACE(?substring1, " ", "")) AS ?substring1length)

  # Remainder after the first space.
  BIND(STRAFTER(?title, " ") AS ?postfix)
  BIND(STRLEN(REPLACE(?postfix, " ", "")) AS ?postfixlength)

  # Second piece: everything before the first space of the remainder.
  BIND(STRBEFORE(?postfix, " ") AS ?substring2)
  BIND(STRLEN(REPLACE(?substring2, " ", "")) AS ?substring2length)

  # What is left of the remainder once the second piece is taken off.
  BIND(STRAFTER(?postfix, ?substring2) AS ?postsubstring2)
  # Measure ?postsubstring2 itself. Measuring ?substring2 here (as before) just
  # duplicated ?substring2length.
  BIND(STRLEN(REPLACE(?postsubstring2, " ", "")) AS ?postsubstring2length)
}
ORDER BY DESC(?substring1length) # order by longest first substring
# Alternative ordering, by longest second substring:
# ORDER BY DESC(?substring2length)
# TODO: the substrings still need to be bound together in order to identify
# the (up to) three longest ones per title.
Query found at
graph TD
  %% Structure diagram of the SPARQL query: variable nodes are v*, constants c*,
  %% filters f*, and BIND / VALUES steps bind*. Node labels mirror the query text.
  classDef projected fill:lightgreen;
  classDef literal fill:orange;
  classDef iri fill:yellow;

  %% Variable nodes. Each id must be unique: Mermaid merges nodes that share an
  %% id, so ?substring1length uses v12 instead of reusing v6 from ?postfix.
  v6("?postfix"):::projected
  v7("?postfixlength"):::projected
  v10("?postsubstring2"):::projected
  v11("?postsubstring2length"):::projected
  v5("?substring1"):::projected
  v12("?substring1length"):::projected
  v8("?substring2"):::projected
  v9("?substring2length"):::projected
  v2("?title"):::projected
  v4("?titlelength"):::projected
  v3("?work")
  c4(["wd:Q13442814"]):::iri

  %% The two alternative sources of ?title.
  subgraph union0[" Union "]
    %% Branch that reads titles from the graph and filters on whitespace.
    subgraph union0l[" "]
      style union0l fill:#abf,stroke-dasharray: 3 3;
      f0[["regex(?title,'\s+')"]]
      f0 --> v2
      v3 --"wdt:P1476"--> v2
      v3 --"wdt:P31"--> c4
    end
    %% Branch that supplies the fixed test titles through VALUES.
    %% bind10 to bind15 are the individual literal rows feeding bind1.
    subgraph union0r[" "]
      style union0r fill:#abf,stroke-dasharray: 3 3;
      bind1[/VALUES ?title/]
      bind1-->v2
      bind10(["a test string to find a way to find the longest three (or so) substrings in a given string, though I would be fine with something like MAX(?substring) as well"])
      bind10 --> bind1
      bind11(["being able to extract n-grams for n > 1 would be great too"])
      bind11 --> bind1
      bind12([" another test string that starts and ends with space characters "])
      bind12 --> bind1
      bind13(["writing documentation is useful"])
      bind13 --> bind1
      bind14(["OneWordTitleInCamelCase"])
      bind14 --> bind1
      bind15(["Thanks for your help!"])
      bind15 --> bind1
    end
    union0r <== or ==> union0l
  end

  %% BIND chain: an edge ending in a circle feeds an expression, and an edge
  %% labelled as points at the variable the expression is bound to.
  bind2[/"string-length(replace(?title,' ',''))"/]
  v2 --o bind2
  bind2 --as--o v4
  bind3[/"substring-before(?title,' ')"/]
  v2 --o bind3
  bind3 --as--o v5
  bind4[/"string-length(replace(?substring1,' ',''))"/]
  v5 --o bind4
  bind4 --as--o v12
  bind5[/"substring-after(?title,' ')"/]
  v2 --o bind5
  bind5 --as--o v6
  bind6[/"string-length(replace(?postfix,' ',''))"/]
  v6 --o bind6
  bind6 --as--o v7
  bind7[/"substring-before(?postfix,' ')"/]
  v6 --o bind7
  bind7 --as--o v8
  bind8[/"string-length(replace(?substring2,' ',''))"/]
  v8 --o bind8
  bind8 --as--o v9
  bind9[/"substring-after(?postfix,?substring2)"/]
  v6 --o bind9
  v8 --o bind9
  bind9 --as--o v10
  %% This step is bind16 because bind10 is already the first VALUES literal above.
  %% NOTE(review): it reads ?substring2, exactly as in the query text this diagram
  %% was generated from, so its result equals ?substring2length. Regenerate the
  %% diagram if the query is changed.
  bind16[/"string-length(replace(?substring2,' ',''))"/]
  v8 --o bind16
  bind16 --as--o v11