query-6b7549a1071c305733df20b1a65a90f7

rq turtle/ttl

TODO

Use at

# Purpose: split each ?title at its first two spaces and report the length
# (space characters excluded) of every piece:
#   ?substring1     - text before the first space
#   ?postfix        - text after the first space
#   ?substring2     - text before the first space of ?postfix
#   ?postsubstring2 - text that follows ?substring2 inside ?postfix
# Titles come from two sources joined by UNION: a fixed list of test strings
# and up to 10 titles fetched via wdt:P1476.
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wd: <http://www.wikidata.org/entity/>
SELECT * WHERE {
  {
    # Hand-written test titles covering edge cases: leading/trailing spaces,
    # a single word without spaces, punctuation.
    VALUES (?title) {
      ("a test string to find a way to find the longest three (or so) substrings in a given string, though I would be fine with something like MAX(?substring) as well")
      ("being able to extract n-grams for n > 1 would be great too")
      (" another test string that starts and ends with space characters ")
      ("writing documentation is useful")
      ("OneWordTitleInCamelCase")
      ("Thanks for your help!")
    }
  }
  UNION
  {
    {
      # The sub-select projects only ?title, so ?work is not visible outside.
      # LIMIT without ORDER BY: which 10 titles are returned is up to the endpoint.
      SELECT ?title WHERE {
        ?work wdt:P1476 ?title. # let's keep the different languages in for the moment and worry about selecting the desired ones later
        ?work wdt:P31 wd:Q13442814 . # presumably restricts ?work to one item class - confirm what wd:Q13442814 denotes
        FILTER(REGEX(?title, "\\s+"))   # The workflow below assumes the presence of spaces in the title. Need a way to bring in one-word titles but let's worry about that later too.

      }
      LIMIT 10
    }
  }
  # Every *length variable counts characters after removing " " characters.
  BIND(STRLEN(REPLACE(?title, " ", "")) AS ?titlelength)

  # STRBEFORE / STRAFTER return an empty string when the separator does not
  # occur, so a title without any space yields empty ?substring1 and ?postfix.
  BIND(STRBEFORE(?title, " ") AS ?substring1)
  BIND(STRLEN(REPLACE(?substring1, " ", "")) AS ?substring1length)
  BIND(STRAFTER(?title, " ") AS ?postfix)
  BIND(STRLEN(REPLACE(?postfix, " ", "")) AS ?postfixlength)
  BIND(STRBEFORE(?postfix, " ") AS ?substring2)
  BIND(STRLEN(REPLACE(?substring2, " ", "")) AS ?substring2length)
  # When ?substring2 is empty, STRAFTER returns ?postfix unchanged.
  BIND(STRAFTER(?postfix, ?substring2) AS ?postsubstring2)
  # Fixed: the length is now taken from ?postsubstring2; it was previously
  # computed from ?substring2 and therefore duplicated ?substring2length.
  BIND(STRLEN(REPLACE(?postsubstring2, " ", "")) AS ?postsubstring2length)
}
ORDER BY DESC(?substring1length) #order by longest first substring
# ORDER BY DESC(?substring2length) #order by longest second substring

# We still need to bind all those substrings together to be able to
# identify the (up to) three longest ones per title

Query found at

%% Dataflow diagram of the SPARQL query: variables (v*), filters (f*),
%% BIND / VALUES expressions (bind*) and the two UNION branches.
%% Green nodes are projected variables, yellow nodes are IRIs.
graph TD
  classDef projected fill:lightgreen;
  classDef literal fill:orange;
  classDef iri fill:yellow;

  %% Variables. Each variable has its own node id: ?substring1length uses v12
  %% because v6 is already taken by ?postfix.
  v6("?postfix"):::projected
  v7("?postfixlength"):::projected
  v10("?postsubstring2"):::projected
  v11("?postsubstring2length"):::projected
  v5("?substring1"):::projected
  v12("?substring1length"):::projected
  v8("?substring2"):::projected
  v9("?substring2length"):::projected
  v2("?title"):::projected
  v4("?titlelength"):::projected
  v3("?work")
  c4(["wd:Q13442814"]):::iri

  subgraph union0[" Union "]
    %% Branch that fetches titles through triple patterns and a regex filter.
    subgraph union0l[" "]
      style union0l fill:#abf,stroke-dasharray: 3 3;
      f0[["regex(?title,'\s+')"]]
      f0 --> v2
      v3 --"wdt:P1476"--> v2
      v3 --"wdt:P31"--> c4
    end
    %% Branch that supplies the literal test titles through VALUES.
    subgraph union0r[" "]
      style union0r fill:#abf,stroke-dasharray: 3 3;
      bind1[/VALUES ?title/]
      bind1-->v2
      bind10(["a test string to find a way to find the longest three (or so) substrings in a given string, though I would be fine with something like MAX(?substring) as well"])
      bind10 --> bind1
      bind11(["being able to extract n-grams for n > 1 would be great too"])
      bind11 --> bind1
      bind12([" another test string that starts and ends with space characters "])
      bind12 --> bind1
      bind13(["writing documentation is useful"])
      bind13 --> bind1
      bind14(["OneWordTitleInCamelCase"])
      bind14 --> bind1
      bind15(["Thanks for your help!"])
      bind15 --> bind1
    end
    union0r <== or ==> union0l
  end

  %% BIND expressions: input variable --o expression --as--o output variable.
  bind2[/"string-length(replace(?title,' ',''))"/]
  v2 --o bind2
  bind2 --as--o v4
  bind3[/"substring-before(?title,' ')"/]
  v2 --o bind3
  bind3 --as--o v5
  bind4[/"string-length(replace(?substring1,' ',''))"/]
  v5 --o bind4
  bind4 --as--o v12
  bind5[/"substring-after(?title,' ')"/]
  v2 --o bind5
  bind5 --as--o v6
  bind6[/"string-length(replace(?postfix,' ',''))"/]
  v6 --o bind6
  bind6 --as--o v7
  bind7[/"substring-before(?postfix,' ')"/]
  v6 --o bind7
  bind7 --as--o v8
  bind8[/"string-length(replace(?substring2,' ',''))"/]
  v8 --o bind8
  bind8 --as--o v9
  bind9[/"substring-after(?postfix,?substring2)"/]
  v6 --o bind9
  v8 --o bind9
  bind9 --as--o v10
  %% Uses id bind16 because bind10 already names the first VALUES literal.
  bind16[/"string-length(replace(?substring2,' ',''))"/]
  v8 --o bind16
  bind16 --as--o v11