query-738f99fabb5b66c1b899ef2381e02c38

rq turtle/ttl

Well-distributed random subset, portable version

The optimizer thwarts the use of RAND(), but UUID() and STRUUID() work,

and surprisingly generating pseudo-randoms with SHA512 is more efficient than anything else

even MD5 and SHA1, suggesting this hash function is heavily optimized on Blazegraph.

# Well-distributed random subset (up to 1000 rows), named-subquery form.
# The sampling query is declared with WITH { ... } AS %i and pulled into the
# outer WHERE via INCLUDE %i; the label service then runs over the included rows.
# NOTE(review): no PREFIX declarations here; assumes the endpoint predefines
# wd:, wdt:, wikibase: and bd: -- confirm before running elsewhere.
SELECT ?s ?sLabel ?random
WITH {
  SELECT ?s ?random
  WHERE {
    ?s wdt:P31 wd:Q5 ;
       wdt:P27 wd:Q20 .
    # Alternative random key, kept for reference. It must stay on its own line:
    # a '#' comment runs to end of line and would otherwise swallow the BIND below.
    # BIND(STRUUID() AS ?random)
    # Hash of a fresh RAND() concatenated with the subject IRI gives a per-row sort key.
    BIND(SHA512(CONCAT(STR(RAND()), STR(?s))) AS ?random)
  }
  ORDER BY ?random
  LIMIT 1000
} AS %i
WHERE {
  INCLUDE %i
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}

Use at

PREFIX bd: <http://www.bigdata.com/rdf#>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wikibase: <http://wikiba.se/ontology#>

# Well-distributed random subset, portable version (plain subquery, no named subquery).
# Author's notes: the optimizer thwarts RAND() used on its own, while UUID() and
# STRUUID() work; hashing with SHA512 was observed to be more efficient than the
# alternatives (including MD5 and SHA1), suggesting it is heavily optimized on Blazegraph.
SELECT ?s ?sLabel ?random
WHERE {
  {
    # Inner query: attach a pseudo-random key to each match, sort by it, keep 1000 rows.
    SELECT ?s ?random
    WHERE {
      ?s wdt:P31 wd:Q5 .
      ?s wdt:P27 wd:Q20 .
      # Alternative key generator, left disabled:
      # BIND(STRUUID() AS ?random)
      BIND(SHA512(CONCAT(STR(RAND()), STR(?s))) AS ?random)
    }
    ORDER BY ?random
    LIMIT 1000
  }
  # Label lookup for the sampled rows (fills ?sLabel).
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
  }
}

Query found at

%% Diagram of the query's graph pattern: projected variables, IRIs, the literal
%% language parameter, the BIND expression, and the label SERVICE as a subgraph.
%% One statement per line so each node and edge statement is properly separated.
graph TD
  %% Node style classes
  classDef projected fill:lightgreen;
  classDef literal fill:orange;
  classDef iri fill:yellow;
  %% Nodes
  v3("?random"):::projected
  v2("?s"):::projected
  c6(["bd:serviceParam"]):::iri
  c2(["wd:Q5"]):::iri
  c4(["wd:Q20"]):::iri
  c8(["#91;AUTO_LANGUAGE#93;,en"]):::literal
  %% Triple patterns on ?s
  v2 --"wdt:P31"--> c2
  v2 --"wdt:P27"--> c4
  %% BIND: ?s feeds the hash expression, whose result is bound to ?random
  bind0[/"SHA512(concat(str(RAND()),str(?s)))"/]
  v2 --o bind0
  bind0 --as--o v3
  %% Label service call
  subgraph s1["http://wikiba.se/ontology#label"]
    style s1 stroke-width:4px;
    c6 --"wikibase:language"--> c8
  end