query-738f99fabb5b66c1b899ef2381e02c38
Well-distributed random subset, portable version
The optimizer thwarts use of RAND(), but UUID() and STRUUID() work,
and surprisingly, generating pseudo-random values with SHA512 is more efficient than anything else,
even MD5 and SHA1, suggesting this hashing function is heavily optimized on Blazegraph.
# Well-distributed random subset of up to 1000 items, using a named subquery.
# The named subquery %i computes a pseudo-random sort key per matching ?s,
# keeps the first 1000 rows in key order, and the outer query attaches labels.
#
# NOTE: this query must stay on multiple lines. A '#' begins a comment that
# extends to the end of the line, so a single-line form would comment out
# everything after the disabled STRUUID() alternative below.
SELECT ?s ?sLabel ?random
WITH {
  SELECT ?s ?random
  WHERE {
    ?s wdt:P31 wd:Q5 ;
       wdt:P27 wd:Q20 .
    # Alternative sort key, kept for reference:
    #BIND(STRUUID() AS ?random)
    # Hash RAND() together with the item IRI to get the sort key.
    BIND(SHA512(CONCAT(STR(RAND()), STR(?s))) AS ?random)
  }
  ORDER BY ?random
  LIMIT 1000
} AS %i
WHERE {
  INCLUDE %i
  # The label service is queried only for the rows kept by the subquery.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
Use at
- https://query.wikidata.org/sparql
PREFIX wikibase: <http://wikiba.se/ontology#>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX bd: <http://www.bigdata.com/rdf#>
# Well-distributed random subset, portable version.
# The optimizer thwarts use of RAND(), but UUID() and STRUUID() work.
# Surprisingly, generating pseudo-random values with SHA512 is more efficient
# than anything else, even MD5 and SHA1, which suggests this hashing function
# is heavily optimized on Blazegraph.
SELECT ?s ?sLabel ?random
WHERE {
  {
    # Inner query: give each matching ?s a sort key, then keep the first
    # 1000 rows in key order.
    SELECT ?s ?random
    WHERE {
      ?s wdt:P31 wd:Q5 .
      ?s wdt:P27 wd:Q20 .
      # Alternative sort key, kept for reference:
      #BIND(STRUUID() AS ?random)
      BIND(SHA512(CONCAT(STR(RAND()), STR(?s))) AS ?random)
    }
    ORDER BY ?random
    LIMIT 1000
  }
  # Label lookup for the rows that survived the LIMIT above.
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
  }
}
Query found at
graph TD
%% Flowchart of the query's graph pattern, laid out top-down.
%% Style classes: projected variables, literal constants, IRI constants.
classDef projected fill:lightgreen;
classDef literal fill:orange;
classDef iri fill:yellow;
%% Variable nodes; both are styled as projected.
v3("?random"):::projected
v2("?s"):::projected
%% Constant nodes: three IRIs and one string literal.
c6(["bd:serviceParam"]):::iri
c2(["wd:Q5"]):::iri
c4(["wd:Q20"]):::iri
%% #91; and #93; are entity codes for '[' and ']' in the label text.
c8(["#91;AUTO_LANGUAGE#93;,en"]):::literal
%% Triple patterns on ?s, each edge labelled with its predicate.
v2 --"wdt:P31"--> c2
v2 --"wdt:P27"--> c4
%% BIND expression node: ?s feeds the expression, whose result is bound to ?random.
bind0[/"SHA512(concat(str(RAND()),str(?s)))"/]
v2 --o bind0
bind0 --as--o v3
%% Subgraph titled with the label service IRI, holding its language-parameter triple.
subgraph s1["http://wikiba.se/ontology#label"]
style s1 stroke-width:4px;
c6 --"wikibase:language"--> c8
end