1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374
|
start_server {tags {"hll"}} {
test {HyperLogLog self test passes} {
catch {r pfselftest} e
set e
} {OK} {needs:pfdebug}
test {PFADD without arguments creates an HLL value} {
r pfadd hll
r exists hll
} {1}
test {Approximated cardinality after creation is zero} {
r pfcount hll
} {0}
test {PFADD returns 1 when at least 1 reg was modified} {
r pfadd hll a b c
} {1}
test {PFADD returns 0 when no reg was modified} {
r pfadd hll a b c
} {0}
test {PFADD works with empty string (regression)} {
r pfadd hll ""
}
# Note that the self test stresses much better the
# cardinality estimation error. We are testing just the
# command implementation itself here.
test {PFCOUNT returns approximated cardinality of set} {
r del hll
set res {}
r pfadd hll 1 2 3 4 5
lappend res [r pfcount hll]
# Call it again to test cached value invalidation.
r pfadd hll 6 7 8 8 9 10
lappend res [r pfcount hll]
set res
} {5 10}
test {HyperLogLogs are promote from sparse to dense} {
r del hll
r config set hll-sparse-max-bytes 3000
set n 0
while {$n < 100000} {
set elements {}
for {set j 0} {$j < 100} {incr j} {lappend elements [expr rand()]}
incr n 100
r pfadd hll {*}$elements
set card [r pfcount hll]
set err [expr {abs($card-$n)}]
assert {$err < (double($card)/100)*5}
if {$n < 1000} {
assert {[r pfdebug encoding hll] eq {sparse}}
} elseif {$n > 10000} {
assert {[r pfdebug encoding hll] eq {dense}}
}
}
} {} {needs:pfdebug}
test {Change hll-sparse-max-bytes} {
r config set hll-sparse-max-bytes 3000
r del hll
r pfadd hll a b c d e d g h i j k
assert {[r pfdebug encoding hll] eq {sparse}}
r config set hll-sparse-max-bytes 30
r pfadd hll new_element
assert {[r pfdebug encoding hll] eq {dense}}
} {} {needs:pfdebug}
test {Hyperloglog promote to dense well in different hll-sparse-max-bytes} {
set max(0) 100
set max(1) 500
set max(2) 3000
for {set i 0} {$i < [array size max]} {incr i} {
r config set hll-sparse-max-bytes $max($i)
r del hll
r pfadd hll
set len [r strlen hll]
while {$len <= $max($i)} {
assert {[r pfdebug encoding hll] eq {sparse}}
set elements {}
for {set j 0} {$j < 10} {incr j} { lappend elements [expr rand()]}
r pfadd hll {*}$elements
set len [r strlen hll]
}
assert {[r pfdebug encoding hll] eq {dense}}
}
} {} {needs:pfdebug}
test {HyperLogLog sparse encoding stress test} {
for {set x 0} {$x < 1000} {incr x} {
r del hll1
r del hll2
set numele [randomInt 100]
set elements {}
for {set j 0} {$j < $numele} {incr j} {
lappend elements [expr rand()]
}
# Force dense representation of hll2
r pfadd hll2
r pfdebug todense hll2
r pfadd hll1 {*}$elements
r pfadd hll2 {*}$elements
assert {[r pfdebug encoding hll1] eq {sparse}}
assert {[r pfdebug encoding hll2] eq {dense}}
# Cardinality estimated should match exactly.
assert {[r pfcount hll1] eq [r pfcount hll2]}
}
} {} {needs:pfdebug}
test {Corrupted sparse HyperLogLogs are detected: Additional at tail} {
r del hll
r pfadd hll a b c
r append hll "hello"
set e {}
catch {r pfcount hll} e
set e
} {*INVALIDOBJ*}
test {Corrupted sparse HyperLogLogs are detected: Broken magic} {
r del hll
r pfadd hll a b c
r setrange hll 0 "0123"
set e {}
catch {r pfcount hll} e
set e
} {*WRONGTYPE*}
test {Corrupted sparse HyperLogLogs are detected: Invalid encoding} {
r del hll
r pfadd hll a b c
r setrange hll 4 "x"
set e {}
catch {r pfcount hll} e
set e
} {*WRONGTYPE*}
test {Corrupted sparse HyperLogLogs doesn't cause overflow and out-of-bounds with XZERO opcode} {
r del hll
# Create a sparse-encoded HyperLogLog header
set header "HYLL"
set payload [binary format c12 {1 0 0 0 0 0 0 0 0 0 0 0}]
set pl [binary format a4a12 $header $payload]
# Create an XZERO opcode with the maximum run length of 16384(2^14)
set runlen [expr 16384 - 1]
set chunk [binary format cc [expr {0b01000000 | ($runlen >> 8)}] [expr {$runlen & 0xff}]]
# Fill the HLL with more than 131072(2^17) XZERO opcodes to make the total
# run length exceed 4GB, will cause an integer overflow.
set repeat [expr 131072 + 1000]
for {set i 0} {$i < $repeat} {incr i} {
append pl $chunk
}
# Create a VAL opcode with a value that will cause out-of-bounds.
append pl [binary format c 0b11111111]
r set hll $pl
# This should not overflow and out-of-bounds.
assert_error {*INVALIDOBJ*} {r pfcount hll hll}
assert_error {*INVALIDOBJ*} {r pfdebug getreg hll}
r ping
}
test {Corrupted sparse HyperLogLogs doesn't cause overflow and out-of-bounds with ZERO opcode} {
r del hll
# Create a sparse-encoded HyperLogLog header
set header "HYLL"
set payload [binary format c12 {1 0 0 0 0 0 0 0 0 0 0 0}]
set pl [binary format a4a12 $header $payload]
# # Create an ZERO opcode with the maximum run length of 64(2^6)
set chunk [binary format c [expr {0b00000000 | 0x3f}]]
# Fill the HLL with more than 33554432(2^17) ZERO opcodes to make the total
# run length exceed 4GB, will cause an integer overflow.
set repeat [expr 33554432 + 1000]
for {set i 0} {$i < $repeat} {incr i} {
append pl $chunk
}
# Create a VAL opcode with a value that will cause out-of-bounds.
append pl [binary format c 0b11111111]
r set hll $pl
# This should not overflow and out-of-bounds.
assert_error {*INVALIDOBJ*} {r pfcount hll hll}
assert_error {*INVALIDOBJ*} {r pfdebug getreg hll}
r ping
}
test {Corrupted dense HyperLogLogs are detected: Wrong length} {
r del hll
r pfadd hll a b c
r setrange hll 4 "\x00"
set e {}
catch {r pfcount hll} e
set e
} {*WRONGTYPE*}
test {Fuzzing dense/sparse encoding: Redis should always detect errors} {
for {set j 0} {$j < 1000} {incr j} {
r del hll
set items {}
set numitems [randomInt 3000]
for {set i 0} {$i < $numitems} {incr i} {
lappend items [expr {rand()}]
}
r pfadd hll {*}$items
# Corrupt it in some random way.
for {set i 0} {$i < 5} {incr i} {
set len [r strlen hll]
set pos [randomInt $len]
set byte [randstring 1 1 binary]
r setrange hll $pos $byte
# Don't modify more bytes 50% of times
if {rand() < 0.5} break
}
# Use the hyperloglog to check if it crashes
# Redis in some way.
catch {
r pfcount hll
}
}
}
test {PFADD, PFCOUNT, PFMERGE type checking works} {
r set foo{t} bar
catch {r pfadd foo{t} 1} e
assert_match {*WRONGTYPE*} $e
catch {r pfcount foo{t}} e
assert_match {*WRONGTYPE*} $e
catch {r pfmerge bar{t} foo{t}} e
assert_match {*WRONGTYPE*} $e
catch {r pfmerge foo{t} bar{t}} e
assert_match {*WRONGTYPE*} $e
}
test {PFMERGE results on the cardinality of union of sets} {
r del hll{t} hll1{t} hll2{t} hll3{t}
r pfadd hll1{t} a b c
r pfadd hll2{t} b c d
r pfadd hll3{t} c d e
r pfmerge hll{t} hll1{t} hll2{t} hll3{t}
r pfcount hll{t}
} {5}
test {PFMERGE on missing source keys will create an empty destkey} {
r del sourcekey{t} sourcekey2{t} destkey{t} destkey2{t}
assert_equal {OK} [r pfmerge destkey{t} sourcekey{t}]
assert_equal 1 [r exists destkey{t}]
assert_equal 0 [r pfcount destkey{t}]
assert_equal {OK} [r pfmerge destkey2{t} sourcekey{t} sourcekey2{t}]
assert_equal 1 [r exists destkey2{t}]
assert_equal 0 [r pfcount destkey{t}]
}
test {PFMERGE with one empty input key, create an empty destkey} {
r del destkey
assert_equal {OK} [r pfmerge destkey]
assert_equal 1 [r exists destkey]
assert_equal 0 [r pfcount destkey]
}
test {PFMERGE with one non-empty input key, dest key is actually one of the source keys} {
r del destkey
assert_equal 1 [r pfadd destkey a b c]
assert_equal {OK} [r pfmerge destkey]
assert_equal 1 [r exists destkey]
assert_equal 3 [r pfcount destkey]
}
test {PFMERGE results with simd} {
r del hllscalar{t} hllsimd{t} hll1{t} hll2{t} hll3{t}
for {set x 1} {$x < 2000} {incr x} {
r pfadd hll1{t} [expr rand()]
}
for {set x 1} {$x < 4000} {incr x} {
r pfadd hll2{t} [expr rand()]
}
for {set x 1} {$x < 8000} {incr x} {
r pfadd hll3{t} [expr rand()]
}
assert {[r pfcount hll1{t}] > 0}
assert {[r pfcount hll2{t}] > 0}
assert {[r pfcount hll3{t}] > 0}
r pfdebug simd off
set scalar [r pfcount hll1{t} hll2{t} hll3{t}]
r pfdebug simd on
set simd [r pfcount hll1{t} hll2{t} hll3{t}]
assert {$scalar > 0}
assert {$simd > 0}
assert_equal $scalar $simd
r pfdebug simd off
r pfmerge hllscalar{t} hll1{t} hll2{t} hll3{t}
r pfdebug simd on
r pfmerge hllsimd{t} hll1{t} hll2{t} hll3{t}
set scalar [r pfcount hllscalar{t}]
set simd [r pfcount hllsimd{t}]
assert {$scalar > 0}
assert {$simd > 0}
assert_equal $scalar $simd
set scalar [r get hllscalar{t}]
set simd [r get hllsimd{t}]
assert_equal $scalar $simd
} {} {needs:pfdebug}
test {PFCOUNT multiple-keys merge returns cardinality of union #1} {
r del hll1{t} hll2{t} hll3{t}
for {set x 1} {$x < 10000} {incr x} {
r pfadd hll1{t} "foo-$x"
r pfadd hll2{t} "bar-$x"
r pfadd hll3{t} "zap-$x"
set card [r pfcount hll1{t} hll2{t} hll3{t}]
set realcard [expr {$x*3}]
set err [expr {abs($card-$realcard)}]
assert {$err < (double($card)/100)*5}
}
}
test {PFCOUNT multiple-keys merge returns cardinality of union #2} {
r del hll1{t} hll2{t} hll3{t}
set elements {}
for {set x 1} {$x < 10000} {incr x} {
for {set j 1} {$j <= 3} {incr j} {
set rint [randomInt 20000]
r pfadd hll$j{t} $rint
lappend elements $rint
}
}
set realcard [llength [lsort -unique $elements]]
set card [r pfcount hll1{t} hll2{t} hll3{t}]
set err [expr {abs($card-$realcard)}]
assert {$err < (double($card)/100)*5}
}
test {PFDEBUG GETREG returns the HyperLogLog raw registers} {
r del hll
r pfadd hll 1 2 3
llength [r pfdebug getreg hll]
} {16384} {needs:pfdebug}
test {PFADD / PFCOUNT cache invalidation works} {
r del hll
r pfadd hll a b c
r pfcount hll
assert {[r getrange hll 15 15] eq "\x00"}
r pfadd hll a b c
assert {[r getrange hll 15 15] eq "\x00"}
r pfadd hll 1 2 3
assert {[r getrange hll 15 15] eq "\x80"}
}
test {PFADD with 2GB entry should not crash server due to overflow in MurmurHash64A} {
r config set proto-max-bulk-len 3221225472
r config set client-query-buffer-limit 3221225472
r write "*3\r\n\$5\r\nPFADD\r\n\$3\r\nhll\r\n"
write_big_bulk 2147483648;
r ping
} {PONG} {large-memory}
}
|