From: "debing.sun" <debing.sun@redis.com>
Date: Wed, 7 May 2025 18:25:06 +0800
Subject: [PATCH] Fix out of bounds write in hyperloglog commands
  (CVE-2025-32023)

Co-authored-by: oranagra <oran@redislabs.com>
---
 src/hyperloglog.c          | 47 +++++++++++++++++++++++++++++++++++++-----
 tests/unit/hyperloglog.tcl | 51 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 93 insertions(+), 5 deletions(-)

diff --git a/src/hyperloglog.c b/src/hyperloglog.c
index 2f8c02e..d6a8782 100644
--- a/src/hyperloglog.c
+++ b/src/hyperloglog.c
@@ -579,6 +579,7 @@ int hllSparseToDense(robj *o) {
     struct hllhdr *hdr, *oldhdr = (struct hllhdr*)sparse;
     int idx = 0, runlen, regval;
     uint8_t *p = (uint8_t*)sparse, *end = p+sdslen(sparse);
+    int valid = 1;
 
     /* If the representation is already the right one return ASAP. */
     hdr = (struct hllhdr*) sparse;
@@ -598,16 +599,27 @@ int hllSparseToDense(robj *o) {
     while(p < end) {
         if (HLL_SPARSE_IS_ZERO(p)) {
             runlen = HLL_SPARSE_ZERO_LEN(p);
+            if ((runlen + idx) > HLL_REGISTERS) { /* Overflow. */
+                valid = 0;
+                break;
+            }
             idx += runlen;
             p++;
         } else if (HLL_SPARSE_IS_XZERO(p)) {
             runlen = HLL_SPARSE_XZERO_LEN(p);
+            if ((runlen + idx) > HLL_REGISTERS) { /* Overflow. */
+                valid = 0;
+                break;
+            }
             idx += runlen;
             p += 2;
         } else {
             runlen = HLL_SPARSE_VAL_LEN(p);
             regval = HLL_SPARSE_VAL_VALUE(p);
-            if ((runlen + idx) > HLL_REGISTERS) break; /* Overflow. */
+            if ((runlen + idx) > HLL_REGISTERS) { /* Overflow. */
+                valid = 0;
+                break;
+            }
             while(runlen--) {
                 HLL_DENSE_SET_REGISTER(hdr->registers,idx,regval);
                 idx++;
@@ -618,7 +630,7 @@ int hllSparseToDense(robj *o) {
 
     /* If the sparse representation was valid, we expect to find idx
      * set to HLL_REGISTERS. */
-    if (idx != HLL_REGISTERS) {
+    if (!valid || idx != HLL_REGISTERS) {
         sdsfree(dense);
         return C_ERR;
     }
@@ -915,27 +927,40 @@ int hllSparseAdd(robj *o, unsigned char *ele, size_t elesize) {
 void hllSparseRegHisto(uint8_t *sparse, int sparselen, int *invalid, int* reghisto) {
     int idx = 0, runlen, regval;
     uint8_t *end = sparse+sparselen, *p = sparse;
+    int valid = 1;
 
     while(p < end) {
         if (HLL_SPARSE_IS_ZERO(p)) {
             runlen = HLL_SPARSE_ZERO_LEN(p);
+            if ((runlen + idx) > HLL_REGISTERS) { /* Overflow. */
+                valid = 0;
+                break;
+            }
             idx += runlen;
             reghisto[0] += runlen;
             p++;
         } else if (HLL_SPARSE_IS_XZERO(p)) {
             runlen = HLL_SPARSE_XZERO_LEN(p);
+            if ((runlen + idx) > HLL_REGISTERS) { /* Overflow. */
+                valid = 0;
+                break;
+            }
             idx += runlen;
             reghisto[0] += runlen;
             p += 2;
         } else {
             runlen = HLL_SPARSE_VAL_LEN(p);
             regval = HLL_SPARSE_VAL_VALUE(p);
+            if ((runlen + idx) > HLL_REGISTERS) { /* Overflow. */
+                valid = 0;
+                break;
+            }
             idx += runlen;
             reghisto[regval] += runlen;
             p++;
         }
     }
-    if (idx != HLL_REGISTERS && invalid) *invalid = 1;
+    if ((!valid || idx != HLL_REGISTERS) && invalid) *invalid = 1;
 }
 
 /* ========================= HyperLogLog Count ==============================
@@ -1204,22 +1229,34 @@ int hllMerge(uint8_t *max, robj *hll) {
     } else {
         uint8_t *p = hll->ptr, *end = p + sdslen(hll->ptr);
         long runlen, regval;
+        int valid = 1;
 
         p += HLL_HDR_SIZE;
         i = 0;
         while(p < end) {
             if (HLL_SPARSE_IS_ZERO(p)) {
                 runlen = HLL_SPARSE_ZERO_LEN(p);
+                if ((runlen + i) > HLL_REGISTERS) { /* Overflow. */
+                    valid = 0;
+                    break;
+                }
                 i += runlen;
                 p++;
             } else if (HLL_SPARSE_IS_XZERO(p)) {
                 runlen = HLL_SPARSE_XZERO_LEN(p);
+                if ((runlen + i) > HLL_REGISTERS) { /* Overflow. */
+                    valid = 0;
+                    break;
+                }
                 i += runlen;
                 p += 2;
             } else {
                 runlen = HLL_SPARSE_VAL_LEN(p);
                 regval = HLL_SPARSE_VAL_VALUE(p);
-                if ((runlen + i) > HLL_REGISTERS) break; /* Overflow. */
+                if ((runlen + i) > HLL_REGISTERS) { /* Overflow. */
+                    valid = 0;
+                    break;
+                }
                 while(runlen--) {
                     if (regval > max[i]) max[i] = regval;
                     i++;
@@ -1227,7 +1264,7 @@ int hllMerge(uint8_t *max, robj *hll) {
                 p++;
             }
         }
-        if (i != HLL_REGISTERS) return C_ERR;
+        if (!valid || i != HLL_REGISTERS) return C_ERR;
     }
     return C_OK;
 }
diff --git a/tests/unit/hyperloglog.tcl b/tests/unit/hyperloglog.tcl
index f1bbeac..76c0a8d 100644
--- a/tests/unit/hyperloglog.tcl
+++ b/tests/unit/hyperloglog.tcl
@@ -137,6 +137,57 @@ start_server {tags {"hll"}} {
         set e
     } {*WRONGTYPE*}
 
+    test {Corrupted sparse HyperLogLogs doesn't cause overflow and out-of-bounds with XZERO opcode} {
+        r del hll
+        
+        # Create a sparse-encoded HyperLogLog header
+        set pl [string cat "HYLL" [binary format c12 {1 0 0 0 0 0 0 0 0 0 0 0}]]
+
+        # Create an XZERO opcode with the maximum run length of 16384(2^14)
+        set runlen [expr 16384 - 1]
+        set chunk [binary format cc [expr {0b01000000 | ($runlen >> 8)}] [expr {$runlen & 0xff}]]
+        # Fill the HLL with more than 131072(2^17) XZERO opcodes to make the total
+        # run length exceed 4GB, will cause an integer overflow.
+        set repeat [expr 131072 + 1000]
+        for {set i 0} {$i < $repeat} {incr i} {
+            append pl $chunk
+        }
+
+        # Create a VAL opcode with a value that will cause out-of-bounds.
+        append pl [binary format c 0b11111111]
+        r set hll $pl
+
+        # This should not overflow and out-of-bounds.
+        assert_error {*INVALIDOBJ*} {r pfcount hll hll}
+        assert_error {*INVALIDOBJ*} {r pfdebug getreg hll}
+        r ping
+    }
+
+    test {Corrupted sparse HyperLogLogs doesn't cause overflow and out-of-bounds with ZERO opcode} {
+        r del hll
+        
+        # Create a sparse-encoded HyperLogLog header
+        set pl [string cat "HYLL" [binary format c12 {1 0 0 0 0 0 0 0 0 0 0 0}]]
+
+        # # Create an ZERO opcode with the maximum run length of 64(2^6)
+        set chunk [binary format c [expr {0b00000000 | 0x3f}]]
+        # Fill the HLL with more than 33554432(2^17) ZERO opcodes to make the total
+        # run length exceed 4GB, will cause an integer overflow.
+        set repeat [expr 33554432 + 1000]
+        for {set i 0} {$i < $repeat} {incr i} {
+            append pl $chunk
+        }
+
+        # Create a VAL opcode with a value that will cause out-of-bounds.
+        append pl [binary format c 0b11111111]
+        r set hll $pl
+
+        # This should not overflow and out-of-bounds.
+        assert_error {*INVALIDOBJ*} {r pfcount hll hll}
+        assert_error {*INVALIDOBJ*} {r pfdebug getreg hll}
+        r ping
+    }
+
     test {Corrupted dense HyperLogLogs are detected: Wrong length} {
         r del hll
         r pfadd hll a b c
