1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361
|
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package bitutils
import (
"encoding/binary"
"math/bits"
"github.com/apache/arrow-go/v18/arrow/bitutil"
"github.com/apache/arrow-go/v18/internal/utils"
)
// IsMultipleOf64 returns whether v is a multiple of 64.
func IsMultipleOf64(v int64) bool { return v&63 == 0 }
// LeastSignificantBitMask returns a bit mask to return the least significant
// bits for a value starting from the bit index passed in. ie: if you want a
// mask for the 4 least significant bits, you call LeastSignificantBitMask(4)
func LeastSignificantBitMask(index int64) uint64 {
return (uint64(1) << index) - 1
}
// SetBitRun describes a run of contiguous set bits in a bitmap with Pos being
// the starting position of the run and Length being the number of bits.
type SetBitRun struct {
Pos int64
Length int64
}
// AtEnd returns true if this bit run is the end of the set by checking
// that the length is 0.
func (s SetBitRun) AtEnd() bool {
return s.Length == 0
}
// Equal returns whether rhs is the same run as s
func (s SetBitRun) Equal(rhs SetBitRun) bool {
return s.Pos == rhs.Pos && s.Length == rhs.Length
}
// SetBitRunReader is an interface for reading groups of contiguous set bits
// from a bitmap. The interface allows us to create different reader implementations
// that share the same interface easily such as a reverse set reader.
type SetBitRunReader interface {
// NextRun will return the next run of contiguous set bits in the bitmap
NextRun() SetBitRun
// Reset allows re-using the reader by providing a new bitmap, offset and length. The arguments
// match the New function for the reader being used.
Reset([]byte, int64, int64)
// VisitSetBitRuns calls visitFn for each set in a loop starting from the current position
// it's roughly equivalent to simply looping, calling NextRun and calling visitFn on the run
// for each run.
VisitSetBitRuns(visitFn VisitFn) error
}
type baseSetBitRunReader struct {
bitmap []byte
pos int64
length int64
remaining int64
curWord uint64
curNumBits int32
reversed bool
firstBit uint64
}
// NewSetBitRunReader returns a SetBitRunReader for the bitmap starting at startOffset which will read
// numvalues bits.
func NewSetBitRunReader(validBits []byte, startOffset, numValues int64) SetBitRunReader {
return newBaseSetBitRunReader(validBits, startOffset, numValues, false)
}
// NewReverseSetBitRunReader returns a SetBitRunReader like NewSetBitRunReader, except it will
// return runs starting from the end of the bitmap until it reaches startOffset rather than starting
// at startOffset and reading from there. The SetBitRuns will still operate the same, so Pos
// will still be the position of the "left-most" bit of the run or the "start" of the run. It
// just returns runs starting from the end instead of starting from the beginning.
func NewReverseSetBitRunReader(validBits []byte, startOffset, numValues int64) SetBitRunReader {
return newBaseSetBitRunReader(validBits, startOffset, numValues, true)
}
func newBaseSetBitRunReader(bitmap []byte, startOffset, length int64, reverse bool) *baseSetBitRunReader {
ret := &baseSetBitRunReader{reversed: reverse}
ret.Reset(bitmap, startOffset, length)
return ret
}
func (br *baseSetBitRunReader) Reset(bitmap []byte, startOffset, length int64) {
br.bitmap = bitmap
br.length = length
br.remaining = length
br.curNumBits = 0
br.curWord = 0
if !br.reversed {
br.pos = startOffset / 8
br.firstBit = 1
bitOffset := int8(startOffset % 8)
if length > 0 && bitOffset != 0 {
br.curNumBits = int32(utils.Min(int(length), int(8-bitOffset)))
br.curWord = br.loadPartial(bitOffset, int64(br.curNumBits))
}
return
}
br.pos = (startOffset + length) / 8
br.firstBit = uint64(0x8000000000000000)
endBitOffset := int8((startOffset + length) % 8)
if length > 0 && endBitOffset != 0 {
br.pos++
br.curNumBits = int32(utils.Min(int(length), int(endBitOffset)))
br.curWord = br.loadPartial(8-endBitOffset, int64(br.curNumBits))
}
}
func (br *baseSetBitRunReader) consumeBits(word uint64, nbits int32) uint64 {
if br.reversed {
return word << nbits
}
return word >> nbits
}
func (br *baseSetBitRunReader) countFirstZeros(word uint64) int32 {
if br.reversed {
return int32(bits.LeadingZeros64(word))
}
return int32(bits.TrailingZeros64(word))
}
func (br *baseSetBitRunReader) loadPartial(bitOffset int8, numBits int64) uint64 {
var word [8]byte
nbytes := bitutil.BytesForBits(numBits)
if br.reversed {
br.pos -= nbytes
copy(word[8-nbytes:], br.bitmap[br.pos:br.pos+nbytes])
return (binary.LittleEndian.Uint64(word[:]) << bitOffset) &^ LeastSignificantBitMask(64-numBits)
}
copy(word[:], br.bitmap[br.pos:br.pos+nbytes])
br.pos += nbytes
return (binary.LittleEndian.Uint64(word[:]) >> bitOffset) & LeastSignificantBitMask(numBits)
}
func (br *baseSetBitRunReader) findCurrentRun() SetBitRun {
nzeros := br.countFirstZeros(br.curWord)
if nzeros >= br.curNumBits {
br.remaining -= int64(br.curNumBits)
br.curWord = 0
br.curNumBits = 0
return SetBitRun{0, 0}
}
br.curWord = br.consumeBits(br.curWord, nzeros)
br.curNumBits -= nzeros
br.remaining -= int64(nzeros)
pos := br.position()
numOnes := br.countFirstZeros(^br.curWord)
br.curWord = br.consumeBits(br.curWord, numOnes)
br.curNumBits -= numOnes
br.remaining -= int64(numOnes)
return SetBitRun{pos, int64(numOnes)}
}
func (br *baseSetBitRunReader) position() int64 {
if br.reversed {
return br.remaining
}
return br.length - br.remaining
}
func (br *baseSetBitRunReader) adjustRun(run SetBitRun) SetBitRun {
if br.reversed {
run.Pos -= run.Length
}
return run
}
func (br *baseSetBitRunReader) loadFull() (ret uint64) {
if br.reversed {
br.pos -= 8
}
ret = binary.LittleEndian.Uint64(br.bitmap[br.pos : br.pos+8])
if !br.reversed {
br.pos += 8
}
return
}
func (br *baseSetBitRunReader) skipNextZeros() {
for br.remaining >= 64 {
br.curWord = br.loadFull()
nzeros := br.countFirstZeros(br.curWord)
if nzeros < 64 {
br.curWord = br.consumeBits(br.curWord, nzeros)
br.curNumBits = 64 - nzeros
br.remaining -= int64(nzeros)
return
}
br.remaining -= 64
}
// run of zeros continues in last bitmap word
if br.remaining > 0 {
br.curWord = br.loadPartial(0, br.remaining)
br.curNumBits = int32(br.remaining)
nzeros := int32(utils.Min(int(br.curNumBits), int(br.countFirstZeros(br.curWord))))
br.curWord = br.consumeBits(br.curWord, nzeros)
br.curNumBits -= nzeros
br.remaining -= int64(nzeros)
}
}
func (br *baseSetBitRunReader) countNextOnes() int64 {
var length int64
if ^br.curWord != 0 {
numOnes := br.countFirstZeros(^br.curWord)
br.remaining -= int64(numOnes)
br.curWord = br.consumeBits(br.curWord, numOnes)
br.curNumBits -= numOnes
if br.curNumBits != 0 {
return int64(numOnes)
}
length = int64(numOnes)
} else {
br.remaining -= 64
br.curNumBits = 0
length = 64
}
for br.remaining >= 64 {
br.curWord = br.loadFull()
numOnes := br.countFirstZeros(^br.curWord)
length += int64(numOnes)
br.remaining -= int64(numOnes)
if numOnes < 64 {
br.curWord = br.consumeBits(br.curWord, numOnes)
br.curNumBits = 64 - numOnes
return length
}
}
if br.remaining > 0 {
br.curWord = br.loadPartial(0, br.remaining)
br.curNumBits = int32(br.remaining)
numOnes := br.countFirstZeros(^br.curWord)
br.curWord = br.consumeBits(br.curWord, numOnes)
br.curNumBits -= numOnes
br.remaining -= int64(numOnes)
length += int64(numOnes)
}
return length
}
func (br *baseSetBitRunReader) NextRun() SetBitRun {
var (
pos int64 = 0
length int64 = 0
)
if br.curNumBits != 0 {
run := br.findCurrentRun()
if run.Length != 0 && br.curNumBits != 0 {
return br.adjustRun(run)
}
pos = run.Pos
length = run.Length
}
if length == 0 {
// we didn't get any ones in curWord, so we can skip any zeros
// in the following words
br.skipNextZeros()
if br.remaining == 0 {
return SetBitRun{0, 0}
}
pos = br.position()
} else if br.curNumBits == 0 {
if br.remaining >= 64 {
br.curWord = br.loadFull()
br.curNumBits = 64
} else if br.remaining > 0 {
br.curWord = br.loadPartial(0, br.remaining)
br.curNumBits = int32(br.remaining)
} else {
return br.adjustRun(SetBitRun{pos, length})
}
if (br.curWord & br.firstBit) == 0 {
return br.adjustRun(SetBitRun{pos, length})
}
}
length += br.countNextOnes()
return br.adjustRun(SetBitRun{pos, length})
}
// VisitFn is a callback function for visiting runs of contiguous bits
type VisitFn func(pos int64, length int64) error
func (br *baseSetBitRunReader) VisitSetBitRuns(visitFn VisitFn) error {
for {
run := br.NextRun()
if run.Length == 0 {
break
}
if err := visitFn(run.Pos, run.Length); err != nil {
return err
}
}
return nil
}
// VisitSetBitRuns is just a convenience function for calling NewSetBitRunReader and then VisitSetBitRuns
func VisitSetBitRuns(bitmap []byte, bitmapOffset int64, length int64, visitFn VisitFn) error {
if bitmap == nil {
return visitFn(0, length)
}
rdr := NewSetBitRunReader(bitmap, bitmapOffset, length)
for {
run := rdr.NextRun()
if run.Length == 0 {
break
}
if err := visitFn(run.Pos, run.Length); err != nil {
return err
}
}
return nil
}
func VisitSetBitRunsNoErr(bitmap []byte, bitmapOffset int64, length int64, visitFn func(pos, length int64)) {
if bitmap == nil {
visitFn(0, length)
return
}
rdr := NewSetBitRunReader(bitmap, bitmapOffset, length)
for {
run := rdr.NextRun()
if run.Length == 0 {
break
}
visitFn(run.Pos, run.Length)
}
}
|