1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178
|
## A BiTable is a table that can be seen as an optimized pair
## of `(Table[LitId, Val], Table[Val, LitId])`.
import std/hashes
import rodfiles
when defined(nimPreviewSlimSystem):
import std/assertions
type
LitId* = distinct uint32
BiTable*[T] = object
vals: seq[T] # indexed by LitId
keys: seq[LitId] # indexed by hash(val)
proc initBiTable*[T](): BiTable[T] = BiTable[T](vals: @[], keys: @[])
proc nextTry(h, maxHash: Hash): Hash {.inline.} =
result = (h + 1) and maxHash
template maxHash(t): untyped = high(t.keys)
template isFilled(x: LitId): bool = x.uint32 > 0'u32
proc `$`*(x: LitId): string {.borrow.}
proc `<`*(x, y: LitId): bool {.borrow.}
proc `<=`*(x, y: LitId): bool {.borrow.}
proc `==`*(x, y: LitId): bool {.borrow.}
proc hash*(x: LitId): Hash {.borrow.}
proc len*[T](t: BiTable[T]): int = t.vals.len
proc mustRehash(length, counter: int): bool {.inline.} =
assert(length > counter)
result = (length * 2 < counter * 3) or (length - counter < 4)
const
idStart = 1
template idToIdx(x: LitId): int = x.int - idStart
proc hasLitId*[T](t: BiTable[T]; x: LitId): bool =
let idx = idToIdx(x)
result = idx >= 0 and idx < t.vals.len
proc enlarge[T](t: var BiTable[T]) =
var n: seq[LitId]
newSeq(n, len(t.keys) * 2)
swap(t.keys, n)
for i in 0..high(n):
let eh = n[i]
if isFilled(eh):
var j = hash(t.vals[idToIdx eh]) and maxHash(t)
while isFilled(t.keys[j]):
j = nextTry(j, maxHash(t))
t.keys[j] = move n[i]
proc getKeyId*[T](t: BiTable[T]; v: T): LitId =
let origH = hash(v)
var h = origH and maxHash(t)
if t.keys.len != 0:
while true:
let litId = t.keys[h]
if not isFilled(litId): break
if t.vals[idToIdx t.keys[h]] == v: return litId
h = nextTry(h, maxHash(t))
return LitId(0)
proc getOrIncl*[T](t: var BiTable[T]; v: T): LitId =
let origH = hash(v)
var h = origH and maxHash(t)
if t.keys.len != 0:
while true:
let litId = t.keys[h]
if not isFilled(litId): break
if t.vals[idToIdx t.keys[h]] == v: return litId
h = nextTry(h, maxHash(t))
# not found, we need to insert it:
if mustRehash(t.keys.len, t.vals.len):
enlarge(t)
# recompute where to insert:
h = origH and maxHash(t)
while true:
let litId = t.keys[h]
if not isFilled(litId): break
h = nextTry(h, maxHash(t))
else:
setLen(t.keys, 16)
h = origH and maxHash(t)
result = LitId(t.vals.len + idStart)
t.keys[h] = result
t.vals.add v
proc `[]`*[T](t: var BiTable[T]; litId: LitId): var T {.inline.} =
let idx = idToIdx litId
assert idx < t.vals.len
result = t.vals[idx]
proc `[]`*[T](t: BiTable[T]; litId: LitId): lent T {.inline.} =
let idx = idToIdx litId
assert idx < t.vals.len
result = t.vals[idx]
proc hash*[T](t: BiTable[T]): Hash =
## as the keys are hashes of the values, we simply use them instead
var h: Hash = 0
for i, n in pairs t.keys:
h = h !& hash((i, n))
result = !$h
proc store*[T](f: var RodFile; t: BiTable[T]) =
storeSeq(f, t.vals)
storeSeq(f, t.keys)
proc load*[T](f: var RodFile; t: var BiTable[T]) =
loadSeq(f, t.vals)
loadSeq(f, t.keys)
proc sizeOnDisc*(t: BiTable[string]): int =
result = 4
for x in t.vals:
result += x.len + 4
result += t.keys.len * sizeof(LitId)
when isMainModule:
var t: BiTable[string]
echo getOrIncl(t, "hello")
echo getOrIncl(t, "hello")
echo getOrIncl(t, "hello3")
echo getOrIncl(t, "hello4")
echo getOrIncl(t, "helloasfasdfdsa")
echo getOrIncl(t, "hello")
echo getKeyId(t, "hello")
echo getKeyId(t, "none")
for i in 0 ..< 100_000:
discard t.getOrIncl($i & "___" & $i)
for i in 0 ..< 100_000:
assert t.getOrIncl($i & "___" & $i).idToIdx == i + 4
echo "begin"
echo t.vals.len
echo t.vals[0]
echo t.vals[1004]
echo "middle"
var tf: BiTable[float]
discard tf.getOrIncl(0.4)
discard tf.getOrIncl(16.4)
discard tf.getOrIncl(32.4)
echo getKeyId(tf, 32.4)
var f2 = open("testblah.bin", fmWrite)
echo store(f2, tf)
f2.close
var f1 = open("testblah.bin", fmRead)
var t2: BiTable[float]
echo f1.load(t2)
echo t2.vals.len
echo getKeyId(t2, 32.4)
echo "end"
f1.close
|