1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196
|
package gozstd
/*
#cgo CFLAGS: -O3
#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h>
#define ZDICT_STATIC_LINKING_ONLY
#include <zdict.h>
#include <stdint.h> // for uintptr_t
// The following *_wrapper functions allow avoiding memory allocations
// during calls from Go.
// See https://github.com/golang/go/issues/24450 .
// ZSTD_createCDict_wrapper receives the dictionary address as a uintptr_t,
// casts it back to a const void pointer and forwards it together with
// dictSize and compressionLevel to ZSTD_createCDict, returning its result.
static ZSTD_CDict* ZSTD_createCDict_wrapper(uintptr_t dictBuffer, size_t dictSize, int compressionLevel) {
return ZSTD_createCDict((const void *)dictBuffer, dictSize, compressionLevel);
}
// ZSTD_createDDict_wrapper receives the dictionary address as a uintptr_t,
// casts it back to a const void pointer and forwards it together with
// dictSize to ZSTD_createDDict, returning its result.
static ZSTD_DDict* ZSTD_createDDict_wrapper(uintptr_t dictBuffer, size_t dictSize) {
return ZSTD_createDDict((const void *)dictBuffer, dictSize);
}
*/
import "C"
import (
"fmt"
"runtime"
"sync"
"unsafe"
)
// minDictLen is the smallest dictionary length BuildDict asks the trainer
// for; it is taken from zstd's ZDICT_DICTSIZE_MIN. BuildDict also uses it
// as a lower bound for the total size of the training samples.
const minDictLen = C.ZDICT_DICTSIZE_MIN
// BuildDict trains a dictionary on the given samples and returns it.
//
// desiredDictLen is the target dictionary size in bytes. Values below
// minDictLen are raised to minDictLen. The returned slice is cut to the
// length reported by the trainer, so it may be shorter than desiredDictLen.
//
// If the samples hold too little data in total, synthetic samples are
// appended before training. nil is returned when the trainer reports
// an error.
//
// The returned dictionary may be passed to NewCDict* and NewDDict.
func BuildDict(samples [][]byte, desiredDictLen int) []byte {
	if desiredDictLen < minDictLen {
		desiredDictLen = minDictLen
	}
	dict := make([]byte, desiredDictLen)

	// Sum up the sizes of all the samples; empty samples add nothing.
	total := 0
	for i := range samples {
		total += len(samples[i])
	}

	// Flatten the samples into a single buffer and record the size of
	// each one. Empty samples end up as zero-length entries in sizes.
	flat := make([]byte, 0, total)
	sizes := make([]C.size_t, 0, len(samples))
	for i := range samples {
		s := samples[i]
		flat = append(flat, s...)
		sizes = append(sizes, C.size_t(len(s)))
	}

	// Pad with fake samples until there is enough training data.
	minTotal := int(C.ZDICT_CONTENTSIZE_MIN)
	if minTotal < minDictLen {
		minTotal = minDictLen
	}
	for total < minTotal {
		fake := fmt.Sprintf("this is a fake sample %d", total)
		flat = append(flat, fake...)
		sizes = append(sizes, C.size_t(len(fake)))
		total += len(fake)
	}

	// ZDICT_trainFromBuffer is called under buildDictLock, since it looks
	// like it is unsafe for concurrent usage (it just randomly crashes).
	// TODO: remove this restriction.
	buildDictLock.Lock()
	n := C.ZDICT_trainFromBuffer(
		unsafe.Pointer(&dict[0]),
		C.size_t(len(dict)),
		unsafe.Pointer(&flat[0]),
		&sizes[0],
		C.unsigned(len(sizes)))
	buildDictLock.Unlock()

	if C.ZDICT_isError(n) != 0 {
		// Training failed, so there is no dictionary to return.
		return nil
	}
	return dict[:int(n)]
}
// buildDictLock serializes the ZDICT_trainFromBuffer call made by BuildDict.
var buildDictLock sync.Mutex
// CDict is a dictionary used for compression.
//
// A single CDict may be re-used in concurrently running goroutines.
type CDict struct {
// p is the handle returned by ZSTD_createCDict.
// Release frees it and resets it to nil.
p *C.ZSTD_CDict
// compressionLevel is the level passed to NewCDictLevel
// when the dictionary was created.
compressionLevel int
}
// NewCDict creates new CDict from the given dict
// using DefaultCompressionLevel.
//
// It is a shorthand for NewCDictLevel(dict, DefaultCompressionLevel)
// and returns whatever NewCDictLevel returns.
//
// Call Release when the returned dict is no longer used.
func NewCDict(dict []byte) (*CDict, error) {
return NewCDictLevel(dict, DefaultCompressionLevel)
}
// NewCDictLevel creates new CDict from the given dict
// using the given compressionLevel.
//
// An error is returned if dict is empty or if zstd fails to create
// the compression dictionary.
//
// Call Release when the returned dict is no longer used.
func NewCDictLevel(dict []byte, compressionLevel int) (*CDict, error) {
	if len(dict) == 0 {
		return nil, fmt.Errorf("dict cannot be empty")
	}

	// The dict address is passed as uintptr through the wrapper in order
	// to avoid a memory allocation on the cgo call
	// (see https://github.com/golang/go/issues/24450 ).
	p := C.ZSTD_createCDict_wrapper(
		C.uintptr_t(uintptr(unsafe.Pointer(&dict[0]))),
		C.size_t(len(dict)),
		C.int(compressionLevel))
	// Prevent from GC'ing of dict during CGO call above.
	runtime.KeepAlive(dict)

	// A nil handle means that zstd could not create the dictionary.
	// Report it here instead of handing out an unusable CDict.
	if p == nil {
		return nil, fmt.Errorf("cannot create CDict from %d bytes of dict data with compressionLevel=%d",
			len(dict), compressionLevel)
	}

	cd := &CDict{
		p:                p,
		compressionLevel: compressionLevel,
	}
	// Free the underlying dictionary if the caller forgets to call Release.
	runtime.SetFinalizer(cd, freeCDict)
	return cd, nil
}
// Release frees the zstd dictionary owned by cd.
//
// Calling Release on an already released cd is a no-op.
//
// cd cannot be used after the release.
func (cd *CDict) Release() {
	if cd.p != nil {
		ensureNoError("ZSTD_freeCDict", C.ZSTD_freeCDict(cd.p))
		cd.p = nil
	}
}
// freeCDict is registered by NewCDictLevel as the finalizer for CDict.
// It releases v, which must hold a *CDict.
func freeCDict(v interface{}) {
	cd := v.(*CDict)
	cd.Release()
}
// DDict is a dictionary used for decompression.
//
// A single DDict may be re-used in concurrently running goroutines.
type DDict struct {
// p is the handle returned by ZSTD_createDDict.
// Release frees it and resets it to nil.
p *C.ZSTD_DDict
}
// NewDDict creates new DDict from the given dict.
//
// An error is returned if dict is empty or if zstd fails to create
// the decompression dictionary.
//
// Call Release when the returned dict is no longer needed.
func NewDDict(dict []byte) (*DDict, error) {
	if len(dict) == 0 {
		return nil, fmt.Errorf("dict cannot be empty")
	}

	// The dict address is passed as uintptr through the wrapper in order
	// to avoid a memory allocation on the cgo call
	// (see https://github.com/golang/go/issues/24450 ).
	p := C.ZSTD_createDDict_wrapper(
		C.uintptr_t(uintptr(unsafe.Pointer(&dict[0]))),
		C.size_t(len(dict)))
	// Prevent from GC'ing of dict during CGO call above.
	runtime.KeepAlive(dict)

	// A nil handle means that zstd could not create the dictionary.
	// Report it here instead of handing out an unusable DDict.
	if p == nil {
		return nil, fmt.Errorf("cannot create DDict from %d bytes of dict data", len(dict))
	}

	dd := &DDict{
		p: p,
	}
	// Free the underlying dictionary if the caller forgets to call Release.
	runtime.SetFinalizer(dd, freeDDict)
	return dd, nil
}
// Release frees the zstd dictionary owned by dd.
//
// Calling Release on an already released dd is a no-op.
//
// dd cannot be used after the release.
func (dd *DDict) Release() {
	if dd.p != nil {
		ensureNoError("ZSTD_freeDDict", C.ZSTD_freeDDict(dd.p))
		dd.p = nil
	}
}
// freeDDict is registered by NewDDict as the finalizer for DDict.
// It releases v, which must hold a *DDict.
func freeDDict(v interface{}) {
	dd := v.(*DDict)
	dd.Release()
}
|