1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112
|
// Copyright 2023+ Klaus Post. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
import (
"flag"
"fmt"
"io"
"log"
"os"
"path/filepath"
"runtime/debug"
"github.com/klauspost/compress/dict"
"github.com/klauspost/compress/zstd"
)
var (
wantLenFlag = flag.Int("len", 112<<10, "Specify custom output size")
wantHashBytes = flag.Int("hash", 6, "Hash bytes match length. Minimum match length.")
wantMaxBytes = flag.Int("max", 32<<10, "Max input length to index per input file")
wantOutput = flag.String("o", "dictionary.bin", "Output name")
wantFormat = flag.String("format", "zstd", `Output type. "zstd" "s2" or "raw"`)
wantZstdID = flag.Uint("dictID", 0, "Zstd dictionary ID. Default (0) will be random")
wantZstdCompat = flag.Bool("zcompat", true, "Generate dictionary compatible with zstd 1.5.5 and older")
wantZstdLevel = flag.Int("zlevel", 0, "Zstd compression level. 0-4")
quiet = flag.Bool("q", false, "Do not print progress")
)
func main() {
flag.Parse()
debug.SetGCPercent(25)
o := dict.Options{
MaxDictSize: *wantLenFlag,
HashBytes: *wantHashBytes,
Output: os.Stdout,
ZstdDictID: uint32(*wantZstdID),
ZstdDictCompat: *wantZstdCompat,
ZstdLevel: zstd.EncoderLevel(*wantZstdLevel),
}
if *wantOutput == "" || *quiet {
o.Output = nil
}
var input [][]byte
base := flag.Arg(0)
if base == "" {
flag.Usage()
log.Fatal("no path with files specified")
}
// Index ALL hashes in all files.
err := filepath.Walk(base, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if info.IsDir() {
return nil
}
f, err := os.Open(path)
if err != nil {
log.Print(err)
return nil
}
defer f.Close()
b, err := io.ReadAll(io.LimitReader(f, int64(*wantMaxBytes)))
if len(b) < 8 {
return nil
}
if len(b) == 0 {
return nil
}
input = append(input, b)
if !*quiet {
fmt.Print("\r"+info.Name(), " read...")
}
return nil
})
if err != nil {
log.Fatal(err)
}
if len(input) == 0 {
log.Fatal("no files read")
}
var out []byte
switch *wantFormat {
case "zstd":
out, err = dict.BuildZstdDict(input, o)
case "s2":
out, err = dict.BuildS2Dict(input, o)
case "raw":
out, err = dict.BuildRawDict(input, o)
default:
err = fmt.Errorf("unknown format %q", *wantFormat)
}
if err != nil {
log.Fatal(err)
}
if *wantOutput != "" {
err = os.WriteFile(*wantOutput, out, 0666)
if err != nil {
log.Fatal(err)
}
} else {
_, err = os.Stdout.Write(out)
if err != nil {
log.Fatal(err)
}
}
}
|