File: mtf_rle2.go

package info (click to toggle)
golang-github-dsnet-compress 0.0.2~git20230904.39efe44%2Bdfsg1-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,724 kB
  • sloc: sh: 108; makefile: 5
file content (133 lines) | stat: -rw-r--r-- 3,372 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
// Copyright 2015, Joe Tsai. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.

package bzip2

import "github.com/dsnet/compress/internal/errors"

// moveToFront implements both the MTF and RLE stages of bzip2 at the same time.
// Any runs of zeros in the encoded output will be replaced by a sequence of
// RUNA and RUNB symbols are encode the length of the run.
//
// The RLE encoding used can actually be encoded to and decoded from using
// normal two's complement arithmetic. The methodology for doing so is below.
//
// Assuming the following:
//
//	num: The value being encoded by RLE encoding.
//	run: A sequence of RUNA and RUNB symbols represented as a binary integer,
//	where RUNA is the 0 bit, RUNB is the 1 bit, and least-significant RUN
//	symbols are at the least-significant bit positions.
//	cnt: The number of RUNA and RUNB symbols.
//
// Then the RLE encoding used by bzip2 has this mathematical property:
//
//	num+1 == (1<<cnt) | run
type moveToFront struct {
	dictBuf [256]uint8
	dictLen int

	vals    []byte
	syms    []uint16
	blkSize int
}

func (mtf *moveToFront) Init(dict []uint8, blkSize int) {
	if len(dict) > len(mtf.dictBuf) {
		panicf(errors.Internal, "alphabet too large")
	}
	copy(mtf.dictBuf[:], dict)
	mtf.dictLen = len(dict)
	mtf.blkSize = blkSize
}

func (mtf *moveToFront) Encode(vals []byte) (syms []uint16) {
	dict := mtf.dictBuf[:mtf.dictLen]
	syms = mtf.syms[:0]

	if len(vals) > mtf.blkSize {
		panicf(errors.Internal, "exceeded block size")
	}

	var lastNum uint32
	for _, val := range vals {
		// Normal move-to-front transform.
		var idx uint8 // Reverse lookup idx in dict
		for di, dv := range dict {
			if dv == val {
				idx = uint8(di)
				break
			}
		}
		copy(dict[1:], dict[:idx])
		dict[0] = val

		// Run-length encoding augmentation.
		if idx == 0 {
			lastNum++
			continue
		}
		if lastNum > 0 {
			for rc := lastNum + 1; rc != 1; rc >>= 1 {
				syms = append(syms, uint16(rc&1))
			}
			lastNum = 0
		}
		syms = append(syms, uint16(idx)+1)
	}
	if lastNum > 0 {
		for rc := lastNum + 1; rc != 1; rc >>= 1 {
			syms = append(syms, uint16(rc&1))
		}
	}
	mtf.syms = syms
	return syms
}

func (mtf *moveToFront) Decode(syms []uint16) (vals []byte) {
	dict := mtf.dictBuf[:mtf.dictLen]
	vals = mtf.vals[:0]

	var lastCnt uint
	var lastRun uint32
	for _, sym := range syms {
		// Run-length encoding augmentation.
		if sym < 2 {
			lastRun |= uint32(sym) << lastCnt
			lastCnt++
			continue
		}
		if lastCnt > 0 {
			cnt := int((1<<lastCnt)|lastRun) - 1
			if len(vals)+cnt > mtf.blkSize || lastCnt > 24 {
				panicf(errors.Corrupted, "run-length decoding exceeded block size")
			}
			for i := cnt; i > 0; i-- {
				vals = append(vals, dict[0])
			}
			lastCnt, lastRun = 0, 0
		}

		// Normal move-to-front transform.
		val := dict[sym-1] // Forward lookup val in dict
		copy(dict[1:], dict[:sym-1])
		dict[0] = val

		if len(vals) >= mtf.blkSize {
			panicf(errors.Corrupted, "run-length decoding exceeded block size")
		}
		vals = append(vals, val)
	}
	if lastCnt > 0 {
		cnt := int((1<<lastCnt)|lastRun) - 1
		if len(vals)+cnt > mtf.blkSize || lastCnt > 24 {
			panicf(errors.Corrupted, "run-length decoding exceeded block size")
		}
		for i := cnt; i > 0; i-- {
			vals = append(vals, dict[0])
		}
	}
	mtf.vals = vals
	return vals
}