File: bwt_test.go

package info (click to toggle)
golang-github-dsnet-compress 0.0.2~git20230904.39efe44%2Bdfsg1-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,724 kB
  • sloc: sh: 108; makefile: 5
file content (121 lines) | stat: -rw-r--r-- 4,631 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
// Copyright 2015, Joe Tsai. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.

package bzip2

import (
	"testing"

	"github.com/dsnet/compress/internal/testutil"
)

func TestBurrowsWheelerTransform(t *testing.T) {
	vectors := []struct {
		input  []byte // The input test string
		output []byte // Expected output string after BWT
		ptr    int    // The BWT origin pointer
	}{{
		input:  []byte(""),
		output: []byte(""),
		ptr:    -1,
	}, {
		input:  []byte("Hello, world!"),
		output: []byte(",do!lHrellwo "),
		ptr:    3,
	}, {
		input:  []byte("SIX.MIXED.PIXIES.SIFT.SIXTY.PIXIE.DUST.BOXES"),
		output: []byte("TEXYDST.E.IXIXIXXSSMPPS.B..E.S.EUSFXDIIOIIIT"),
		ptr:    29,
	}, {
		input:  []byte("0123456789"),
		output: []byte("9012345678"),
		ptr:    0,
	}, {
		input:  []byte("9876543210"),
		output: []byte("1234567890"),
		ptr:    9,
	}, {
		input:  []byte("The quick brown fox jumped over the lazy dog."),
		output: []byte("kynxederg.l ie hhpv otTu c uwd rfm eb qjoooza"),
		ptr:    9,
	}, {
		input: []byte("" +
			"Mary had a little lamb, its fleece was white as snow" +
			"Mary had a little lamb, its fleece was white as snow" +
			"Mary had a little lamb, its fleece was white as snow" +
			"Mary had a little lamb, its fleece was white as snow" +
			"Mary had a little lamb, its fleece was white as snow" +
			"Mary had a little lamb, its fleece was white as snow" +
			"Mary had a little lamb, its fleece was white as snow" +
			"Mary had a little lamb, its fleece was white as snow" +
			"Nary had a little lamb, its fleece was white as snow"),
		output: []byte("" +
			"dddddddddeeeeeeeeesssssssssyyyyyyyyy,,,,,,,,,eeeeeee" +
			"eeaaaaaaaaassssssssseeeeeeeeesssssssssbbbbbbbbbwwwww" +
			"wwww         hhhhhhhhhlllllllllNMMMMMMMM         www" +
			"wwwwwwmmmmmmmmmeeeeeeeeeaaaaaaaaatttttttttlllllllllc" +
			"cccccccceeeeeeeeelllllllll                  wwwwwwww" +
			"whhhhhhhhh         lllllllll         tttttttttffffff" +
			"fff         aaaaaaaaasssssssssnnnnnnnnnaaaaaaaaatttt" +
			"tttttaaaaaaaaaaaaaaaaaa         iiiiiiiiitttttttttii" +
			"iiiiiiiiiiiiiiiiooooooooo                  rrrrrrrrr"),
		ptr: 99,
	}, {
		input: []byte("" +
			"AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTCTCTGAC" +
			"AGCAGCTTCTGAACTGGTTACCTGCCGTGAGTAAATTAAAATTTTATTGACTTAGGTCACTAAA" +
			"TACTTTAACCAATATAGGCATAGCGCACAGACAGATAAAAATTACAGAGTACACAACATCCATG" +
			"AAACGCATTAGCACCACCATTACCACCACCATCACCACCACCATCACCATTACCATTACCACAG" +
			"GTAACGGTGCGGGCTGACGCGTACAGGAAACACAGAAAAAAGCCCGCACCTGACAGTGCGGGCT" +
			"TTTTTTTCGACCAAAGGTAACGAGGTAACAACCATGCGAGTGTTGAAGTTCGGCGGTACATCAG" +
			"TGGCAAATGCAGAACGTTTTCTGCGGGTTGCCGATATTCTGGAAAGCAATGCCAGGCAGGGGCA"),
		output: []byte("" +
			"TAGAATAAATGGAGACTCTAATACTCTACTGGAAACAGACCACAAACATACCTGGTCGTAGATT" +
			"CCCCCCATCCCTAAGAAACGAGTCCCCACATCATCACCTCGACTGGGCCGAGACTAAGCCCCCA" +
			"ACTGAACCCCCTTACGAAGGCGGAAGCTCCGCCCTGTAGAAAAGACGAATGCCAACCCCCGTAA" +
			"AAAAAAGAATAAAAGGCGAATAGCGCAATAGGGGAGCAATTTTCGTACTTATAGAGGAGTGATT" +
			"ATTCTTTCTAACACGGTGGACACTAGGCTATTTATTTGCGAAGATTTGGAACGGGCCCACAAAC" +
			"ACTGAGGGACGGATCGATATAGATGCTATCGGTGGGTGGTTTTATAATAAATAAGATATTGGTC" +
			"TTTCACTCCCCTGCAATCAGGCCGGCAGCGAATAAAAGACTTTGCATAGAGCTTTTACTGTTTC"),
		ptr: 99,
	}, {
		input:  testutil.MustLoadFile("testdata/gauntlet_test3.bin"),
		output: testutil.MustLoadFile("testdata/gauntlet_test3.bwt"),
		ptr:    0,
	}, {
		input:  testutil.MustLoadFile("testdata/silesia_ooffice.bin"),
		output: testutil.MustLoadFile("testdata/silesia_ooffice.bwt"),
		ptr:    461,
	}, {
		input:  testutil.MustLoadFile("testdata/silesia_xray.bin"),
		output: testutil.MustLoadFile("testdata/silesia_xray.bwt"),
		ptr:    1532,
	}, {
		input:  testutil.MustLoadFile("testdata/testfiles_test3.bin"),
		output: testutil.MustLoadFile("testdata/testfiles_test3.bwt"),
		ptr:    0,
	}, {
		input:  testutil.MustLoadFile("testdata/testfiles_test4.bin"),
		output: testutil.MustLoadFile("testdata/testfiles_test4.bwt"),
		ptr:    1026,
	}}

	bwt := new(burrowsWheelerTransform)
	for i, v := range vectors {
		output := append([]byte(nil), v.input...)
		ptr := bwt.Encode(output)
		input := append([]byte(nil), v.output...)
		bwt.Decode(input, ptr)

		if got, want, ok := testutil.BytesCompare(input, v.input); !ok {
			t.Errorf("test %d, input mismatch:\ngot  %s\nwant %s", i, got, want)
		}
		if got, want, ok := testutil.BytesCompare(output, v.output); !ok {
			t.Errorf("test %d, output mismatch:\ngot  %s\nwant %s", i, got, want)
		}
		if ptr != v.ptr {
			t.Errorf("test %d, pointer mismatch: got %d, want %d", i, ptr, v.ptr)
		}
	}
}