File: huffman.hpp

package info (click to toggle)
ares 126-3
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 32,600 kB
  • sloc: cpp: 356,508; ansic: 20,394; makefile: 16; sh: 2
file content (84 lines) | stat: -rw-r--r-- 2,355 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#pragma once

namespace nall::Encode {

inline auto Huffman(array_view<u8> input) -> vector<u8> {
  vector<u8> output;
  for(u32 byte : range(8)) output.append(input.size() >> byte * 8);

  struct Node {
    u32 frequency = 0;
    u32 parent = 0;
    u32 lhs = 0;
    u32 rhs = 0;
  };
  array<Node[512]> nodes;
  for(u32 offset : range(input.size())) nodes[input[offset]].frequency++;

  u32 count = 0;
  for(u32 offset : range(511)) {
    if(nodes[offset].frequency) count++;
    else nodes[offset].parent = 511;
  }

  auto minimum = [&] {
    u32 frequency = ~0, minimum = 511;
    for(u32 index : range(511)) {
      if(!nodes[index].parent && nodes[index].frequency && nodes[index].frequency < frequency) {
        frequency = nodes[index].frequency;
        minimum = index;
      }
    }
    return minimum;
  };

  //group the least two frequently used nodes until only one node remains
  u32 index = 256;
  for(u32 remaining = max(2, count); remaining >= 2; remaining--) {
    u32 lhs = minimum();
    nodes[lhs].parent = index;
    u32 rhs = minimum();
    nodes[rhs].parent = index;
    if(remaining == 2) index = nodes[lhs].parent = nodes[rhs].parent = 511;
    nodes[index].lhs = lhs;
    nodes[index].rhs = rhs;
    nodes[index].parent = 0;
    nodes[index].frequency = nodes[lhs].frequency + nodes[rhs].frequency;
    index++;
  }

  u32 byte = 0, bits = 0;
  auto write = [&](bool bit) {
    byte = byte << 1 | bit;
    if(++bits == 8) output.append(byte), bits = 0;
  };

  //only the upper half of the table is needed for decompression
  //the first 256 nodes are always treated as leaf nodes
  for(u32 offset : range(256)) {
    for(u32 index : reverse(range(9))) write(nodes[256 + offset].lhs >> index & 1);
    for(u32 index : reverse(range(9))) write(nodes[256 + offset].rhs >> index & 1);
  }

  for(u32 byte : input) {
    u32 node = byte, length = 0;
    u256 sequence = 0;
    //traversing the array produces the bitstream in reverse order
    do {
      u32 parent = nodes[node].parent;
      bool bit = nodes[nodes[node].parent].rhs == node;
      sequence = sequence << 1 | bit;
      length++;
      node = parent;
    } while(node != 511);
    //output the generated bits in the correct order
    for(u32 index : range(length)) {
      write(sequence >> index & 1);
    }
  }
  while(bits) write(0);

  return output;
}

}