File: lzw.rb

package info (click to toggle)
ruby-pdf-reader 2.15.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 33,512 kB
  • sloc: ruby: 11,959; sh: 46; makefile: 11
file content (155 lines) | stat: -rw-r--r-- 4,835 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# coding: utf-8
# typed: strict
# frozen_string_literal: true

module PDF

  class Reader

    # A general class for decoding LZW compressed data. LZW can be
    # used in PDF files to compress streams, usually for image data sourced
    # from a TIFF file.
    #
    # See the following links for more information:
    #
    #   ref http://www.fileformat.info/format/tiff/corion-lzw.htm
    #   ref http://marknelson.us/1989/10/01/lzw-data-compression/
    #
    # The PDF spec also has some data on the algorithm.
    #
    class LZW # :nodoc:

      # Wraps an LZW encoded string and hands out its content as a sequence of
      # fixed-width codes, taking the most significant bits of each byte first.
      # The code width can be changed between reads.
      class BitStream # :nodoc:

        # data          - the LZW encoded bytes. The string is copied, never
        #                 modified, so frozen strings are accepted.
        # bits_in_chunk - the initial code width in bits (9 to 12).
        #
        # Raises MalformedPDFError if bits_in_chunk is outside 9..12.
        #
        #: (String, Integer) -> void
        def initialize(data, bits_in_chunk)
          # String#b returns a binary-encoded copy. Calling force_encoding on
          # the argument itself would change the caller's string and raise
          # FrozenError for frozen input.
          @data = data.b #: String
          @current_pos = 0 #: Integer
          @bits_left_in_byte = 8 #: Integer
          @bits_in_chunk = 0 #: Integer
          set_bits_in_chunk(bits_in_chunk)
        end

        # Changes the width, in bits, of the codes returned by #read.
        #
        # Raises MalformedPDFError if bits_in_chunk is outside 9..12.
        #
        #: (Integer) -> void
        def set_bits_in_chunk(bits_in_chunk)
          raise MalformedPDFError, "invalid LZW bits" unless bits_in_chunk.between?(9, 12)

          @bits_in_chunk = bits_in_chunk
        end

        # Reads the next code from the stream.
        #
        # Returns -1 when no data is left at all. When the data runs out part
        # way through a code, the bits that were available are returned in the
        # high positions of the code and the missing low bits are zero.
        #
        #: () -> Integer
        def read
          bits_left_in_chunk = @bits_in_chunk
          chunk = -1
          while bits_left_in_chunk > 0 && @current_pos < @data.bytesize
            chunk = 0 if chunk < 0
            byte = @data.getbyte(@current_pos).to_i
            # keep only the bits of this byte that have not been consumed yet
            current_byte = byte & ((1 << @bits_left_in_byte) - 1)
            dif = bits_left_in_chunk - @bits_left_in_byte
            if dif > 0
              # the code needs more bits than this byte has: make room for them
              current_byte <<= dif
            elsif dif < 0
              # the byte has more bits than the code needs: drop the surplus
              current_byte >>= -dif
            end
            chunk |= current_byte # add bits to result
            bits_left_in_chunk = dif >= 0 ? dif : 0
            @bits_left_in_byte = dif < 0 ? -dif : 0
            if @bits_left_in_byte.zero? # move on to the next byte
              @current_pos += 1
              @bits_left_in_byte = 8
            end
          end
          chunk
        end
      end

      # Reserved code: decoding stops when this code is read.
      CODE_EOD = 257 #: Integer #end of data
      # Reserved code: the decoder starts a fresh string table and goes back
      # to 9 bit codes when this code is read.
      CODE_CLEAR_TABLE = 256 #: Integer #clear table

      # Stores the pairs code => string.
      #
      # Codes 0 to 255 are fixed and stand for the single byte with that value.
      # Codes 256 and 257 are reserved (CODE_CLEAR_TABLE and CODE_EOD) and have
      # no string. Strings added while decoding are numbered from 258 upwards.
      class StringTable
        # The code that the next added string will receive.
        #: Integer
        attr_reader :string_table_pos

        #: () -> void
        def initialize
          @data = {} #: Hash[Integer, String]
          # The initial code
          @string_table_pos = 258 #: Integer
        end

        # Looks up the string for a code.
        #
        # Returns the one-byte string for codes 0..255, the stored string for
        # codes above 257, or nil when such a code has not been added yet.
        #
        # Raises MalformedPDFError for codes that can never map to a string
        # (negative codes and the reserved codes 256 and 257). Integer#chr
        # would otherwise fail on them with a RangeError.
        #
        #: (Integer) -> String?
        def [](key)
          if key > 257
            @data[key]
          elsif key.between?(0, 255)
            key.chr
          else
            raise MalformedPDFError, "invalid LZW data"
          end
        end

        # Stores string under the next free code and advances the counter.
        #
        #: (String) -> void
        def add(string)
          @data[@string_table_pos] = string
          @string_table_pos += 1
        end
      end

      # Decompresses a LZW compressed string.
      #
      # The data is read as a sequence of codes that start out 9 bits wide and
      # grow up to 12 bits as the string table fills. Decoding stops at the
      # end-of-data code (CODE_EOD).
      #
      # Raises MalformedPDFError when the data is not valid LZW: it ends before
      # CODE_EOD, a clear-table code is followed by something other than a
      # literal byte or CODE_EOD, or a code cannot be resolved to a string
      # (which includes data that does not begin with a clear-table code).
      #
      #: (String) -> String
      def self.decode(data)
        stream = BitStream.new(data.to_s, 9) # size of codes between 9 and 12 bits
        string_table = StringTable.new
        result = "".dup
        old_code = nil #: Integer?
        until (code = stream.read) == CODE_EOD
          # BitStream#read returns -1 once the data is exhausted, so the stream
          # was cut short before its end-of-data code
          raise MalformedPDFError, "invalid LZW data" if code < 0

          if code == CODE_CLEAR_TABLE
            stream.set_bits_in_chunk(9)
            string_table = StringTable.new
            code = stream.read
            break if code == CODE_EOD
            # a fresh table only knows the literal bytes, so anything else
            # here (including running out of data) cannot be decoded
            raise MalformedPDFError, "invalid LZW data" unless code.between?(0, 255)

            result << code.chr
          else
            string = string_table[code]
            if string
              result << string
              string_table.add create_new_string(string_table, old_code, code)
            else
              # the code is not in the table yet: it stands for the previous
              # string followed by that string's own first byte
              new_string = create_new_string(string_table, old_code, old_code)
              result << new_string
              string_table.add new_string
            end
            # increase the size of the codes when the limit is reached
            case string_table.string_table_pos
            when 511 then stream.set_bits_in_chunk(10)
            when 1023 then stream.set_bits_in_chunk(11)
            when 2047 then stream.set_bits_in_chunk(12)
            end
          end
          old_code = code
        end
        result
      end

      # Builds the string for a new table entry: the string stored for
      # some_code followed by the first character of the string stored for
      # other_code.
      #
      # Raises MalformedPDFError if either code is nil or has no string in
      # string_table.
      #
      #: (PDF::Reader::LZW::StringTable, Integer?, Integer?) -> String
      def self.create_new_string(string_table, some_code, other_code)
        if some_code.nil? || other_code.nil?
          raise MalformedPDFError, "invalid LZW data"
        end

        prefix = string_table[some_code]
        suffix = string_table[other_code]
        raise MalformedPDFError, "invalid LZW data" unless prefix && suffix

        prefix + suffix.chr
      end
      private_class_method :create_new_string

    end
  end
end