1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
|
# coding: utf-8
# typed: strict
# frozen_string_literal: true
module PDF
class Reader
# A general class for decoding LZW compressed data. LZW can be
# used in PDF files to compress streams, usually for image data sourced
# from a TIFF file.
#
# See the following links for more information:
#
# ref http://www.fileformat.info/format/tiff/corion-lzw.htm
# ref http://marknelson.us/1989/10/01/lzw-data-compression/
#
# The PDF spec also has some data on the algorithm.
#
class LZW # :nodoc:
# Wraps an LZW encoded string and hands it out as a sequence of
# fixed-width codes, taking the most significant bits of each byte first.
#
# The code width is set by the constructor and can be changed between
# reads with #set_bits_in_chunk.
class BitStream # :nodoc:
  # Builds a reader over a binary copy of +data+. Working on a copy means
  # the caller's string keeps its encoding and frozen strings are accepted.
  #
  # Raises MalformedPDFError when +bits_in_chunk+ is outside 9..12.
  #
  #: (String, Integer) -> void
  def initialize(data, bits_in_chunk)
    @data = data.b #: String
    @current_pos = 0 #: Integer
    @bits_left_in_byte = 8 #: Integer
    @bits_in_chunk = 0 #: Integer
    set_bits_in_chunk(bits_in_chunk)
  end

  # Changes the width, in bits, of the codes returned by later #read calls.
  #
  # Raises MalformedPDFError when +bits_in_chunk+ is outside 9..12.
  #
  #: (Integer) -> void
  def set_bits_in_chunk(bits_in_chunk)
    raise MalformedPDFError, "invalid LZW bits" if bits_in_chunk < 9 || bits_in_chunk > 12
    @bits_in_chunk = bits_in_chunk
  end

  # Reads the next code from the stream.
  #
  # Returns -1 when no data is left at the time of the call. When the data
  # runs out part-way through a code, the bits gathered so far are returned
  # in the high-order positions of the code and the missing low bits are 0.
  #
  #: () -> Integer
  def read
    bits_left_in_chunk = @bits_in_chunk
    chunk = -1
    while bits_left_in_chunk > 0 && @current_pos < @data.bytesize
      chunk = 0 if chunk < 0
      byte = @data.getbyte(@current_pos).to_i
      # keep only the bits of this byte that earlier reads have not consumed
      unread = byte & ((1 << @bits_left_in_byte) - 1)
      dif = bits_left_in_chunk - @bits_left_in_byte
      if dif >= 0
        # the code needs every unread bit of this byte: place them above the
        # +dif+ bits still to come, then move on to the next byte
        chunk |= unread << dif
        bits_left_in_chunk = dif
        @current_pos += 1
        @bits_left_in_byte = 8
      else
        # the code ends inside this byte: take its top bits and leave the
        # remaining -dif low bits for the next read
        chunk |= unread >> -dif
        bits_left_in_chunk = 0
        @bits_left_in_byte = -dif
      end
    end
    chunk
  end
end
# Reserved code that marks the end of the compressed data; decoding stops
# when it is read.
CODE_EOD = 257 #: Integer #end of data
# Reserved code that tells the decoder to start over with a fresh string
# table and 9 bit codes.
CODE_CLEAR_TABLE = 256 #: Integer #clear table
# Stores the pairs code => string.
#
# Only codes above 257 are kept in the table. Any other code is answered
# by converting the code itself to a character with Integer#chr.
class StringTable
  # The code that the next call to #add will assign.
  #: Integer
  attr_reader :string_table_pos

  #: () -> void
  def initialize
    @data = {} #: Hash[Integer, String]
    # The first code available for learned strings
    @string_table_pos = 258 #: Integer
  end

  # Looks up the string for +key+. Codes above 257 come from the table and
  # yield nil when they have not been added yet.
  #
  #: (Integer) -> String?
  def [](key)
    return key.chr unless key > 257

    @data[key]
  end

  # Files +string+ under the next free code and advances the position.
  #
  #: (String) -> void
  def add(string)
    @data[@string_table_pos] = string
    @string_table_pos += 1
  end
end
# Decompresses a LZW compressed string.
#
# The data is read as variable-width codes that start at 9 bits and grow to
# 10, 11 and 12 bits as the string table fills up. CODE_CLEAR_TABLE resets
# the table and the code width, and CODE_EOD ends decoding.
#
# Raises MalformedPDFError when the data runs out before CODE_EOD, when the
# code following a table reset is not a literal byte, or when a new table
# entry cannot be built because no previous code is available (for example
# the data does not start with CODE_CLEAR_TABLE).
#
#: (String) -> String
def self.decode(data)
  stream = BitStream.new(data.to_s, 9) # size of codes between 9 and 12 bits
  string_table = StringTable.new
  result = "".dup
  old_code = nil #: Integer?
  until (code = stream.read) == CODE_EOD
    # the stream yields a negative value once the data is exhausted
    raise MalformedPDFError, "invalid LZW data" if code < 0

    if code == CODE_CLEAR_TABLE
      stream.set_bits_in_chunk(9)
      string_table = StringTable.new
      # fetch the first code for the fresh table, skipping repeated clears
      code = stream.read while code == CODE_CLEAR_TABLE
      break if code == CODE_EOD
      # a fresh table holds nothing but the literal bytes 0..255
      raise MalformedPDFError, "invalid LZW data" unless code.between?(0, 255)

      result << string_table[code]
      old_code = code
    else
      string = string_table[code]
      if string
        result << string
        string_table.add create_new_string(string_table, old_code, code)
      else
        # the code is not in the table yet: emit the previous string
        # followed by its own first character, and learn that string
        new_string = create_new_string(string_table, old_code, old_code)
        result << new_string
        string_table.add new_string
      end
      old_code = code
      # increase the size of the codes when the limit is reached
      case string_table.string_table_pos
      when 511 then stream.set_bits_in_chunk(10)
      when 1023 then stream.set_bits_in_chunk(11)
      when 2047 then stream.set_bits_in_chunk(12)
      end
    end
  end
  result
end
# Builds the string for a new table entry: the string for +some_code+
# followed by the first character of the string for +other_code+.
#
# Raises MalformedPDFError when either code is nil or has no string in
# +string_table+.
#
#: (PDF::Reader::LZW::StringTable, Integer?, Integer?) -> String
def self.create_new_string(string_table, some_code, other_code)
  if some_code.nil? || other_code.nil?
    raise MalformedPDFError, "invalid LZW data"
  end

  prefix = string_table[some_code]
  tail_source = string_table[other_code]
  raise MalformedPDFError, "invalid LZW data" unless prefix && tail_source

  prefix + tail_source.chr
end
private_class_method :create_new_string
end
end
end
|