File: biff8.rb

package info (click to toggle)
ruby-spreadsheet 1.3.4-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 8,980 kB
  • sloc: ruby: 6,939; makefile: 10
file content (243 lines) | stat: -rw-r--r-- 10,049 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
module Spreadsheet
  module Excel
    class Reader
      ##
      # This Module collects reader methods such as read_string that are specific to
      # Biff8.  This Module is likely to be expanded as Support for older Versions
      # of Excel grows and methods get moved here for disambiguation.
      module Biff8
        include Spreadsheet::Excel::Internals
        ##
        # When a String is too long for one Opcode, it is continued in a Continue
        # Opcode. Excel may reconsider compressing the remainder of the string.
        # This method appends the available remainder (decompressed if necessary) to
        # the incomplete string.
        def continue_string work, incomplete_string = @incomplete_string
          opts, _ = work.unpack "C"
          wide = opts & 1
          head, chars = incomplete_string
          owing = chars - head.size / 2
          size = owing * (wide + 1)
          string = work[1, size]
          if wide == 0
            string = wide string
          end
          head << string
          if head.size >= chars * 2
            @incomplete_string = nil
          end
          size + 1
        end

        # When a String is too long for one Opcode, it is continued in a Continue
        # Opcode. Excel may reconsider compressing the remainder of the string.
        # This method appends the available remainder (decompressed if necessary) to
        # the incomplete string.
        def unpack_string work
          opts, _ = work.unpack "C"
          wide = opts & 1
          string = work[1, -1]
          if wide == 0
            wide string
          end
        end

        ##
        # When a String is too long for one Opcode, it is continued in a Continue
        # Opcode. Excel may reconsider compressing the remainder of the string.
        # This method only evaluates the header and registers the address of the
        # continuation with the previous SstEntry.
        def continue_string_header work, oppos
          opts, _ = work.unpack "C"
          wide = opts & 1
          owing = @incomplete_sst.continued_chars
          size = [work.size, owing * (1 + wide) + 1].min
          chars = (size - 1) / (1 + wide)
          skip = size
          @incomplete_sst.continue oppos + OPCODE_SIZE, size, chars
          unless @incomplete_sst.continued?
            @workbook.add_shared_string @incomplete_sst
            skip += @incomplete_skip
            @incomplete_sst = nil
            @incomplete_skip = nil
          end
          skip
        end

        ##
        # Read more data into the Shared String Table. (see also: #read_sst)
        # This method only evaluates the header, the actual work is done in #_read_sst
        def continue_sst work, oppos, len
          pos = 0
          if @incomplete_sst
            pos = continue_string_header work, oppos
          elsif !@incomplete_skip.nil?
            pos = @incomplete_skip
            @incomplete_skip = nil
          end
          @sst_offset[1] += len
          _read_sst work, oppos, pos
        end

        def postread_workbook # :nodoc:
          super
          @incomplete_string, @sst_size, @sst_offset, @incomplete_sst = nil, @incomplete_skip = nil
        end

        ##
        # Store the offset of extsst, so we can write a new extsst when the
        # sst has changed
        def read_extsst work, pos, len
          @workbook.offsets.store :extsst, [pos, len]
        end

        ##
        # Read the Shared String Table present in all Biff8 Files.
        # This method only evaluates the header, the actual work is done in #_read_sst
        def read_sst work, pos, len
          # Offset  Size  Contents
          #      0     4  Total number of strings in the workbook (see below)
          #      4     4  Number of following strings (nm)
          #      8  var.  List of nm Unicode strings, 16-bit string length (➜ 3.4)
          _, @sst_size = work.unpack "V2"
          @sst_offset = [pos, len]
          @workbook.offsets.store :sst, @sst_offset
          _read_sst work, pos, 8
        end

        ##
        # Read a string from the Spreadsheet, such as a Worksheet- or Font-Name, or a
        # Number-Format. See also #read_string_header and #read_string_body
        def read_string work, count_length = 1
          #   Offset    Size  Contents
          #        0  1 or 2  Length of the string (character count, ln)
          #   1 or 2       1  Option flags:
          #                   Bit  Mask  Contents
          #                     0  0x01  Character compression (ccompr):
          #                              0 = Compressed (8-bit characters)
          #                              1 = Uncompressed (16-bit characters)
          #                     2  0x04  Asian phonetic settings (phonetic):
          #                              0 = Does not contain Asian phonetic settings
          #                              1 = Contains Asian phonetic settings
          #                     3  0x08  Rich-Text settings (richtext):
          #                              0 = Does not contain Rich-Text settings
          #                              1 = Contains Rich-Text settings
          # [2 or 3]       2  (optional, only if richtext=1)
          #                   Number of Rich-Text formatting runs (rt)
          #   [var.]       4  (optional, only if phonetic=1)
          #                   Size of Asian phonetic settings block (in bytes, sz)
          #     var.      ln  Character array (8-bit characters
          #          or 2∙ln               or 16-bit characters, dependent on ccompr)
          #   [var.]    4∙rt  (optional, only if richtext=1)
          #                   List of rt formatting runs (➜ 3.2)
          #   [var.]      sz  (optional, only if phonetic=1)
          #                   Asian Phonetic Settings Block (➜ 3.4.2)
          chars, offset, wide, _, _, available, owing, _ = read_string_header work, count_length
          string, _ = read_string_body work, offset, available, wide > 0
          if owing > 0
            @incomplete_string = [string, chars]
          end
          string
        end

        ##
        # Read the body of a string. Returns the String (decompressed if necessary) and
        # the available data (unchanged).
        def read_string_body work, offset, available, wide
          data = work[offset, available]
          widened_data = wide ? data : wide(data)
          [widened_data, data]
        end

        ##
        # Read the header of a string. Returns the following information in an Array:
        # * The total number of characters in the string
        # * The offset of the actual string data (= the length of this header in bytes)
        # * Whether or not the string was compressed (0/1)
        # * Whether or not the string contains asian phonetic settings (0/1)
        # * Whether or not the string contains richtext formatting (0/1)
        # * The number of bytes containing characters in this chunk of data
        # * The number of characters missing from this chunk of data and expected to
        #   follow in a Continue Opcode
        def read_string_header work, count_length = 1, offset = 0
          fmt = (count_length == 1) ? "C2" : "vC"
          chars, opts = work[offset, 1 + count_length].unpack fmt
          wide = opts & 1
          phonetic = (opts >> 2) & 1
          richtext = (opts >> 3) & 1
          size = chars * (wide + 1)
          skip = 0
          if richtext > 0
            runs, = work[offset + 1 + count_length, 2].unpack "v"
            skip = 4 * runs
          end
          if phonetic > 0
            psize, = work[offset + 1 + count_length + richtext * 2, 4].unpack "V"
            skip += psize
          end
          flagsize = 1 + count_length + richtext * 2 + phonetic * 4
          avbl = [work.size - offset, flagsize + size].min
          have_chrs = (avbl - flagsize) / (1 + wide)
          owing = chars - have_chrs
          [chars, flagsize, wide, phonetic, richtext, avbl, owing, skip]
        end

        def read_range_address_list work, len
          # Cell range address, BIFF8:
          # Offset  Size  Contents
          # 0       2     Index to first row
          # 2       2     Index to last row
          # 4       2     Index to first column
          # 6       2     Index to last column
          # ! In several cases, BIFF8 still writes the BIFF2-BIFF5 format of a cell range address
          # (using 8-bit values for the column indexes). This will be mentioned at the respective place.
          #
          results = []
          return results if len < 2
          count = work[0..1].unpack1("v")
          offset = 2
          count.times do |i|
            results << work[offset...offset + 8].unpack("v4")
            offset += 8
          end
          results
        end

        ##
        # Insert null-characters into a compressed UTF-16 string
        def wide string
          data = "".dup
          string.each_byte { |byte| data << byte.chr << 0.chr }
          data
        end

        private

        ##
        # Read the Shared String Table present in all Biff8 Files.
        def _read_sst work, oppos, pos
          worksize = work.size
          while @workbook.sst_size < @sst_size && pos < worksize
            sst = SstEntry.new offset: oppos + OPCODE_SIZE + pos,
              ole: @data,
              reader: self
            sst.chars, sst.flags, wide, sst.phonetic, sst.richtext, sst.available,
              sst.continued_chars, skip = read_string_header work, 2, pos
            sst.wide = wide > 0
            if sst.continued?
              @incomplete_sst = sst
              @incomplete_skip = skip
              pos += sst.available
            else
              @workbook.add_shared_string sst
              pos += sst.available + skip
              if pos > worksize
                @incomplete_skip = pos - worksize
              end
            end
          end
        end
      end
    end
  end
end