File: reader.rb

package info (click to toggle)
ruby-minitar 1.1.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 364 kB
  • sloc: ruby: 2,602; makefile: 11
file content (276 lines) | stat: -rw-r--r-- 7,746 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
# frozen_string_literal: true

class Minitar
  # The class that reads a tar format archive from a data stream. The data stream may be
  # sequential or random access, but certain features only work with random access data
  # streams.
  class Reader
    include Enumerable

    # Mixin that marks an EntryStream as closed for reading without closing the actual
    # data stream: #read, #getc and #rewind raise ClosedStream, and #closed? is true.
    module InvalidEntryStream
      def read(*) # :nodoc:
        raise ClosedStream
      end

      def getc # :nodoc:
        raise ClosedStream
      end

      def rewind # :nodoc:
        raise ClosedStream
      end

      def closed? # :nodoc:
        true
      end
    end

    # EntryStreams are pseudo-streams on top of the main data stream.
    class EntryStream
      # Define a reader method for every field named in PosixHeader::FIELDS.
      Minitar::PosixHeader::FIELDS.each { |field_name| attr_reader field_name.to_sym }

      # Builds an entry stream over +io+ for the entry described by +header+.
      #
      # Each header value is copied into the instance variable of the same name and the
      # read counter starts at zero. The current position of +io+ is remembered when
      # Minitar.seekable? accepts the stream (otherwise 0 is stored); #rewind later
      # restores that position.
      def initialize(header, io)
        @io = io
        @read = 0

        @name, @mode, @uid, @gid = header.name, header.mode, header.uid, header.gid
        @size, @mtime, @checksum = header.size, header.mtime, header.checksum
        @typeflag, @linkname = header.typeflag, header.linkname
        @magic, @version = header.magic, header.version
        @uname, @gname = header.uname, header.gname
        @devmajor, @devminor, @prefix = header.devmajor, header.devminor, header.prefix

        @orig_pos = Minitar.seekable?(@io) ? @io.pos : 0
      end

      # Reads up to +len+ bytes (or all remaining data when +len+ is omitted) from the
      # entry, never reading past the entry's size. Returns +nil+ if there is no more
      # data to read, which includes the underlying stream ending before the entry's
      # size has been reached.
      def read(len = nil)
        remaining = @size - @read
        return nil if remaining <= 0

        data = @io.read(len ? [len, remaining].min : remaining)
        # A length-limited read returns nil at end of stream; on a truncated archive
        # that must not blow up the byte accounting.
        @read += data.bytesize if data
        data
      end

      # Reads one character from the entry (a single byte when the underlying stream
      # yields single-byte characters). Returns +nil+ if there is no more data to read.
      def getc
        return nil if @read >= @size

        char = @io.getc
        # The stream's getc may hand back a multibyte character, so advance the read
        # pointer by the bytes actually consumed rather than by one.
        @read += char.bytesize if char
        char
      end

      # Returns +true+ if the entry represents a directory.
      def directory?
        # The dedicated directory typeflag settles it immediately.
        return true if @typeflag == "5"

        # For the "0" and NUL typeflags, a name ending with a slash is treated as
        # a directory. This is what other major tar implementations do for
        # interoperability and compatibility with older tar variants and some new ones.
        ["0", "\0"].include?(@typeflag) && @name.end_with?("/")
      end
      alias_method :directory, :directory?

      # Returns +true+ if the entry represents a plain file: its typeflag is "0" or NUL
      # and its name does not end with a slash.
      def file?
        return false unless ["0", "\0"].include?(@typeflag)

        !@name.end_with?("/")
      end
      alias_method :file, :file?

      # Returns +true+ if the current read pointer is at (or beyond) the end of the
      # EntryStream data.
      def eof?
        @read >= @size
      end

      # Returns the current read pointer in the EntryStream, i.e. the value of the
      # entry's read counter.
      def pos
        @read
      end

      alias_method :bytes_read, :pos

      # Sets the current read pointer to the beginning of the EntryStream by restoring
      # the stream position recorded at construction and resetting the read counter.
      # Raises Minitar::NonSeekableStream when Minitar.seekable? rejects the stream
      # for +pos=+.
      def rewind
        raise Minitar::NonSeekableStream unless Minitar.seekable?(@io, :pos=)

        @io.pos = @orig_pos
        @read = 0
      end

      # Returns the full and proper name of the entry: the name joined onto the prefix
      # with File.join when the prefix is not the empty string, otherwise the name alone.
      def full_name
        return @name if @prefix == ""

        File.join(@prefix, @name)
      end

      # Returns false while the entry stream is valid. Once the entry has been
      # extended with InvalidEntryStream, that module's #closed? answers true instead.
      def closed?
        false
      end

      # Closes the entry by invalidating it; the underlying data stream is not touched.
      def close
        invalidate
      end

      private

      # Extends this one instance with InvalidEntryStream.
      def invalidate
        extend(InvalidEntryStream)
      end
    end

    # With no associated block, +Reader::open+ is a synonym for +Reader::new+. If the
    # optional code block is given, it will be passed the new _reader_ as an argument and
    # the Reader object will automatically be closed when the block terminates. In this
    # instance, +Reader::open+ returns the value of the block.
    def self.open(io)
      reader = new(io)

      if block_given?
        # The ensure is scoped to the block form only; the reader handed back in the
        # blockless form must not be closed here.
        begin
          yield reader
        ensure
          reader.close
        end
      else
        reader
      end
    end

    # Iterates over each entry in the provided input. This wraps the common pattern of:
    #
    #     Minitar::Input.open(io) do |inp|
    #       inp.each do |entry|
    #         # ...
    #       end
    #     end
    #
    # If a block is not provided, an enumerator will be created with the same behaviour.
    #
    # :call-seq:
    #    Minitar::Reader.each_entry(io) -> enumerator
    #    Minitar::Reader.each_entry(io) { |entry| block } -> obj
    def self.each_entry(io)
      if block_given?
        Input.open(io) { |input| input.each_entry { |entry| yield entry } }
      else
        to_enum(__method__, io)
      end
    end

    # Creates and returns a new Reader object over +io+. The position +io+ reports at
    # this point is remembered for #rewind; it is +nil+ when asking for it raises.
    def initialize(io)
      @io = io
      @init_pos = io.pos
    rescue
      # Asking for the position failed (any StandardError): no start position is known.
      @init_pos = nil
    end

    # Resets the read pointer to the beginning of data stream. Do not call this during
    # a #each or #each_entry iteration. This only works with random access data streams
    # that respond to #rewind and #pos.
    #
    # Raises Minitar::NonSeekableStream when no starting position was recorded for the
    # stream, or when Minitar.seekable? rejects the stream for the method needed to
    # return to that position.
    def rewind
      # @init_pos is nil when asking the stream for its position failed in #initialize.
      # Such a stream cannot be rewound; without this guard the #zero? call below
      # would raise NoMethodError on nil.
      raise Minitar::NonSeekableStream if @init_pos.nil?

      if @init_pos.zero?
        raise Minitar::NonSeekableStream unless Minitar.seekable?(@io, :rewind)
        @io.rewind
      else
        raise Minitar::NonSeekableStream unless Minitar.seekable?(@io, :pos=)
        @io.pos = @init_pos
      end
    end

    # Iterates through each entry in the data stream, yielding an EntryStream for each
    # one. Returns an enumerator when no block is given.
    #
    # After the block returns, whatever part of the entry the block left unread is
    # skipped, together with the padding up to the next 512-byte boundary, and the
    # entry is closed. Iteration stops at the end of the stream or at the first empty
    # header.
    #
    # Raises Minitar::InvalidTarStream when a header is not valid or has a negative
    # size, and UnexpectedEOF when the stream ends while a long name or (on streams
    # that cannot seek) unread entry data is being consumed.
    def each_entry
      return to_enum unless block_given?

      loop do
        return if @io.eof?

        header = Minitar::PosixHeader.from_stream(@io)
        raise Minitar::InvalidTarStream unless header.valid?
        return if header.empty?

        raise Minitar::InvalidTarStream if header.size < 0

        if header.long_name?
          # The long name is stored as data, rounded up to whole 512-byte blocks; the
          # header that follows describes the entry the name belongs to.
          name_block = (header.size / 512.0).ceil * 512

          name_data = @io.read(name_block)
          # A length-limited read returns nil at end of stream (truncated archive).
          raise UnexpectedEOF if name_data.nil?

          long_name = name_data.rstrip
          header = PosixHeader.from_stream(@io)

          return if header.empty?
          header.long_name = long_name
        elsif header.pax_header?
          pax_header = PaxHeader.from_stream(@io, header)

          header = PosixHeader.from_stream(@io)
          return if header.empty?

          # A size recorded in the pax header replaces the size of the next header.
          header.size = pax_header.size if pax_header.size
        end

        entry = EntryStream.new(header, @io)
        size = entry.size

        yield entry

        # Padding between the end of the entry data and the next 512-byte boundary.
        skip = (512 - (size % 512)) % 512

        if Minitar.seekable?(@io, :seek)
          # avoid reading...
          try_seek(size - entry.bytes_read)
        else
          # Read and discard what the block left unread, at most 4096 bytes at a time.
          pending = size - entry.bytes_read
          while pending > 0
            chunk = @io.read([pending, 4096].min)
            # nil means the stream was already at its end: report the truncation as
            # UnexpectedEOF instead of failing on nil.
            raise UnexpectedEOF if chunk.nil? || @io.eof?
            pending -= chunk.bytesize
          end
        end

        @io.read(skip) # discard trailing zeros
        # make sure nobody can use #read, #getc or #rewind anymore
        entry.close
      end
    end
    alias_method :each, :each_entry

    # Returns false if the reader is open (it never closes).
    def closed?
      false
    end

    # Does nothing and returns +nil+.
    def close
      nil
    end

    private

    def try_seek(bytes)
      @io.seek(bytes, IO::SEEK_CUR)
    rescue RangeError
      # This happens when skipping the large entry and the skipping entry size exceeds
      # maximum allowed size (varies by platform and underlying IO object).
      max = RbConfig::LIMITS.fetch("INT_MAX", 2147483647)
      skipped = 0
      while skipped < bytes
        to_skip = [bytes - skipped, max].min
        @io.seek(to_skip, IO::SEEK_CUR)
        skipped += to_skip
      end
    end
  end
end