1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276
|
# frozen_string_literal: true
class Minitar
# The class that reads a tar format archive from a data stream. The data stream may be
# sequential or random access, but certain features only work with random access data
# streams.
class Reader
include Enumerable
# Mixed into an EntryStream (via +extend+) once the entry has been closed. Every data
# access then fails with ClosedStream, while the shared underlying data stream is
# left open.
module InvalidEntryStream
  def read(*) # :nodoc:
    raise ClosedStream
  end

  def getc # :nodoc:
    raise ClosedStream
  end

  def rewind # :nodoc:
    raise ClosedStream
  end

  def closed? # :nodoc:
    true
  end
end
# EntryStreams are pseudo-streams on top of the main data stream.
#
# An EntryStream copies the header fields of one archive entry and tracks how many
# bytes of that entry have been consumed from the shared data stream.
class EntryStream
  Minitar::PosixHeader::FIELDS.each do |field|
    attr_reader field.to_sym
  end

  # Creates an EntryStream for the entry described by +header+. The entry data is
  # read from +io+, starting at the position +io+ has when this is called.
  def initialize(header, io)
    @io = io
    @name = header.name
    @mode = header.mode
    @uid = header.uid
    @gid = header.gid
    @size = header.size
    @mtime = header.mtime
    @checksum = header.checksum
    @typeflag = header.typeflag
    @linkname = header.linkname
    @magic = header.magic
    @version = header.version
    @uname = header.uname
    @gname = header.gname
    @devmajor = header.devmajor
    @devminor = header.devminor
    @prefix = header.prefix
    @read = 0
    # Remember where the entry data starts so that #rewind can return to it. Streams
    # that are not seekable get a placeholder of 0.
    @orig_pos = Minitar.seekable?(@io) ? @io.pos : 0
  end

  # Reads +len+ bytes (or all remaining data) from the entry. Returns +nil+ if there
  # is no more data to read, including when the underlying stream ends before the
  # entry's declared size has been reached.
  def read(len = nil)
    return nil if @read >= @size

    remaining = @size - @read
    max_read = [len || remaining, remaining].min
    ret = @io.read(max_read)
    # IO#read returns nil when the stream is already at EOF (a truncated archive);
    # report that as "no more data" instead of failing on nil.bytesize.
    @read += ret.bytesize if ret
    ret
  end

  # Reads one character from the entry (a single byte on a binary stream). Returns
  # +nil+ if there is no more data to read.
  def getc
    return nil if @read >= @size

    ret = @io.getc
    # Count the bytes actually consumed so that #pos stays in step with the
    # underlying stream even when it hands back a multi-byte character.
    @read += ret.bytesize if ret
    ret
  end

  # Returns +true+ if the entry represents a directory.
  def directory?
    case @typeflag
    when "5"
      true
    when "0", "\0"
      # If the name ends with a slash, treat it as a directory. This is what other
      # major tar implementations do for interoperability and compatibility with older
      # tar variants and some new ones.
      @name.end_with?("/")
    else
      false
    end
  end
  alias_method :directory, :directory?

  # Returns +true+ if the entry represents a plain file.
  def file?
    (@typeflag == "0" || @typeflag == "\0") && !@name.end_with?("/")
  end
  alias_method :file, :file?

  # Returns +true+ if the current read pointer is at the end of the EntryStream data.
  def eof? = @read >= @size

  # Returns the current read pointer in the EntryStream.
  def pos = @read
  alias_method :bytes_read, :pos

  # Sets the current read pointer to the beginning of the EntryStream. Raises
  # Minitar::NonSeekableStream if the underlying stream cannot have its position set.
  def rewind
    unless Minitar.seekable?(@io, :pos=)
      raise Minitar::NonSeekableStream
    end

    @io.pos = @orig_pos
    @read = 0
  end

  # Returns the full and proper name of the entry: the prefix joined to the name
  # when the prefix is not the empty string, otherwise the name alone.
  def full_name
    if @prefix != ""
      File.join(@prefix, @name)
    else
      @name
    end
  end

  # Returns false if the entry stream is valid.
  def closed? = false

  # Closes the entry. Afterwards #read, #getc and #rewind raise ClosedStream and
  # #closed? returns true; the underlying data stream is not closed.
  def close = invalidate

  private

  # Swaps the reading methods for the raising versions in InvalidEntryStream.
  def invalidate
    extend InvalidEntryStream
  end
end
# With no associated block, +Reader::open+ is a synonym for +Reader::new+. If the
# optional code block is given, it will be passed the new _reader_ as an argument and
# the Reader object will automatically be closed when the block terminates. In this
# instance, +Reader::open+ returns the value of the block.
def self.open(io)
  reader = new(io)

  if block_given?
    # The ensure must stay scoped to the block branch only; otherwise the reader
    # would also be closed when it is handed back without a block.
    begin
      yield reader
    ensure
      reader.close
    end
  else
    reader
  end
end
# Iterates over each entry in the provided input. This wraps the common pattern of:
#
#     Minitar::Input.open(io) do |inp|
#       inp.each do |entry|
#         # ...
#       end
#     end
#
# If a block is not provided, an enumerator will be created with the same behaviour.
#
# :call-seq:
#    Minitar::Reader.each_entry(io) -> enumerator
#    Minitar::Reader.each_entry(io) { |entry| block } -> obj
def self.each_entry(io)
  unless block_given?
    return to_enum(__method__, io)
  end

  Input.open(io) do |reader|
    reader.each_entry { |entry| yield entry }
  end
end
# Creates and returns a new Reader object.
def initialize(io)
@io = io
@init_pos = begin
io.pos
rescue
nil
end
end
# Resets the read pointer to the beginning of data stream. Do not call this during
# a #each or #each_entry iteration. This only works with random access data streams
# that respond to #rewind and #pos.
#
# Raises Minitar::NonSeekableStream when the starting position could not be recorded
# at construction time, or when the stream lacks the method needed to go back to it.
def rewind
  # @init_pos is nil when the stream could not report #pos when the Reader was
  # created, so there is no known position to return to.
  raise Minitar::NonSeekableStream if @init_pos.nil?

  if @init_pos.zero?
    raise Minitar::NonSeekableStream unless Minitar.seekable?(@io, :rewind)
    @io.rewind
  else
    raise Minitar::NonSeekableStream unless Minitar.seekable?(@io, :pos=)
    @io.pos = @init_pos
  end
end
# Iterates through each entry in the data stream, yielding an EntryStream for each
# one. Returns an enumerator if no block is given.
#
# After the block returns, whatever part of the entry was not read is skipped, along
# with the padding up to the next 512-byte boundary, and the entry is closed.
#
# Raises Minitar::InvalidTarStream when a header is not valid or declares a negative
# size, and UnexpectedEOF when the stream ends where entry data was expected.
def each_entry
  return to_enum unless block_given?

  loop do
    return if @io.eof?

    header = Minitar::PosixHeader.from_stream(@io)
    raise Minitar::InvalidTarStream unless header.valid?
    return if header.empty?

    raise Minitar::InvalidTarStream if header.size < 0

    if header.long_name?
      # The long name is carried as data, rounded up to whole 512-byte blocks; the
      # header that follows describes the entry itself.
      name_block = (header.size / 512.0).ceil * 512
      long_name = @io.read(name_block)
      # IO#read returns nil at EOF; a missing name block means a truncated archive.
      raise UnexpectedEOF if long_name.nil?

      long_name = long_name.rstrip
      header = PosixHeader.from_stream(@io)
      return if header.empty?

      header.long_name = long_name
    elsif header.pax_header?
      pax_header = PaxHeader.from_stream(@io, header)
      header = PosixHeader.from_stream(@io)
      return if header.empty?

      header.size = pax_header.size if pax_header.size
    end

    entry = EntryStream.new(header, @io)
    size = entry.size

    yield entry

    # Padding between the end of the entry data and the next 512-byte boundary.
    skip = (512 - (size % 512)) % 512

    if Minitar.seekable?(@io, :seek)
      # avoid reading...
      try_seek(size - entry.bytes_read)
    else
      pending = size - entry.bytes_read
      while pending > 0
        chunk = @io.read([pending, 4096].min)
        # A nil chunk means the stream was already at EOF; treat it like any other
        # premature end instead of failing on nil.bytesize.
        raise UnexpectedEOF if chunk.nil? || @io.eof?

        pending -= chunk.bytesize
      end
    end

    @io.read(skip) # discard trailing zeros

    # make sure nobody can use #read, #getc or #rewind anymore
    entry.close
  end
end
alias_method :each, :each_entry
# Returns false if the reader is open (it never closes).
def closed?
  false
end

# Does nothing and returns +nil+; it exists so a Reader can be closed like a stream.
def close = nil
private
# Moves the data stream forward by +bytes+ relative to its current position. If a
# single seek raises RangeError, the distance is covered in several smaller seeks.
def try_seek(bytes)
  @io.seek(bytes, IO::SEEK_CUR)
rescue RangeError
  # This happens when skipping the large entry and the skipping entry size exceeds
  # maximum allowed size (varies by platform and underlying IO object).
  limit = RbConfig::LIMITS.fetch("INT_MAX", 2147483647)
  remaining = bytes

  while remaining > 0
    step = (remaining < limit) ? remaining : limit
    @io.seek(step, IO::SEEK_CUR)
    remaining -= step
  end
end
end
end
|