File: reader.go

package info (click to toggle)
golang-github-ulikunitz-xz 0.5.15-1
links: PTS, VCS
area: main
in suites: forky, sid
size: 11,496 kB
sloc: makefile: 7; sh: 5
file content (193 lines) | stat: -rw-r--r-- 5,952 bytes
// Copyright 2014-2022 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package lzma supports the decoding and encoding of LZMA streams.
// Reader and Writer support the classic LZMA format. Reader2 and
// Writer2 support the decoding and encoding of LZMA2 streams.
//
// The package is written completely in Go and does not rely on any external
// library.
package lzma

import (
	"errors"
	"fmt"
	"io"
)

// ReaderConfig holds the settings used when reading the classic LZMA
// format.
type ReaderConfig struct {
	// DictCap is the upper limit for the dictionary size of a .lzma
	// file (since v0.5.14). The limit helps to mitigate problems with
	// mangled headers. A zero value is replaced by the default.
	DictCap int
}

// fill replaces zero values of the configuration by their defaults. A
// non-zero DictCap is left untouched.
func (c *ReaderConfig) fill() {
	if c.DictCap != 0 {
		return
	}
	// Default to an upper limit of 2 GiB-1 for the dictionary capacity
	// to address the zero prefix security issue. (The default used to
	// be 8 MiB.)
	c.DictCap = 1<<31 - 1
}

// Verify validates the reader configuration. Zero values are replaced by
// their defaults first; afterwards DictCap must lie in the range
// MinDictCap..MaxDictCap, otherwise an error is returned.
func (c *ReaderConfig) Verify() error {
	c.fill()
	if c.DictCap < MinDictCap || int64(c.DictCap) > MaxDictCap {
		return errors.New("lzma: dictionary capacity is out of range")
	}
	return nil
}

// Reader provides a reader for LZMA files or streams in the classic
// format.
//
// # Security concerns
//
// Note that the LZMA format doesn't support a magic marker in the header. So
// [NewReader] cannot determine whether it reads the actual header. For
// instance, the LZMA stream might have a zero byte in front of the actual
// header, leading to larger dictionary sizes and file sizes. The code will
// detect later that there are problems with the stream, but the dictionary
// has already been allocated and this might consume a lot of memory.
//
// Version 0.5.14 introduces built-in mitigations:
//
//   - The [ReaderConfig] DictCap field is now interpreted as a limit for the
//     dictionary size.
//   - The default is 2 Gigabytes minus 1 byte (2^31-1 bytes).
//   - Users can check with the [Reader.Header] method what the actual values
//     are in their LZMA files and set a smaller limit using [ReaderConfig].
//   - The dictionary size doesn't exceed the larger of the file size and
//     the minimum dictionary size. This is another measure to prevent huge
//     memory allocations for the dictionary.
//   - The code supports stream sizes only up to a pebibyte (1024^5).
type Reader struct {
	// lzma is the underlying compressed stream. header is the header used
	// for decoding; its DictSize may differ from the value in the stream
	// because NewReader adjusts it.
	lzma   io.Reader
	header Header
	// headerOrig stores the original header read from the stream. d is the
	// decoder that Read and EOSMarker delegate to.
	headerOrig Header
	d          *decoder
}

// NewReader creates a new reader for an LZMA stream in the classic format
// using the default [ReaderConfig]. The header of the LZMA stream is read
// and checked as part of the call.
func NewReader(lzma io.Reader) (r *Reader, err error) {
	var defaults ReaderConfig
	return defaults.NewReader(lzma)
}

// ErrDictSize describes a problem with the dictionary size. It records the
// configured dictionary capacity, the dictionary size found in the header
// and the formatted message.
type ErrDictSize struct {
	ConfigDictCap  int
	HeaderDictSize uint32
	Message        string
}

// Error returns the stored message and makes *ErrDictSize an error.
func (e *ErrDictSize) Error() string {
	return e.Message
}

// newErrDictSize builds an *ErrDictSize. The message is produced by
// formatting messageformat with configDictCap as the first argument,
// headerDictSize as the second and args as the remaining arguments.
func newErrDictSize(messageformat string,
	configDictCap int, headerDictSize uint32,
	args ...interface{}) *ErrDictSize {
	// The two sizes always lead the argument list so that formats can
	// refer to them by the explicit indexes %[1] and %[2].
	fmtArgs := make([]interface{}, 0, 2+len(args))
	fmtArgs = append(fmtArgs, configDictCap, headerDictSize)
	fmtArgs = append(fmtArgs, args...)

	e := new(ErrDictSize)
	e.ConfigDictCap = configDictCap
	e.HeaderDictSize = headerDictSize
	e.Message = fmt.Sprintf(messageformat, fmtArgs...)
	return e
}

// maxStreamSize is the largest stream size accepted in a header. We support
// only files not larger than 1 << 50 bytes (a pebibyte, 1024^5).
const maxStreamSize = 1 << 50

// NewReader creates a new reader for an LZMA stream in the classic
// format using the configuration c. The function verifies the
// configuration, reads HeaderLen bytes from lzma and checks the header
// before the decoder is set up.
//
// An *ErrDictSize is returned if the dictionary size stated in the header
// exceeds c.DictCap. An error is also returned if the stream size stated
// in the header exceeds maxStreamSize.
func (c ReaderConfig) NewReader(lzma io.Reader) (r *Reader, err error) {
	if err = c.Verify(); err != nil {
		return nil, err
	}

	buf := make([]byte, HeaderLen)
	_, err = io.ReadFull(lzma, buf)
	switch {
	case err == io.EOF:
		// Not a single header byte was available.
		return nil, errors.New("lzma: unexpected EOF")
	case err != nil:
		return nil, err
	}

	var hdr Header
	if err = hdr.unmarshalBinary(buf); err != nil {
		return nil, err
	}

	// DictCap acts as an upper limit for the dictionary size in the header.
	headerDict := int64(hdr.DictSize)
	if headerDict > int64(c.DictCap) {
		return nil, newErrDictSize(
			"lzma: header dictionary size %[2]d exceeds configured dictionary capacity %[1]d",
			c.DictCap, uint32(headerDict),
		)
	}

	// The dictionary is never raised above the size stated in the file.
	// A known (non-negative) stream size that is smaller than the stated
	// dictionary size shrinks the dictionary further.
	dictSize := headerDict
	streamSize := hdr.Size
	if 0 <= streamSize && streamSize < dictSize {
		dictSize = streamSize
	}
	// Protect against modified or malicious headers.
	if streamSize > maxStreamSize {
		return nil, fmt.Errorf(
			"lzma: stream size %d exceeds a pebibyte (1024^5)",
			streamSize)
	}
	// Clamping once at the end is sufficient: whatever value was chosen
	// above, the result is never below MinDictCap.
	if dictSize < MinDictCap {
		dictSize = MinDictCap
	}

	// headerOrig keeps the header as read; header carries the adjusted
	// dictionary size that is actually used.
	r = &Reader{lzma: lzma, header: hdr, headerOrig: hdr}
	r.header.DictSize = uint32(dictSize)

	state := newState(r.header.Properties)
	dict, err := newDecoderDict(int(dictSize))
	if err != nil {
		return nil, err
	}
	if r.d, err = newDecoder(ByteReader(lzma), state, dict, r.header.Size); err != nil {
		return nil, err
	}
	return r, nil
}

// Header returns the header exactly as it was read from the LZMA stream,
// before any adjustment of the dictionary size. It lets users inspect which
// parameters their LZMA files typically carry, so that the DictCap field in
// [ReaderConfig] can be set accordingly. ok reports whether the reader has
// a decoder.
func (r *Reader) Header() (h Header, ok bool) {
	h, ok = r.headerOrig, r.d != nil
	return h, ok
}

// EOSMarker reports whether the decoder has encountered an EOS marker.
func (r *Reader) EOSMarker() bool {
	dec := r.d
	return dec.eosMarker
}

// Read fills p with uncompressed data by delegating to the decoder and
// returns the decoder's byte count and error.
func (r *Reader) Read(p []byte) (n int, err error) {
	n, err = r.d.Read(p)
	return n, err
}