1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137
|
package reader
import (
"bytes"
"compress/bzip2"
"compress/gzip"
"fmt"
"io"
"os"
"strings"
"github.com/klauspost/compress/zstd"
log "github.com/sirupsen/logrus"
"github.com/ulikunitz/xz"
)
// Leading byte sequences ("magic numbers") identifying each supported
// compression format. The longest one is six bytes, which is why six bytes
// of input are sniffed before picking a decompressor.
var (
	// gzip: ID1, ID2
	gzipMagic = []byte("\x1f\x8b")

	// bzip2: "BZ" signature followed by 'h' for Huffman coding
	bzip2Magic = []byte("BZh")

	// zstd: frame magic number 0xFD2FB528, little endian
	zstdMagic = []byte("\x28\xb5\x2f\xfd")

	// xz: 0xFD, "7zXZ", 0x00
	xzMagic = []byte("\xfd7zXZ\x00")
)
// zOpenReadCloser is what ZOpen returns for compressed files: reads go through
// the decompressor, and Close releases both the decompressor and the file it
// reads from.
type zOpenReadCloser struct {
	// The decompressing reader
	io.Reader

	// Releases decompressor resources, nil if the decompressor has none
	closeDecoder func() error

	// The compressed file the decompressor reads from
	file *os.File
}

// Close shuts down the decompressor (if it needs that) and then the underlying
// file. The file is closed even if the decompressor reports an error. The
// first error encountered is returned.
func (z zOpenReadCloser) Close() error {
	var err error
	if z.closeDecoder != nil {
		err = z.closeDecoder()
	}
	if fileErr := z.file.Close(); err == nil {
		err = fileErr
	}
	return err
}

// ZOpen opens the named file for reading, transparently decompressing it if
// its leading bytes match the gzip, bzip2, zstd or xz magic number. Files
// matching none of those are returned as-is.
//
// The second return value is the file name with any compression extension removed.
//
// Closing the returned reader also closes the underlying file. If an error is
// returned, the file has already been closed.
func ZOpen(filename string) (io.ReadCloser, string, error) {
	file, err := os.Open(filename)
	if err != nil {
		return nil, "", err
	}

	// From here on we own the file handle, so every error return has to
	// release it or we leak a file descriptor.
	fail := func(err error) (io.ReadCloser, string, error) {
		_ = file.Close() // Best effort, the original error is the interesting one
		return nil, "", err
	}

	// Read the first 6 bytes to determine the compression type. We keep track
	// of how many bytes we actually got, so that the zero padding after a
	// short file can't be mistaken for the trailing 0x00 of the xz magic.
	firstBytes := make([]byte, 6)
	count, err := io.ReadFull(file, firstBytes)
	if err == io.EOF {
		// File was empty
		return file, filename, nil
	}
	if err != nil && err != io.ErrUnexpectedEOF {
		return fail(fmt.Errorf("failed to read file: %w", err))
	}
	firstBytes = firstBytes[:count]

	// Reset file reader to start of file
	_, err = file.Seek(0, io.SeekStart)
	if err != nil {
		return fail(fmt.Errorf("failed to seek to start of file: %w", err))
	}

	switch {
	case bytes.HasPrefix(firstBytes, gzipMagic):
		log.Debugf("File is gzip compressed: %v", filename)
		reader, err := gzip.NewReader(file)
		if err != nil {
			return fail(err)
		}

		newName := strings.TrimSuffix(filename, ".gz")

		// Ref: https://github.com/walles/moor/issues/194
		if strings.HasSuffix(newName, ".tgz") {
			newName = strings.TrimSuffix(newName, ".tgz") + ".tar"
		}

		// gzip.Reader.Close() does not close the underlying file, so we
		// have to do that ourselves.
		return zOpenReadCloser{Reader: reader, closeDecoder: reader.Close, file: file}, newName, nil

	case bytes.HasPrefix(firstBytes, bzip2Magic):
		log.Debugf("File is bzip2 compressed: %v", filename)
		return zOpenReadCloser{Reader: bzip2.NewReader(file), file: file},
			strings.TrimSuffix(filename, ".bz2"), nil

	case bytes.HasPrefix(firstBytes, zstdMagic):
		log.Debugf("File is zstd compressed: %v", filename)
		decoder, err := zstd.NewReader(file)
		if err != nil {
			return fail(err)
		}

		newName := strings.TrimSuffix(filename, ".zst")
		newName = strings.TrimSuffix(newName, ".zstd")

		// Closing the decoder releases its resources but leaves the file
		// open, so close both.
		decoderCloser := decoder.IOReadCloser()
		return zOpenReadCloser{Reader: decoderCloser, closeDecoder: decoderCloser.Close, file: file}, newName, nil

	case bytes.HasPrefix(firstBytes, xzMagic):
		log.Debugf("File is xz compressed: %v", filename)
		xzReader, err := xz.NewReader(file)
		if err != nil {
			return fail(err)
		}

		return zOpenReadCloser{Reader: xzReader, file: file},
			strings.TrimSuffix(filename, ".xz"), nil
	}

	log.Debugf("File is assumed to be uncompressed: %v", filename)
	return file, filename, nil
}
// zReaderNeedsMoreBytes reports whether the bytes sniffed so far are a strict
// prefix of at least one known magic number, meaning the compression type
// can't be decided without reading further.
func zReaderNeedsMoreBytes(sniffed []byte) bool {
	for _, magic := range [][]byte{gzipMagic, bzip2Magic, zstdMagic, xzMagic} {
		if len(sniffed) < len(magic) && bytes.HasPrefix(magic, sniffed) {
			return true
		}
	}
	return false
}

// ZReader returns a reader that decompresses the input stream. Any input stream
// compression will be automatically detected. Uncompressed streams will be
// returned as-is.
//
// On error the returned reader is a literal nil.
//
// Ref: https://github.com/walles/moor/issues/261
func ZReader(input io.Reader) (io.Reader, error) {
	// Sniff up to 6 bytes to determine the compression type.
	//
	// A single Read() may legally return fewer bytes than we asked for, so we
	// keep reading for as long as what we have could still turn into a magic
	// number. We stop as soon as it can't, so that a slow plain text stream
	// is not held up waiting for bytes that don't affect the decision.
	firstBytes := make([]byte, 0, 6)
	for len(firstBytes) < cap(firstBytes) && zReaderNeedsMoreBytes(firstBytes) {
		count, err := input.Read(firstBytes[len(firstBytes):cap(firstBytes)])

		// Keep whatever we got, a reader may return data together with an error
		firstBytes = firstBytes[:len(firstBytes)+count]

		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, fmt.Errorf("failed to read stream: %w", err)
		}
	}

	if len(firstBytes) == 0 {
		// Stream was empty
		return input, nil
	}

	// Reset input reader to start of stream
	input = io.MultiReader(bytes.NewReader(firstBytes), input)

	// The constructors below return concrete pointer types. Returning those
	// directly on failure would wrap a nil pointer in a non-nil io.Reader, so
	// we check the errors and return a literal nil instead.
	switch {
	case bytes.HasPrefix(firstBytes, gzipMagic):
		log.Info("Input stream is gzip compressed")
		gzipReader, err := gzip.NewReader(input)
		if err != nil {
			return nil, err
		}
		return gzipReader, nil

	case bytes.HasPrefix(firstBytes, zstdMagic):
		log.Info("Input stream is zstd compressed")
		zstdReader, err := zstd.NewReader(input)
		if err != nil {
			return nil, err
		}
		return zstdReader, nil

	case bytes.HasPrefix(firstBytes, bzip2Magic):
		log.Info("Input stream is bzip2 compressed")
		return bzip2.NewReader(input), nil

	case bytes.HasPrefix(firstBytes, xzMagic):
		log.Info("Input stream is xz compressed")
		xzReader, err := xz.NewReader(input)
		if err != nil {
			return nil, err
		}
		return xzReader, nil

	default:
		// No magic numbers matched
		log.Info("Input stream is assumed to be uncompressed")
		return input, nil
	}
}
|