File: zopen.go

package info (click to toggle)
moor 2.10.3-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 27,960 kB
  • sloc: sh: 229; ansic: 12; xml: 6; makefile: 5
file content (137 lines) | stat: -rw-r--r-- 3,737 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
package reader

import (
	"bytes"
	"compress/bzip2"
	"compress/gzip"
	"fmt"
	"io"
	"os"
	"strings"

	"github.com/klauspost/compress/zstd"
	log "github.com/sirupsen/logrus"
	"github.com/ulikunitz/xz"
)

// Magic numbers: the byte sequences that gzip, bzip2, zstd and xz data
// respectively start with. The longest one (xz) is six bytes.
var (
	gzipMagic  = []byte{0x1f, 0x8b}
	bzip2Magic = []byte{'B', 'Z', 'h'}
	zstdMagic  = []byte{0x28, 0xb5, 0x2f, 0xfd}
	xzMagic    = []byte{0xfd, '7', 'z', 'X', 'Z', 0x00}
)

// zopenReadCloser pairs a decompressed stream with everything that has to be
// released once the caller is done with it.
type zopenReadCloser struct {
	io.Reader
	decompressor io.Closer // nil when the decompressor has nothing to release
	file         io.Closer
}

// Close releases the decompressor, if any, and then the underlying file. Both
// are always attempted. The first error encountered is returned.
func (z zopenReadCloser) Close() error {
	var firstErr error
	if z.decompressor != nil {
		firstErr = z.decompressor.Close()
	}
	if err := z.file.Close(); firstErr == nil {
		firstErr = err
	}
	return firstErr
}

// ZOpen opens the named file for reading, transparently decompressing it if
// its first bytes match the gzip, bzip2, zstd or xz magic number. Files not
// matching any of those are returned as-is.
//
// The second return value is the file name with any compression extension
// removed (a ".tgz" extension becomes ".tar").
//
// Closing the returned ReadCloser releases the decompressor and the underlying
// file. If an error is returned, the file has already been closed.
func ZOpen(filename string) (io.ReadCloser, string, error) {
	file, err := os.Open(filename)
	if err != nil {
		return nil, "", err
	}

	reader, newName, err := zopenDecompress(file, filename)
	if err != nil {
		// Don't leak the file. Its close error is dropped on purpose: the
		// original error is the one the caller needs to see.
		_ = file.Close()
		return nil, "", err
	}

	return reader, newName, nil
}

// zopenDecompress sniffs the compression format of an already opened file and
// wraps it in the matching decompressor. On error the file is left open, the
// caller is responsible for closing it.
func zopenDecompress(file *os.File, filename string) (io.ReadCloser, string, error) {
	// Read the first 6 bytes to determine the compression type
	firstBytes := make([]byte, 6)
	count, err := io.ReadFull(file, firstBytes)
	switch err {
	case nil, io.ErrUnexpectedEOF:
		// Got all 6 bytes, or all of a file shorter than that
	case io.EOF:
		// File was empty
		return file, filename, nil
	default:
		return nil, "", fmt.Errorf("failed to read file: %w", err)
	}

	// Keep only what was actually read, so that the zero padding of a short
	// file can't be mistaken for part of a magic number
	firstBytes = firstBytes[:count]

	// Reset file reader to start of file
	_, err = file.Seek(0, io.SeekStart)
	if err != nil {
		return nil, "", fmt.Errorf("failed to seek to start of file: %w", err)
	}

	switch {
	case bytes.HasPrefix(firstBytes, gzipMagic):
		log.Debugf("File is gzip compressed: %v", filename)
		gzipReader, err := gzip.NewReader(file)
		if err != nil {
			return nil, "", err
		}

		newName := strings.TrimSuffix(filename, ".gz")

		// Ref: https://github.com/walles/moor/issues/194
		if strings.HasSuffix(newName, ".tgz") {
			newName = strings.TrimSuffix(newName, ".tgz") + ".tar"
		}

		// gzip.Reader.Close() does not close the underlying file, so the
		// wrapper has to do that
		return zopenReadCloser{Reader: gzipReader, decompressor: gzipReader, file: file}, newName, nil

	case bytes.HasPrefix(firstBytes, bzip2Magic):
		log.Debugf("File is bzip2 compressed: %v", filename)
		return zopenReadCloser{Reader: bzip2.NewReader(file), file: file}, strings.TrimSuffix(filename, ".bz2"), nil

	case bytes.HasPrefix(firstBytes, zstdMagic):
		log.Debugf("File is zstd compressed: %v", filename)
		decoder, err := zstd.NewReader(file)
		if err != nil {
			return nil, "", err
		}

		newName := strings.TrimSuffix(filename, ".zst")
		newName = strings.TrimSuffix(newName, ".zstd")

		// Closing the decoder releases the decoder only, so the wrapper has
		// to close the file as well
		zstdStream := decoder.IOReadCloser()
		return zopenReadCloser{Reader: zstdStream, decompressor: zstdStream, file: file}, newName, nil

	case bytes.HasPrefix(firstBytes, xzMagic):
		log.Debugf("File is xz compressed: %v", filename)
		xzReader, err := xz.NewReader(file)
		if err != nil {
			return nil, "", err
		}

		return zopenReadCloser{Reader: xzReader, file: file}, strings.TrimSuffix(filename, ".xz"), nil
	}

	log.Debugf("File is assumed to be uncompressed: %v", filename)
	return file, filename, nil
}

// ZReader returns a reader that decompresses the input stream. Any input stream
// compression will be automatically detected. Uncompressed streams will be
// returned as-is.
//
// Detection consumes up to the first 6 bytes of the input. Those bytes are put
// back in front of the returned reader, so nothing is lost. On error the
// returned reader is nil.
//
// Ref: https://github.com/walles/moor/issues/261
func ZReader(input io.Reader) (io.Reader, error) {
	// undecided reports whether the bytes seen so far are the start of some
	// magic number without being all of it, in which case more bytes are
	// needed before the format can be determined.
	undecided := func(prefix []byte) bool {
		for _, magic := range [][]byte{gzipMagic, zstdMagic, bzip2Magic, xzMagic} {
			if len(prefix) < len(magic) && bytes.HasPrefix(magic, prefix) {
				return true
			}
		}
		return false
	}

	// Read up to the first 6 bytes to determine the compression type. One
	// Read() call may return fewer bytes than asked for, so keep reading, but
	// only for as long as the result is still open. That way an uncompressed
	// stream is not held up waiting for bytes that may be slow to arrive.
	firstBytes := make([]byte, 6)
	count := 0
	var err error
	for err == nil && undecided(firstBytes[:count]) {
		var n int
		n, err = input.Read(firstBytes[count:])
		count += n
	}
	if err != nil && err != io.EOF {
		return nil, fmt.Errorf("failed to read stream: %w", err)
	}
	if count == 0 {
		// Stream was empty
		return input, nil
	}

	// A reader may return its final bytes together with io.EOF, so whatever
	// was read has to be kept even if the stream has ended
	firstBytes = firstBytes[:count]

	// Reset input reader to start of stream
	input = io.MultiReader(bytes.NewReader(firstBytes), input)

	// The constructor errors below are checked explicitly rather than passed
	// straight through: returning a nil *gzip.Reader (or similar) as an
	// io.Reader would give the caller a non-nil interface.
	switch {
	case bytes.HasPrefix(firstBytes, gzipMagic):
		log.Info("Input stream is gzip compressed")
		gzipReader, err := gzip.NewReader(input)
		if err != nil {
			return nil, err
		}
		return gzipReader, nil
	case bytes.HasPrefix(firstBytes, zstdMagic):
		log.Info("Input stream is zstd compressed")
		zstdReader, err := zstd.NewReader(input)
		if err != nil {
			return nil, err
		}
		return zstdReader, nil
	case bytes.HasPrefix(firstBytes, bzip2Magic):
		log.Info("Input stream is bzip2 compressed")
		return bzip2.NewReader(input), nil
	case bytes.HasPrefix(firstBytes, xzMagic):
		log.Info("Input stream is xz compressed")
		xzReader, err := xz.NewReader(input)
		if err != nil {
			return nil, err
		}
		return xzReader, nil
	default:
		// No magic numbers matched
		log.Info("Input stream is assumed to be uncompressed")
		return input, nil
	}
}