File: ztoc.go

package info (click to toggle)
golang-github-awslabs-soci-snapshotter 0.4.1-5
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,952 kB
  • sloc: ansic: 459; sh: 237; makefile: 90
file content (227 lines) | stat: -rw-r--r-- 6,560 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
/*
   Copyright The Soci Snapshotter Authors.

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
*/

package ztoc

import (
	"archive/tar"
	"context"
	"fmt"
	"io"
	"os"
	"time"

	"github.com/opencontainers/go-digest"
	"golang.org/x/sync/errgroup"

	"github.com/awslabs/soci-snapshotter/ztoc/compression"
)

// Version identifies the version of a Ztoc. It is a plain string type; the
// values known to this package are declared as constants below.
type Version string

// Ztoc versions available.
const (
	// Version09 is the Ztoc version whose string form is "0.9".
	Version09 Version = "0.9"
)

// Ztoc is a table of contents for compressed data. It consists of two parts:
//
// (1). toc (`TOC`): a table of contents containing file metadata and its
// offset in the decompressed TAR archive.
// (2). zinfo (`CompressionInfo`): a collection of "checkpoints" of the
// state of the compression engine at various points in the layer.
//
// Both parts are embedded, so their fields and methods (for example
// GetMetadataEntry from TOC) are available directly on a Ztoc value.
type Ztoc struct {
	TOC
	CompressionInfo

	// Version is the Ztoc version (see the Version type).
	// BuildToolIdentifier presumably names the tool that produced this
	// ztoc — NOTE(review): confirm against the code that builds ztocs.
	// CompressedArchiveSize and UncompressedArchiveSize hold the archive
	// sizes as their names indicate; ExtractFile passes
	// CompressedArchiveSize to the zinfo when it computes where a span's
	// compressed data ends.
	Version                 Version
	BuildToolIdentifier     string
	CompressedArchiveSize   compression.Offset
	UncompressedArchiveSize compression.Offset
}

// CompressionInfo is the "zinfo" part of ztoc including the `Checkpoints` data
// and other metadata such as all span digests.
//
// Checkpoints and CompressionAlgorithm are the two values handed to
// compression.NewZinfo by Ztoc.Zinfo to rebuild a usable zinfo.
type CompressionInfo struct {
	MaxSpanID            compression.SpanID // The total number of spans in Ztoc - 1
	SpanDigests          []digest.Digest
	Checkpoints          []byte
	CompressionAlgorithm string
}

// TOC is the "toc" part of ztoc including metadata of all files in the compressed
// data (e.g., a gzip tar file).
//
// Lookups by file name (see GetMetadataEntry) scan FileMetadata in order and
// use the first entry whose Name matches.
type TOC struct {
	FileMetadata []FileMetadata
}

// FileMetadata contains metadata of a file in the compressed data.
//
// Type is a string tag; FileMode recognizes "dir", "symlink", "char", "block"
// and "fifo" and treats every other value as carrying no file-type bits.
// Mode holds the same bits as tar.Header.Mode. UncompressedOffset and
// UncompressedSize locate the file's data within the uncompressed archive.
// A non-empty Linkname makes TOC.GetMetadataEntry resolve the entry through
// the named target instead of using this entry's own offset and size.
type FileMetadata struct {
	Name               string
	Type               string
	UncompressedOffset compression.Offset
	UncompressedSize   compression.Offset
	Linkname           string // Target name of link (valid for TypeLink or TypeSymlink)
	Mode               int64  // Permission and mode bits
	UID                int    // User ID of owner
	GID                int    // Group ID of owner
	Uname              string // User name of owner
	Gname              string // Group name of owner

	ModTime  time.Time // Modification time
	Devmajor int64     // Major device number (valid for TypeChar or TypeBlock)
	Devminor int64     // Minor device number (valid for TypeChar or TypeBlock)

	// NOTE(review): presumably the file's extended attributes keyed by
	// attribute name — confirm against the code that populates it.
	Xattrs map[string]string
}

// FileMode returns the os.FileMode described by this file metadata.
//
// The permission, setuid, setgid and sticky bits are derived from Mode, and
// the file-type bits are derived from Type. Type values other than "dir",
// "symlink", "char", "block" and "fifo" contribute no file-type bits.
func (src FileMetadata) FileMode() (m os.FileMode) {
	// Only these bits are taken from Mode; any file-type bits the tar
	// package derives from it are discarded in favor of Type.
	const keep = os.ModePerm | os.ModeSetuid | os.ModeSetgid | os.ModeSticky

	// FileMetadata.Mode is tar.Header.Mode, so let the `tar` package
	// translate the raw bits into an os.FileMode.
	hdr := tar.Header{Mode: src.Mode}
	m = hdr.FileInfo().Mode() & keep

	var kind os.FileMode
	switch src.Type {
	case "dir":
		kind = os.ModeDir
	case "symlink":
		kind = os.ModeSymlink
	case "char":
		kind = os.ModeDevice | os.ModeCharDevice
	case "block":
		kind = os.ModeDevice
	case "fifo":
		kind = os.ModeNamedPipe
	}
	m |= kind
	return m
}

// MetadataEntry is used to locate a file based on its metadata.
//
// It carries the size of the file and the offset of its data, both expressed
// in terms of the uncompressed archive, and is what TOC.GetMetadataEntry
// returns for a file name.
type MetadataEntry struct {
	UncompressedSize   compression.Offset
	UncompressedOffset compression.Offset
}

// GetMetadataEntry gets MetadataEntry given a filename.
//
// The first entry in toc.FileMetadata whose Name equals filename is used. If
// that entry has a non-empty Linkname, the lookup continues with the link
// target, repeating until an entry without a Linkname is reached; the size
// and offset of that final entry are returned.
//
// An error is returned when a name along the way (the requested file or any
// link target) is not present in the metadata, or when the links form a
// cycle. Cycles are reported as an error rather than followed forever, so
// malformed metadata cannot exhaust the stack.
func (toc TOC) GetMetadataEntry(filename string) (MetadataEntry, error) {
	// Names already followed as links. Allocated lazily because most
	// lookups resolve without following any link.
	var visited map[string]struct{}

	name := filename
	for {
		// Index into the slice instead of ranging by value to avoid
		// copying every FileMetadata while scanning.
		idx := -1
		for i := range toc.FileMetadata {
			if toc.FileMetadata[i].Name == name {
				idx = i
				break
			}
		}
		if idx < 0 {
			return MetadataEntry{}, fmt.Errorf("file %s does not exist in metadata", name)
		}

		fm := &toc.FileMetadata[idx]
		if fm.Linkname == "" {
			return MetadataEntry{
				UncompressedSize:   fm.UncompressedSize,
				UncompressedOffset: fm.UncompressedOffset,
			}, nil
		}

		if visited == nil {
			visited = make(map[string]struct{})
		}
		visited[name] = struct{}{}
		if _, seen := visited[fm.Linkname]; seen {
			return MetadataEntry{}, fmt.Errorf("file %s cannot be resolved: link cycle at %s in metadata", filename, fm.Linkname)
		}
		name = fm.Linkname
	}
}

// ExtractFile extracts a file from compressed data (as a reader) and returns the
// byte data.
//
// The file is located through the ztoc's metadata, the compressed spans that
// cover it are read from r in parallel, and the zinfo then extracts the
// requested range from the fetched bytes. A file whose uncompressed size is
// zero yields an empty, non-nil slice without reading from r.
//
// An error is returned if filename cannot be resolved in the metadata, if the
// zinfo cannot be created, if any span read fails or returns fewer bytes than
// expected, or if extraction from the fetched buffer fails.
func (zt Ztoc) ExtractFile(r *io.SectionReader, filename string) ([]byte, error) {
	entry, err := zt.GetMetadataEntry(filename)
	if err != nil {
		return nil, err
	}
	if entry.UncompressedSize == 0 {
		return []byte{}, nil
	}

	zinfo, err := zt.Zinfo()
	if err != nil {
		// Propagate the failure: returning (nil, nil) here would make an
		// unusable zinfo look like a successful read of no data.
		return nil, err
	}
	defer zinfo.Close()

	// Spans containing the first byte of the file and the offset just past
	// its last byte.
	spanStart := zinfo.UncompressedOffsetToSpanID(entry.UncompressedOffset)
	spanEnd := zinfo.UncompressedOffsetToSpanID(entry.UncompressedOffset + entry.UncompressedSize)
	numSpans := spanEnd - spanStart + 1

	// checkpoints[k] is the compressed offset where the k-th needed span
	// starts; checkpoints[k+1] is where it ends.
	checkpoints := make([]compression.Offset, numSpans+1)
	checkpoints[0] = zinfo.StartCompressedOffset(spanStart)

	var i compression.SpanID
	for i = 0; i < numSpans; i++ {
		checkpoints[i+1] = zinfo.EndCompressedOffset(spanStart+i, zt.CompressedArchiveSize)
	}

	bufSize := checkpoints[len(checkpoints)-1] - checkpoints[0]
	buf := make([]byte, bufSize)
	eg, _ := errgroup.WithContext(context.Background())

	// Fetch all span data in parallel. Each goroutine writes to its own
	// disjoint region of buf, so no further synchronization is needed.
	for i = 0; i < numSpans; i++ {
		i := i
		eg.Go(func() error {
			rangeStart := checkpoints[i]
			rangeEnd := checkpoints[i+1]
			n, err := r.ReadAt(buf[rangeStart-checkpoints[0]:rangeEnd-checkpoints[0]], int64(rangeStart)) // need to convert rangeStart to int64 to use in ReadAt
			// io.EOF alone is not a failure; a short read is caught by
			// the size check below.
			if err != nil && err != io.EOF {
				return err
			}

			bytesToFetch := rangeEnd - rangeStart
			if n != int(bytesToFetch) {
				return fmt.Errorf("unexpected data size. read = %d, expected = %d", n, bytesToFetch)
			}
			return nil
		})
	}

	if err := eg.Wait(); err != nil {
		return nil, err
	}

	data, err := zinfo.ExtractDataFromBuffer(buf, entry.UncompressedSize, entry.UncompressedOffset, spanStart)
	if err != nil {
		return nil, err
	}

	return data, nil
}

// ExtractFromTarGz returns, as a string, the content of filename taken from
// the gzip tar file at path gz, using this ztoc to locate the data.
//
// A file whose uncompressed size is zero yields the empty string. An error is
// returned if filename cannot be resolved in the metadata, if the zinfo cannot
// be created, or if extracting the data from gz fails.
func (zt Ztoc) ExtractFromTarGz(gz string, filename string) (string, error) {
	md, err := zt.GetMetadataEntry(filename)
	switch {
	case err != nil:
		return "", err
	case md.UncompressedSize == 0:
		// Nothing to extract; skip building the zinfo entirely.
		return "", nil
	}

	zi, err := zt.Zinfo()
	if err != nil {
		return "", err
	}
	defer zi.Close()

	content, err := zi.ExtractDataFromFile(gz, md.UncompressedSize, md.UncompressedOffset)
	if err != nil {
		return "", err
	}
	return string(content), nil
}

// Zinfo deserializes and returns a Zinfo built from the checkpoint bytes and
// the compression algorithm recorded in the ztoc. Both the result and the
// error come straight from compression.NewZinfo.
func (zt Ztoc) Zinfo() (compression.Zinfo, error) {
	algorithm, checkpoints := zt.CompressionAlgorithm, zt.Checkpoints
	return compression.NewZinfo(algorithm, checkpoints)
}