1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333
|
package minimal
// NOTE: This is used from github.com/containers/image by callers that
// don't otherwise use containers/storage, so don't make this depend on any
// larger software like the graph drivers.
import (
"bytes"
"encoding/base64"
"encoding/binary"
"fmt"
"io"
"strings"
"time"
"github.com/containers/storage/pkg/archive"
jsoniter "github.com/json-iterator/go"
"github.com/klauspost/compress/zstd"
"github.com/opencontainers/go-digest"
"github.com/vbatts/tar-split/archive/tar"
)
// ZstdWriter is an interface that wraps standard io.WriteCloser and Reset() to reuse the compressor with a new writer.
type ZstdWriter interface {
io.WriteCloser
Reset(dest io.Writer)
}
// CreateZstdWriterFunc is a function that creates a ZstdWriter for the provided destination writer.
type CreateZstdWriterFunc func(dest io.Writer) (ZstdWriter, error)
// TOC is short for Table of Contents and is used by the zstd:chunked
// file format to effectively add an overall index into the contents
// of a tarball; it also includes file metadata.
type TOC struct {
// Version is currently expected to be 1
Version int `json:"version"`
// Entries is the list of file metadata in this TOC.
// The ordering in this array currently defaults to being the same
// as that of the tar stream; however, this should not be relied on.
Entries []FileMetadata `json:"entries"`
// TarSplitDigest is the checksum of the "tar-split" data which
// is included as a distinct skippable zstd frame before the TOC.
TarSplitDigest digest.Digest `json:"tarSplitDigest,omitempty"`
}
// FileMetadata is an entry in the TOC that includes both generic file metadata
// that duplicates what can found in the tar header (and should match), but
// also special/custom content (see below).
//
// Regular files may optionally be represented as a sequence of “chunks”,
// which may be ChunkTypeData or ChunkTypeZeros (and ChunkTypeData boundaries
// are heuristically determined to increase chance of chunk matching / reuse
// similar to rsync). In that case, the regular file is represented
// as an initial TypeReg entry (with all metadata for the file as a whole)
// immediately followed by zero or more TypeChunk entries (containing only Type,
// Name and Chunk* fields); if there is at least one TypeChunk entry, the Chunk*
// fields are relevant in all of these entries, including the initial
// TypeReg one.
//
// Note that the metadata here, when fetched by a zstd:chunked aware client,
// is used instead of that in the tar stream. The contents of the tar stream
// are not used in this scenario.
type FileMetadata struct {
// If you add any fields, update ensureFileMetadataMatches as well!
// The metadata below largely duplicates that in the tar headers.
Type string `json:"type"`
Name string `json:"name"`
Linkname string `json:"linkName,omitempty"`
Mode int64 `json:"mode,omitempty"`
Size int64 `json:"size,omitempty"`
UID int `json:"uid,omitempty"`
GID int `json:"gid,omitempty"`
ModTime *time.Time `json:"modtime,omitempty"`
AccessTime *time.Time `json:"accesstime,omitempty"`
ChangeTime *time.Time `json:"changetime,omitempty"`
Devmajor int64 `json:"devMajor,omitempty"`
Devminor int64 `json:"devMinor,omitempty"`
Xattrs map[string]string `json:"xattrs,omitempty"`
// Digest is a hexadecimal sha256 checksum of the file contents; it
// is empty for empty files
Digest string `json:"digest,omitempty"`
Offset int64 `json:"offset,omitempty"`
EndOffset int64 `json:"endOffset,omitempty"`
ChunkSize int64 `json:"chunkSize,omitempty"`
ChunkOffset int64 `json:"chunkOffset,omitempty"`
ChunkDigest string `json:"chunkDigest,omitempty"`
ChunkType string `json:"chunkType,omitempty"`
}
const (
ChunkTypeData = ""
ChunkTypeZeros = "zeros"
)
const (
// The following types correspond to regular types of entries that can
// appear in a tar archive.
TypeReg = "reg"
TypeLink = "hardlink"
TypeChar = "char"
TypeBlock = "block"
TypeDir = "dir"
TypeFifo = "fifo"
TypeSymlink = "symlink"
// TypeChunk is special; in zstd:chunked not only are files individually
// compressed and indexable, there is a "rolling checksum" used to compute
// "chunks" of individual file contents, that are also added to the TOC
TypeChunk = "chunk"
)
var TarTypes = map[byte]string{
tar.TypeReg: TypeReg,
tar.TypeLink: TypeLink,
tar.TypeChar: TypeChar,
tar.TypeBlock: TypeBlock,
tar.TypeDir: TypeDir,
tar.TypeFifo: TypeFifo,
tar.TypeSymlink: TypeSymlink,
}
func GetType(t byte) (string, error) {
r, found := TarTypes[t]
if !found {
return "", fmt.Errorf("unknown tarball type: %v", t)
}
return r, nil
}
const (
// ManifestChecksumKey is a hexadecimal sha256 digest of the compressed manifest digest.
ManifestChecksumKey = "io.github.containers.zstd-chunked.manifest-checksum"
// ManifestInfoKey is an annotation that signals the start of the TOC (manifest)
// contents which are embedded as a skippable zstd frame. It has a format of
// four decimal integers separated by `:` as follows:
// <offset>:<length>:<uncompressed length>:<type>
// The <type> is ManifestTypeCRFS which should have the value `1`.
ManifestInfoKey = "io.github.containers.zstd-chunked.manifest-position"
// TarSplitInfoKey is an annotation that signals the start of the "tar-split" metadata
// contents which are embedded as a skippable zstd frame. It has a format of
// three decimal integers separated by `:` as follows:
// <offset>:<length>:<uncompressed length>
TarSplitInfoKey = "io.github.containers.zstd-chunked.tarsplit-position"
// TarSplitChecksumKey is no longer used and is replaced by the TOC.TarSplitDigest field instead.
// The value is retained here as a constant as a historical reference for older zstd:chunked images.
// TarSplitChecksumKey = "io.github.containers.zstd-chunked.tarsplit-checksum"
// ManifestTypeCRFS is a manifest file compatible with the CRFS TOC file.
ManifestTypeCRFS = 1
// FooterSizeSupported is the footer size supported by this implementation.
// Newer versions of the image format might increase this value, so reject
// any version that is not supported.
FooterSizeSupported = 64
)
var (
// when the zstd decoder encounters a skippable frame + 1 byte for the size, it
// will ignore it.
// https://tools.ietf.org/html/rfc8478#section-3.1.2
skippableFrameMagic = []byte{0x50, 0x2a, 0x4d, 0x18}
ZstdChunkedFrameMagic = []byte{0x47, 0x4e, 0x55, 0x6c, 0x49, 0x6e, 0x55, 0x78}
)
func appendZstdSkippableFrame(dest io.Writer, data []byte) error {
if _, err := dest.Write(skippableFrameMagic); err != nil {
return err
}
size := make([]byte, 4)
binary.LittleEndian.PutUint32(size, uint32(len(data)))
if _, err := dest.Write(size); err != nil {
return err
}
if _, err := dest.Write(data); err != nil {
return err
}
return nil
}
type TarSplitData struct {
Data []byte
Digest digest.Digest
UncompressedSize int64
}
func WriteZstdChunkedManifest(dest io.Writer, outMetadata map[string]string, offset uint64, tarSplitData *TarSplitData, metadata []FileMetadata, createZstdWriter CreateZstdWriterFunc) error {
// 8 is the size of the zstd skippable frame header + the frame size
const zstdSkippableFrameHeader = 8
manifestOffset := offset + zstdSkippableFrameHeader
toc := TOC{
Version: 1,
Entries: metadata,
TarSplitDigest: tarSplitData.Digest,
}
json := jsoniter.ConfigCompatibleWithStandardLibrary
// Generate the manifest
manifest, err := json.Marshal(toc)
if err != nil {
return err
}
var compressedBuffer bytes.Buffer
zstdWriter, err := createZstdWriter(&compressedBuffer)
if err != nil {
return err
}
if _, err := zstdWriter.Write(manifest); err != nil {
zstdWriter.Close()
return err
}
if err := zstdWriter.Close(); err != nil {
return err
}
compressedManifest := compressedBuffer.Bytes()
manifestDigester := digest.Canonical.Digester()
manifestChecksum := manifestDigester.Hash()
if _, err := manifestChecksum.Write(compressedManifest); err != nil {
return err
}
outMetadata[ManifestChecksumKey] = manifestDigester.Digest().String()
outMetadata[ManifestInfoKey] = fmt.Sprintf("%d:%d:%d:%d", manifestOffset, len(compressedManifest), len(manifest), ManifestTypeCRFS)
if err := appendZstdSkippableFrame(dest, compressedManifest); err != nil {
return err
}
tarSplitOffset := manifestOffset + uint64(len(compressedManifest)) + zstdSkippableFrameHeader
outMetadata[TarSplitInfoKey] = fmt.Sprintf("%d:%d:%d", tarSplitOffset, len(tarSplitData.Data), tarSplitData.UncompressedSize)
if err := appendZstdSkippableFrame(dest, tarSplitData.Data); err != nil {
return err
}
footer := ZstdChunkedFooterData{
ManifestType: uint64(ManifestTypeCRFS),
Offset: manifestOffset,
LengthCompressed: uint64(len(compressedManifest)),
LengthUncompressed: uint64(len(manifest)),
OffsetTarSplit: tarSplitOffset,
LengthCompressedTarSplit: uint64(len(tarSplitData.Data)),
LengthUncompressedTarSplit: uint64(tarSplitData.UncompressedSize),
}
manifestDataLE := footerDataToBlob(footer)
return appendZstdSkippableFrame(dest, manifestDataLE)
}
func ZstdWriterWithLevel(dest io.Writer, level int) (ZstdWriter, error) {
el := zstd.EncoderLevelFromZstd(level)
return zstd.NewWriter(dest, zstd.WithEncoderLevel(el))
}
// ZstdChunkedFooterData contains all the data stored in the zstd:chunked footer.
// This footer exists to make the blobs self-describing, our implementation
// never reads it:
// Partial pull security hinges on the TOC digest, and that exists as a layer annotation;
// so we are relying on the layer annotations anyway, and doing so means we can avoid
// a round-trip to fetch this binary footer.
type ZstdChunkedFooterData struct {
ManifestType uint64
Offset uint64
LengthCompressed uint64
LengthUncompressed uint64
OffsetTarSplit uint64
LengthCompressedTarSplit uint64
LengthUncompressedTarSplit uint64
ChecksumAnnotationTarSplit string // Deprecated: This field is not a part of the footer and not used for any purpose.
}
func footerDataToBlob(footer ZstdChunkedFooterData) []byte {
// Store the offset to the manifest and its size in LE order
manifestDataLE := make([]byte, FooterSizeSupported)
binary.LittleEndian.PutUint64(manifestDataLE[8*0:], footer.Offset)
binary.LittleEndian.PutUint64(manifestDataLE[8*1:], footer.LengthCompressed)
binary.LittleEndian.PutUint64(manifestDataLE[8*2:], footer.LengthUncompressed)
binary.LittleEndian.PutUint64(manifestDataLE[8*3:], footer.ManifestType)
binary.LittleEndian.PutUint64(manifestDataLE[8*4:], footer.OffsetTarSplit)
binary.LittleEndian.PutUint64(manifestDataLE[8*5:], footer.LengthCompressedTarSplit)
binary.LittleEndian.PutUint64(manifestDataLE[8*6:], footer.LengthUncompressedTarSplit)
copy(manifestDataLE[8*7:], ZstdChunkedFrameMagic)
return manifestDataLE
}
// timeIfNotZero returns a pointer to the time.Time if it is not zero, otherwise it returns nil.
func timeIfNotZero(t *time.Time) *time.Time {
if t == nil || t.IsZero() {
return nil
}
return t
}
// NewFileMetadata creates a basic FileMetadata entry for hdr.
// The caller must set DigestOffset/EndOffset, and the Chunk* values, separately.
func NewFileMetadata(hdr *tar.Header) (FileMetadata, error) {
typ, err := GetType(hdr.Typeflag)
if err != nil {
return FileMetadata{}, err
}
xattrs := make(map[string]string)
for k, v := range hdr.PAXRecords {
xattrKey, ok := strings.CutPrefix(k, archive.PaxSchilyXattr)
if !ok {
continue
}
xattrs[xattrKey] = base64.StdEncoding.EncodeToString([]byte(v))
}
return FileMetadata{
Type: typ,
Name: hdr.Name,
Linkname: hdr.Linkname,
Mode: hdr.Mode,
Size: hdr.Size,
UID: hdr.Uid,
GID: hdr.Gid,
ModTime: timeIfNotZero(&hdr.ModTime),
AccessTime: timeIfNotZero(&hdr.AccessTime),
ChangeTime: timeIfNotZero(&hdr.ChangeTime),
Devmajor: hdr.Devmajor,
Devminor: hdr.Devminor,
Xattrs: xattrs,
}, nil
}
|