File: hash.go

Package: gitaly 16.11.6+ds1-4

// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package cache

import (
	"bytes"
	"crypto/sha256"
	"fmt"
	"hash"
	"io"
	"os"
	"sync"
)

var debugHash = false // set when GODEBUG=gocachehash=1

// HashSize is the number of bytes in a hash.
const HashSize = 32

// A Hash provides access to the canonical hash function used to index the cache.
// The current implementation uses salted SHA256, but clients must not assume this.
type Hash struct {
	h    hash.Hash
	name string        // for debugging
	buf  *bytes.Buffer // for verify
}

// hashSalt is a salt string added to the beginning of every hash
// created by NewHash. Using the golangci-lint version makes sure that different
// versions of the command do not address the same cache
// entries, so that a bug in one version does not affect the execution
// of other versions. This salt will result in additional ActionID files
// in the cache, but not additional copies of the large output files,
// which are still addressed by unsalted SHA256.
var hashSalt []byte

// SetSalt sets the salt that NewHash mixes into every hash it creates.
func SetSalt(b []byte) {
	hashSalt = b
}
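
// A minimal usage sketch (illustrative only, not part of this package): callers
// typically derive the salt from their own version string so that different
// versions of the tool do not share cache entries. The version literal below is
// hypothetical.
//
//	func init() {
//		SetSalt([]byte("example-tool v1.2.3"))
//	}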

// Subkey returns an action ID corresponding to mixing a parent
// action ID with a string description of the subkey.
func Subkey(parent ActionID, desc string) (ActionID, error) {
	h := sha256.New()
	const subkeyPrefix = "subkey:"
	if n, err := h.Write([]byte(subkeyPrefix)); n != len(subkeyPrefix) {
		return ActionID{}, fmt.Errorf("wrote %d/%d bytes of subkey prefix with error %s", n, len(subkeyPrefix), err)
	}
	if n, err := h.Write(parent[:]); n != len(parent) {
		return ActionID{}, fmt.Errorf("wrote %d/%d bytes of parent with error %s", n, len(parent), err)
	}
	if n, err := h.Write([]byte(desc)); n != len(desc) {
		return ActionID{}, fmt.Errorf("wrote %d/%d bytes of desc with error %s", n, len(desc), err)
	}

	var out ActionID
	h.Sum(out[:0])
	if debugHash {
		fmt.Fprintf(os.Stderr, "HASH subkey %x %q = %x\n", parent, desc, out)
	}
	if verify {
		hashDebug.Lock()
		hashDebug.m[out] = fmt.Sprintf("subkey %x %q", parent, desc)
		hashDebug.Unlock()
	}
	return out, nil
}
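
// A minimal usage sketch (illustrative only): a sub-step of a cached action can
// derive its own ID by mixing the parent ActionID with a textual description.
// Both parentID and the description string below are hypothetical.
//
//	subID, err := Subkey(parentID, "analyze:example/pkg")
//	if err != nil {
//		return err
//	}
//	// subID now indexes the cache entry for this sub-step.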

// NewHash returns a new Hash.
// The caller is expected to Write data to it and then call Sum.
func NewHash(name string) (*Hash, error) {
	h := &Hash{h: sha256.New(), name: name}
	if debugHash {
		fmt.Fprintf(os.Stderr, "HASH[%s]\n", h.name)
	}
	if n, err := h.Write(hashSalt); n != len(hashSalt) {
		return nil, fmt.Errorf("wrote %d/%d bytes of hash salt with error %s", n, len(hashSalt), err)
	}
	if verify {
		h.buf = new(bytes.Buffer)
	}
	return h, nil
}

// Write writes data to the running hash.
func (h *Hash) Write(b []byte) (int, error) {
	if debugHash {
		fmt.Fprintf(os.Stderr, "HASH[%s]: %q\n", h.name, b)
	}
	if h.buf != nil {
		h.buf.Write(b)
	}
	return h.h.Write(b)
}

// Sum returns the hash of the data written previously.
func (h *Hash) Sum() [HashSize]byte {
	var out [HashSize]byte
	h.h.Sum(out[:0])
	if debugHash {
		fmt.Fprintf(os.Stderr, "HASH[%s]: %x\n", h.name, out)
	}
	if h.buf != nil {
		hashDebug.Lock()
		if hashDebug.m == nil {
			hashDebug.m = make(map[[HashSize]byte]string)
		}
		hashDebug.m[out] = h.buf.String()
		hashDebug.Unlock()
	}
	return out
}
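
// A minimal usage sketch (illustrative only): build a key by writing every input
// that should influence the cache entry, then take the final sum. The hash name
// and input below are hypothetical.
//
//	h, err := NewHash("example-key")
//	if err != nil {
//		return err
//	}
//	if _, err := h.Write([]byte("input-1")); err != nil {
//		return err
//	}
//	sum := h.Sum() // [HashSize]byte covering the salt plus everything written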

// In GODEBUG=gocacheverify=1 mode,
// hashDebug holds the input to every computed hash ID,
// so that we can work backward from the ID involved in a
// cache entry mismatch to a description of what should be there.
var hashDebug struct {
	sync.Mutex
	m map[[HashSize]byte]string
}

// reverseHash returns the input used to compute the hash id.
func reverseHash(id [HashSize]byte) string {
	hashDebug.Lock()
	s := hashDebug.m[id]
	hashDebug.Unlock()
	return s
}

// hashFileCache memoizes FileHash results, keyed by file name.
var hashFileCache struct {
	sync.Mutex
	m map[string][HashSize]byte
}

// FileHash returns the hash of the named file.
// It caches repeated lookups for a given file,
// and the cache entry for a file can be initialized
// using SetFileHash.
// The hash used by FileHash is not the same as
// the hash used by NewHash.
func FileHash(file string) ([HashSize]byte, error) {
	hashFileCache.Lock()
	out, ok := hashFileCache.m[file]
	hashFileCache.Unlock()

	if ok {
		return out, nil
	}

	h := sha256.New()
	f, err := os.Open(file)
	if err != nil {
		if debugHash {
			fmt.Fprintf(os.Stderr, "HASH %s: %v\n", file, err)
		}
		return [HashSize]byte{}, err
	}
	_, err = io.Copy(h, f)
	f.Close()
	if err != nil {
		if debugHash {
			fmt.Fprintf(os.Stderr, "HASH %s: %v\n", file, err)
		}
		return [HashSize]byte{}, err
	}
	h.Sum(out[:0])
	if debugHash {
		fmt.Fprintf(os.Stderr, "HASH %s: %x\n", file, out)
	}

	SetFileHash(file, out)
	return out, nil
}

// SetFileHash sets the hash returned by FileHash for file.
func SetFileHash(file string, sum [HashSize]byte) {
	hashFileCache.Lock()
	if hashFileCache.m == nil {
		hashFileCache.m = make(map[string][HashSize]byte)
	}
	hashFileCache.m[file] = sum
	hashFileCache.Unlock()
}
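
// A minimal usage sketch (illustrative only; both file paths are hypothetical):
// FileHash computes and memoizes a file's SHA256 sum, and SetFileHash can
// pre-seed that memo when the sum is already known.
//
//	sum, err := FileHash("/path/to/source.go")
//	if err != nil {
//		return err
//	}
//	// Pre-seed the memo for another path known to have identical contents.
//	SetFileHash("/path/to/identical-copy.go", sum)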