1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322
|
// Package multihash is the Go implementation of
// https://github.com/multiformats/multihash, or self-describing
// hashes.
package multihash
import (
"encoding/hex"
"errors"
"fmt"
"math"
b58 "github.com/mr-tron/base58/base58"
"github.com/multiformats/go-varint"
)
// errors
var (
ErrUnknownCode = errors.New("unknown multihash code")
ErrTooShort = errors.New("multihash too short. must be >= 2 bytes")
ErrTooLong = errors.New("multihash too long. must be < 129 bytes")
ErrLenNotSupported = errors.New("multihash does not yet support digests longer than 127 bytes")
ErrInvalidMultihash = errors.New("input isn't valid multihash")
ErrVarintBufferShort = errors.New("uvarint: buffer too small")
ErrVarintTooLong = errors.New("uvarint: varint too big (max 64bit)")
)
// ErrInconsistentLen is returned when a decoded multihash has an inconsistent length
type ErrInconsistentLen struct {
dm DecodedMultihash
lengthFound int
}
func (e ErrInconsistentLen) Error() string {
return fmt.Sprintf("multihash length inconsistent: expected %d; got %d", e.dm.Length, e.lengthFound)
}
// constants
const (
IDENTITY = 0x00
// Deprecated: use IDENTITY
ID = IDENTITY
SHA1 = 0x11
SHA2_256 = 0x12
SHA2_512 = 0x13
SHA3_224 = 0x17
SHA3_256 = 0x16
SHA3_384 = 0x15
SHA3_512 = 0x14
SHA3 = SHA3_512
KECCAK_224 = 0x1A
KECCAK_256 = 0x1B
KECCAK_384 = 0x1C
KECCAK_512 = 0x1D
BLAKE3 = 0x1E
SHAKE_128 = 0x18
SHAKE_256 = 0x19
BLAKE2B_MIN = 0xb201
BLAKE2B_MAX = 0xb240
BLAKE2S_MIN = 0xb241
BLAKE2S_MAX = 0xb260
MD5 = 0xd5
DBL_SHA2_256 = 0x56
MURMUR3X64_64 = 0x22
// Deprecated: use MURMUR3X64_64
MURMUR3 = MURMUR3X64_64
SHA2_256_TRUNC254_PADDED = 0x1012
X11 = 0x1100
POSEIDON_BLS12_381_A1_FC1 = 0xb401
)
func init() {
// Add blake2b (64 codes)
for c := uint64(BLAKE2B_MIN); c <= BLAKE2B_MAX; c++ {
n := c - BLAKE2B_MIN + 1
name := fmt.Sprintf("blake2b-%d", n*8)
Names[name] = c
Codes[c] = name
}
// Add blake2s (32 codes)
for c := uint64(BLAKE2S_MIN); c <= BLAKE2S_MAX; c++ {
n := c - BLAKE2S_MIN + 1
name := fmt.Sprintf("blake2s-%d", n*8)
Names[name] = c
Codes[c] = name
}
}
// Names maps the name of a hash to the code
var Names = map[string]uint64{
"identity": IDENTITY,
"sha1": SHA1,
"sha2-256": SHA2_256,
"sha2-512": SHA2_512,
"sha3": SHA3_512,
"sha3-224": SHA3_224,
"sha3-256": SHA3_256,
"sha3-384": SHA3_384,
"sha3-512": SHA3_512,
"dbl-sha2-256": DBL_SHA2_256,
"murmur3-x64-64": MURMUR3X64_64,
"keccak-224": KECCAK_224,
"keccak-256": KECCAK_256,
"keccak-384": KECCAK_384,
"keccak-512": KECCAK_512,
"blake3": BLAKE3,
"shake-128": SHAKE_128,
"shake-256": SHAKE_256,
"sha2-256-trunc254-padded": SHA2_256_TRUNC254_PADDED,
"x11": X11,
"md5": MD5,
"poseidon-bls12_381-a2-fc1": POSEIDON_BLS12_381_A1_FC1,
}
// Codes maps a hash code to it's name
var Codes = map[uint64]string{
IDENTITY: "identity",
SHA1: "sha1",
SHA2_256: "sha2-256",
SHA2_512: "sha2-512",
SHA3_224: "sha3-224",
SHA3_256: "sha3-256",
SHA3_384: "sha3-384",
SHA3_512: "sha3-512",
DBL_SHA2_256: "dbl-sha2-256",
MURMUR3X64_64: "murmur3-x64-64",
KECCAK_224: "keccak-224",
KECCAK_256: "keccak-256",
KECCAK_384: "keccak-384",
KECCAK_512: "keccak-512",
BLAKE3: "blake3",
SHAKE_128: "shake-128",
SHAKE_256: "shake-256",
SHA2_256_TRUNC254_PADDED: "sha2-256-trunc254-padded",
X11: "x11",
POSEIDON_BLS12_381_A1_FC1: "poseidon-bls12_381-a2-fc1",
MD5: "md5",
}
// reads a varint from buf and returns bytes read.
func uvarint(buf []byte) (uint64, []byte, error) {
n, c, err := varint.FromUvarint(buf)
if err != nil {
return n, buf, err
}
if c == 0 {
return n, buf, ErrVarintBufferShort
} else if c < 0 {
return n, buf[-c:], ErrVarintTooLong
} else {
return n, buf[c:], nil
}
}
// DecodedMultihash represents a parsed multihash and allows
// easy access to the different parts of a multihash.
type DecodedMultihash struct {
Code uint64
Name string
Length int // Length is just int as it is type of len() opearator
Digest []byte // Digest holds the raw multihash bytes
}
// Multihash is byte slice with the following form:
// <hash function code><digest size><hash function output>.
// See the spec for more information.
type Multihash []byte
// HexString returns the hex-encoded representation of a multihash.
func (m Multihash) HexString() string {
return hex.EncodeToString([]byte(m))
}
// String is an alias to HexString().
func (m Multihash) String() string {
return m.HexString()
}
// FromHexString parses a hex-encoded multihash.
func FromHexString(s string) (Multihash, error) {
b, err := hex.DecodeString(s)
if err != nil {
return Multihash{}, err
}
return Cast(b)
}
// B58String returns the B58-encoded representation of a multihash.
func (m Multihash) B58String() string {
return b58.Encode([]byte(m))
}
// FromB58String parses a B58-encoded multihash.
func FromB58String(s string) (m Multihash, err error) {
b, err := b58.Decode(s)
if err != nil {
return Multihash{}, ErrInvalidMultihash
}
return Cast(b)
}
// Cast casts a buffer onto a multihash, and returns an error
// if it does not work.
func Cast(buf []byte) (Multihash, error) {
_, err := Decode(buf)
if err != nil {
return Multihash{}, err
}
return Multihash(buf), nil
}
// Decode parses multihash bytes into a DecodedMultihash.
func Decode(buf []byte) (*DecodedMultihash, error) {
// outline decode allowing the &dm expression to be inlined into the caller.
// This moves the heap allocation into the caller and if the caller doesn't
// leak dm the compiler will use a stack allocation instead.
// If you do not outline this &dm always heap allocate since the pointer is
// returned which cause a heap allocation because Decode's stack frame is
// about to disapear.
dm, err := decode(buf)
if err != nil {
return nil, err
}
return &dm, nil
}
func decode(buf []byte) (dm DecodedMultihash, err error) {
rlen, code, hdig, err := readMultihashFromBuf(buf)
if err != nil {
return DecodedMultihash{}, err
}
dm = DecodedMultihash{
Code: code,
Name: Codes[code],
Length: len(hdig),
Digest: hdig,
}
if len(buf) != rlen {
return dm, ErrInconsistentLen{dm, rlen}
}
return dm, nil
}
// Encode a hash digest along with the specified function code.
// Note: the length is derived from the length of the digest itself.
//
// The error return is legacy; it is always nil.
func Encode(buf []byte, code uint64) ([]byte, error) {
// FUTURE: this function always causes heap allocs... but when used, this value is almost always going to be appended to another buffer (either as part of CID creation, or etc) -- should this whole function be rethought and alternatives offered?
newBuf := make([]byte, varint.UvarintSize(code)+varint.UvarintSize(uint64(len(buf)))+len(buf))
n := varint.PutUvarint(newBuf, code)
n += varint.PutUvarint(newBuf[n:], uint64(len(buf)))
copy(newBuf[n:], buf)
return newBuf, nil
}
// EncodeName is like Encode() but providing a string name
// instead of a numeric code. See Names for allowed values.
func EncodeName(buf []byte, name string) ([]byte, error) {
return Encode(buf, Names[name])
}
// readMultihashFromBuf reads a multihash from the given buffer, returning the
// individual pieces of the multihash.
// Note: the returned digest is a slice over the passed in data and should be
// copied if the buffer will be reused
func readMultihashFromBuf(buf []byte) (int, uint64, []byte, error) {
initBufLength := len(buf)
if initBufLength < 2 {
return 0, 0, nil, ErrTooShort
}
var err error
var code, length uint64
code, buf, err = uvarint(buf)
if err != nil {
return 0, 0, nil, err
}
length, buf, err = uvarint(buf)
if err != nil {
return 0, 0, nil, err
}
if length > math.MaxInt32 {
return 0, 0, nil, errors.New("digest too long, supporting only <= 2^31-1")
}
if int(length) > len(buf) {
return 0, 0, nil, errors.New("length greater than remaining number of bytes in buffer")
}
// rlen is the advertised size of the CID
rlen := (initBufLength - len(buf)) + int(length)
return rlen, code, buf[:length], nil
}
// MHFromBytes reads a multihash from the given byte buffer, returning the
// number of bytes read as well as the multihash
func MHFromBytes(buf []byte) (int, Multihash, error) {
nr, _, _, err := readMultihashFromBuf(buf)
if err != nil {
return 0, nil, err
}
return nr, Multihash(buf[:nr]), nil
}
|