1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151
|
// Package md5x16 implements 16-lane parallel MD5 with AVX-512 instructions.
package md5x16
import (
"encoding/binary"
"errors"
"math"
"reflect"
"unsafe"
)
//go:generate go run asm.go -out md5x16.s -stubs stub.go
const (
	// Size is the size of an MD5 checksum in bytes.
	Size = 16

	// BlockSize is the block size of MD5 in bytes.
	BlockSize = 64

	// Lanes is the maximum number of MD5 computations performed in parallel.
	Lanes = 16
)
// Validate reports whether data satisfies the preconditions of Sum. It
// returns nil when the lane configuration can be built, and otherwise the
// error describing why it cannot.
func Validate(data [Lanes][]byte) error {
	if _, err := config(data); err != nil {
		return err
	}
	return nil
}
// Sum computes the MD5 checksums of up to Lanes inputs in parallel.
//
// Every non-nil input must have the same length, and the start addresses of
// the inputs must lie within a 32-bit span of each other. Sum panics when
// these preconditions do not hold; call Validate beforehand to check them.
// Digests for nil (inactive) lanes are left as all zeros.
func Sum(data [Lanes][]byte) [Lanes][Size]byte {
	cfg, err := config(data)
	if err != nil {
		panic(err)
	}

	// Load the MD5 initialization vector into every active lane.
	iv := [4]uint32{0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476}
	var state [4][Lanes]uint32
	for _, lane := range cfg.active {
		for w, v := range iv {
			state[w][lane] = v
		}
	}

	// Hash all whole blocks straight from the caller's memory, advancing the
	// shared base pointer by one block each round.
	tail := cfg.n % BlockSize
	consumed := cfg.n - tail
	ptr := cfg.base
	for done := 0; done < consumed; done += BlockSize {
		block(&state, ptr, &cfg.offsets, cfg.mask)
		ptr += BlockSize
	}

	// Stage the trailing bytes of each active lane in a scratch buffer that
	// holds one block per lane, followed by the 0x80 padding marker.
	var (
		scratch [Lanes * BlockSize]byte
		offsets [Lanes]uint32
	)
	scratchBase := dataptr(scratch[:])
	for _, lane := range cfg.active {
		start := lane * BlockSize
		offsets[lane] = uint32(start)
		padded := scratch[start : start+BlockSize]
		copy(padded, data[lane][consumed:])
		padded[tail] = 0x80
	}

	// With 56 or more trailing bytes the 8-byte length field does not fit in
	// the same block: hash the staged block, then start from an all-zero one.
	if tail >= 56 {
		block(&state, scratchBase, &offsets, cfg.mask)
		scratch = [Lanes * BlockSize]byte{}
	}

	// Append the message length in bits and hash the final block.
	bits := uint64(8 * cfg.n)
	for _, lane := range cfg.active {
		binary.LittleEndian.PutUint64(scratch[lane*BlockSize+56:], bits)
	}
	block(&state, scratchBase, &offsets, cfg.mask)

	// Serialize the four state words of each active lane, little-endian.
	var digest [Lanes][Size]byte
	for _, lane := range cfg.active {
		out := digest[lane][:]
		for w := range state {
			binary.LittleEndian.PutUint32(out[4*w:], state[w][lane])
		}
	}
	return digest
}
// lanes represents the configuration of the 16 data lanes of an MD5
// computation, as derived by config from the inputs passed to Sum. Only lanes
// whose input slice is non-nil are considered active.
type lanes struct {
n int // common length in bytes of every active (non-nil) lane
active []int // indexes of the active lanes, in increasing order
mask uint16 // bitmask of active lanes: bit l is set when lane l is active
base uintptr // lowest data address among the active lanes
offsets [Lanes]uint32 // per-lane data address minus base; left zero for inactive lanes
}
// config builds the lane configuration for data. A lane is active when its
// slice is non-nil. It returns an error when no lane is active, when the
// active lanes differ in length, or when the start address of some active lane
// is more than a 32-bit offset away from the lowest start address.
func config(data [Lanes][]byte) (*lanes, error) {
	c := new(lanes)

	// Record which lanes carry data, both as an index list and as a bitmask.
	for i := range data {
		if data[i] == nil {
			continue
		}
		c.active = append(c.active, i)
		c.mask |= 1 << i
	}
	if len(c.active) == 0 {
		return nil, errors.New("no active lanes")
	}

	// All active lanes must agree with the length of the first one.
	first, rest := c.active[0], c.active[1:]
	c.n = len(data[first])
	for _, i := range rest {
		if len(data[i]) != c.n {
			return nil, errors.New("length mismatch")
		}
	}

	// The base pointer is the lowest data address among the active lanes.
	c.base = dataptr(data[first])
	for _, i := range rest {
		if p := dataptr(data[i]); p < c.base {
			c.base = p
		}
	}

	// Every lane is addressed as base plus an unsigned 32-bit offset.
	for _, i := range c.active {
		off := dataptr(data[i]) - c.base
		if off > math.MaxUint32 {
			return nil, errors.New("input data exceed 32-bit memory region")
		}
		c.offsets[i] = uint32(off)
	}
	return c, nil
}
// dataptr returns the address stored in the slice header of data, i.e. the
// address of the first element of its backing array, as a plain integer. A
// nil slice yields 0.
func dataptr(data []byte) uintptr {
	return (*reflect.SliceHeader)(unsafe.Pointer(&data)).Data
}
|