/* Copyright 2021 The ChromiumOS Authors
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*
* SHA256 implementation using x86 SHA extension.
* Mainly from https://github.com/noloader/SHA-Intrinsics/blob/master/sha256-x86.c,
* Written and place in public domain by Jeffrey Walton
* Based on code from Intel, and by Sean Gulley for
* the miTLS project.
*/
#include "2common.h"
#include "2sha.h"
#include "2sha_private.h"
#include "2api.h"
/*
 * Word-order permutation for reading the hash state back out of
 * vb2_sha_ctx.h[] (the consumer lives in the common SHA-256 code, not in
 * this file).  NOTE(review): presumably this undoes the ABEF/CDGH register
 * layout that sha256rnds2 keeps the state in — confirm against the reader
 * in 2sha_private.h / 2sha256.c.
 */
const uint32_t vb2_hash_seq[8] = {3, 2, 7, 6, 1, 0, 5, 4};
/*
 * 16-byte GCC vector of 32-bit ints; stands in for Intel's __m128i so the
 * file does not need <immintrin.h> (the vb2_* helpers below mirror the
 * corresponding _mm_* intrinsics).
 */
typedef int vb2_m128i __attribute__ ((vector_size(16)));
/*
 * Load 16 bytes from @ptr, which may be unaligned (movups, unlike movaps,
 * has no alignment requirement).  Equivalent of _mm_loadu_si128().
 */
static inline vb2_m128i vb2_loadu_si128(vb2_m128i *ptr)
{
	vb2_m128i result;
	asm volatile ("movups %1, %0" : "=x"(result) : "m"(*ptr));
	return result;
}
/*
 * Store 16 bytes to @to, which may be unaligned.  Equivalent of
 * _mm_storeu_si128().
 */
static inline void vb2_storeu_si128(vb2_m128i *to, vb2_m128i from)
{
	asm volatile ("movups %1, %0" : "=m"(*to) : "x"(from));
}
/*
 * Per-lane 32-bit addition of two vectors (GCC vector extension does the
 * element-wise add).  Equivalent of _mm_add_epi32().
 */
static inline vb2_m128i vb2_add_epi32(vb2_m128i a, vb2_m128i b)
{
	vb2_m128i sum;

	sum = b + a;
	return sum;
}
/*
 * Shuffle the bytes of @value according to @mask (pshufb).  Equivalent of
 * _mm_shuffle_epi8().  Used below with shuf_mask to reorder message bytes.
 */
static inline vb2_m128i vb2_shuffle_epi8(vb2_m128i value, vb2_m128i mask)
{
	asm ("pshufb %1, %0" : "+x"(value) : "xm"(mask));
	return value;
}
/*
 * Shuffle the 32-bit words of @value by immediate @mask (pshufd).
 * Equivalent of _mm_shuffle_epi32().  The "i" constraint means @mask must
 * be a compile-time constant at every call site.
 */
static inline vb2_m128i vb2_shuffle_epi32(vb2_m128i value, int mask)
{
	vb2_m128i result;
	asm ("pshufd %2, %1, %0" : "=x"(result) : "xm"(value), "i" (mask));
	return result;
}
/*
 * Concatenate @a:@b and shift the pair right by @imm8 bytes, keeping the
 * low 16 bytes (palignr).  Equivalent of _mm_alignr_epi8().  @imm8 must be
 * a compile-time constant ("i" constraint).
 */
static inline vb2_m128i vb2_alignr_epi8(vb2_m128i a, vb2_m128i b, int imm8)
{
	asm ("palignr %2, %1, %0" : "+x"(a) : "xm"(b), "i"(imm8));
	return a;
}
/*
 * First half of the SHA-256 message-schedule update (sha256msg1).
 * Equivalent of _mm_sha256msg1_epu32().
 */
static inline vb2_m128i vb2_sha256msg1_epu32(vb2_m128i a, vb2_m128i b)
{
	asm ("sha256msg1 %1, %0" : "+x"(a) : "xm"(b));
	return a;
}
/*
 * Second half of the SHA-256 message-schedule update (sha256msg2).
 * Equivalent of _mm_sha256msg2_epu32().
 */
static inline vb2_m128i vb2_sha256msg2_epu32(vb2_m128i a, vb2_m128i b)
{
	asm ("sha256msg2 %1, %0" : "+x"(a) : "xm"(b));
	return a;
}
/*
 * Perform two SHA-256 rounds (sha256rnds2) on state @a using state @b and
 * the message+constant words in @k.  Equivalent of _mm_sha256rnds2_epu32().
 * The "Yz" constraint forces @k into %xmm0, the implicit third operand the
 * instruction requires.
 */
static inline vb2_m128i vb2_sha256rnds2_epu32(vb2_m128i a, vb2_m128i b,
					      vb2_m128i k)
{
	asm ("sha256rnds2 %1, %0" : "+x"(a) : "xm"(b), "Yz"(k));
	return a;
}
/*
 * Load message-word group @j (16 bytes) of 64-byte block @i, byte-reorder
 * it with shuf_mask, add the matching round constants from vb2_sha256_k[],
 * and run two rounds updating state1.  Relies on caller locals: message,
 * msgtmp[], msg, state0, state1, shuf_mask.
 */
#define SHA256_X86_PUT_STATE1(j, i) \
{ \
	msgtmp[j] = vb2_loadu_si128((vb2_m128i *) \
			(message + (i << 6) + (j * 16))); \
	msgtmp[j] = vb2_shuffle_epi8(msgtmp[j], shuf_mask); \
	msg = vb2_add_epi32(msgtmp[j], \
			vb2_loadu_si128((vb2_m128i *)&vb2_sha256_k[j * 4])); \
	state1 = vb2_sha256rnds2_epu32(state1, state0, msg); \
}
/*
 * Companion of PUT_STATE1: move msg's upper two words into the low half
 * (pshufd 0x0E selects words 2,3) and run the other two rounds, this time
 * updating state0.  Each PUT_STATE1 + PUT_STATE0 pair is four rounds.
 */
#define SHA256_X86_PUT_STATE0() \
{ \
	msg = vb2_shuffle_epi32(msg, 0x0E); \
	state0 = vb2_sha256rnds2_epu32(state0, state1, msg); \
}
/*
 * Middle-round group @j (4..14): run four rounds on schedule group @j
 * while extending the message schedule in place.  msgtmp[] is used as a
 * 4-entry ring buffer (k = j mod 4); sha256msg1/palignr/sha256msg2
 * together compute the next 4 schedule words.
 */
#define SHA256_X86_LOOP(j) \
{ \
	int k = j & 3; \
	int prev_k = (k + 3) & 3; \
	int next_k = (k + 1) & 3; \
	msg = vb2_add_epi32(msgtmp[k], \
			vb2_loadu_si128((vb2_m128i *)&vb2_sha256_k[j * 4])); \
	state1 = vb2_sha256rnds2_epu32(state1, state0, msg); \
	tmp = vb2_alignr_epi8(msgtmp[k], msgtmp[prev_k], 4); \
	msgtmp[next_k] = vb2_add_epi32(msgtmp[next_k], tmp); \
	msgtmp[next_k] = vb2_sha256msg2_epu32(msgtmp[next_k], \
			msgtmp[k]); \
	SHA256_X86_PUT_STATE0(); \
	msgtmp[prev_k] = vb2_sha256msg1_epu32(msgtmp[prev_k], \
			msgtmp[k]); \
}
/*
 * SHA-256 compression function using the x86 SHA extension.
 *
 * @message   input data, @block_nb consecutive 64-byte blocks
 * @block_nb  number of blocks to process
 *
 * Reads and writes the hash state in the global vb2_sha_ctx.h[].
 * NOTE(review): h[0..3]/h[4..7] are loaded and stored directly as the two
 * sha256rnds2 state registers, so h[] appears to be kept in the
 * instruction's ABEF/CDGH layout rather than plain a..h order (presumably
 * vb2_hash_seq above reorders words at read-out) — confirm with the
 * consumer of vb2_hash_seq.
 *
 * Per block: 32 sha256rnds2 invocations x 2 rounds each = 64 rounds
 * (4 PUT_STATE1/PUT_STATE0 pairs, 11 LOOPs, plus the final group 15).
 */
static void vb2_sha256_transform_x86ext(const uint8_t *message,
					unsigned int block_nb)
{
	vb2_m128i state0, state1, msg, abef_save, cdgh_save;
	vb2_m128i msgtmp[4];
	vb2_m128i tmp;
	int i;
	/* pshufb mask reversing the bytes within each 32-bit word
	   (big-endian message words on a little-endian machine). */
	const vb2_m128i shuf_mask = {0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f};
	state0 = vb2_loadu_si128((vb2_m128i *)&vb2_sha_ctx.h[0]);
	state1 = vb2_loadu_si128((vb2_m128i *)&vb2_sha_ctx.h[4]);
	for (i = 0; i < (int) block_nb; i++) {
		/* Keep the incoming state for the final Davies-Meyer add. */
		abef_save = state0;
		cdgh_save = state1;
		/* Rounds 0-15: message words come straight from the block;
		   start sha256msg1 pre-computation as groups become
		   available. */
		SHA256_X86_PUT_STATE1(0, i);
		SHA256_X86_PUT_STATE0();
		SHA256_X86_PUT_STATE1(1, i);
		SHA256_X86_PUT_STATE0();
		msgtmp[0] = vb2_sha256msg1_epu32(msgtmp[0], msgtmp[1]);
		SHA256_X86_PUT_STATE1(2, i);
		SHA256_X86_PUT_STATE0();
		msgtmp[1] = vb2_sha256msg1_epu32(msgtmp[1], msgtmp[2]);
		SHA256_X86_PUT_STATE1(3, i);
		tmp = vb2_alignr_epi8(msgtmp[3], msgtmp[2], 4);
		msgtmp[0] = vb2_add_epi32(msgtmp[0], tmp);
		msgtmp[0] = vb2_sha256msg2_epu32(msgtmp[0], msgtmp[3]);
		SHA256_X86_PUT_STATE0();
		msgtmp[2] = vb2_sha256msg1_epu32(msgtmp[2], msgtmp[3]);
		/* Rounds 16-59: schedule extended on the fly. */
		SHA256_X86_LOOP(4);
		SHA256_X86_LOOP(5);
		SHA256_X86_LOOP(6);
		SHA256_X86_LOOP(7);
		SHA256_X86_LOOP(8);
		SHA256_X86_LOOP(9);
		SHA256_X86_LOOP(10);
		SHA256_X86_LOOP(11);
		SHA256_X86_LOOP(12);
		SHA256_X86_LOOP(13);
		SHA256_X86_LOOP(14);
		/* Rounds 60-63: last schedule group, no further extension
		   needed. */
		msg = vb2_add_epi32(msgtmp[3],
				vb2_loadu_si128((vb2_m128i *)&vb2_sha256_k[15 * 4]));
		state1 = vb2_sha256rnds2_epu32(state1, state0, msg);
		SHA256_X86_PUT_STATE0();
		/* Davies-Meyer: add the block's starting state back in. */
		state0 = vb2_add_epi32(state0, abef_save);
		state1 = vb2_add_epi32(state1, cdgh_save);
	}
	vb2_storeu_si128((vb2_m128i *)&vb2_sha_ctx.h[0], state0);
	vb2_storeu_si128((vb2_m128i *)&vb2_sha_ctx.h[4], state1);
}
/*
 * Public hardware-crypto transform hook: delegate to the x86 SHA extension
 * implementation above.  Same contract as vb2_sha256_transform_x86ext().
 */
void vb2_sha256_transform_hwcrypto(const uint8_t *message,
				   unsigned int block_nb)
{
	vb2_sha256_transform_x86ext(message, block_nb);
}