File: crypto_aes_aesni.c

#include "cpusupport.h"
#ifdef CPUSUPPORT_X86_AESNI
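
/*
 * Note: in addition to this compile-time gate, callers are expected to
 * check cpusupport_x86_aesni() at runtime before invoking anything in
 * this file, as each docstring below reiterates.
 */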

#include <stdint.h>
#include <stdlib.h>
#include <wmmintrin.h>

#include "insecure_memzero.h"
#include "warnp.h"

#include "crypto_aes_aesni.h"

/* Expanded-key structure. */
struct crypto_aes_key_aesni {
	uint8_t rkeys_buf[15 * sizeof(__m128i) + (sizeof(__m128i) - 1)];
	__m128i * rkeys;
	size_t nr;
};
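
/*
 * The rkeys_buf member is oversized by sizeof(__m128i) - 1 bytes because
 * malloc does not promise 16-byte alignment; crypto_aes_key_expand_aesni
 * points ->rkeys at the first properly aligned address inside the buffer.
 */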

/* Compute an AES-128 round key. */
#define MKRKEY128(rkeys, i, rcon) do {				\
	__m128i _s = rkeys[i - 1];				\
	__m128i _t = rkeys[i - 1];				\
	_s = _mm_xor_si128(_s, _mm_slli_si128(_s, 4));		\
	_s = _mm_xor_si128(_s, _mm_slli_si128(_s, 8));		\
	_t = _mm_aeskeygenassist_si128(_t, rcon);		\
	_t = _mm_shuffle_epi32(_t, 0xff);			\
	rkeys[i] = _mm_xor_si128(_s, _t);			\
} while (0)
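
/*
 * How MKRKEY128 maps onto the FIPS-197 word-level schedule: the two
 * slli/xor steps turn _s into a prefix-xor of the previous round key, so
 * 32-bit lane j of _s is w0 ^ ... ^ wj; meanwhile the 0xff shuffle
 * broadcasts SubWord(RotWord(w3)) ^ rcon into every lane of _t.  Xoring
 * the two therefore yields all four words of the new round key at once.
 */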

/**
 * crypto_aes_key_expand_128_aesni(key, rkeys):
 * Expand the 128-bit AES key ${key} into the 11 round keys ${rkeys}.  This
 * implementation uses x86 AESNI instructions, and should only be used if
 * CPUSUPPORT_X86_AESNI is defined and cpusupport_x86_aesni() returns nonzero.
 */
static void
crypto_aes_key_expand_128_aesni(const uint8_t key[16], __m128i rkeys[11])
{

	/* The first round key is just the key. */
	/**
	 * XXX Compiler breakage:
	 * The intrinsic defined by Intel for _mm_loadu_si128 defines it as
	 * taking a (const __m128i *) parameter.  This forces us to write a
	 * bug: The cast to (const __m128i *) is invalid since it increases
	 * the alignment requirement of the pointer.  Alas, until compilers
	 * get fixed intrinsics, all we can do is code the bug and require
	 * that alignment-requirement-increasing compiler warnings get
	 * disabled.
	 */
	rkeys[0] = _mm_loadu_si128((const __m128i *)&key[0]);

	/*
	 * Each of the remaining round keys is computed from the preceding
	 * round key: rotword+subword+rcon (provided as aeskeygenassist) to
	 * compute the 'temp' value, then xor with 1, 2, 3, or all 4 of the
	 * 32-bit words from the preceding round key.  Unfortunately, 'rcon'
	 * is encoded as an immediate value, so we need to write the loop out
	 * ourselves rather than allowing the compiler to expand it.
	 */
	MKRKEY128(rkeys, 1, 0x01);
	MKRKEY128(rkeys, 2, 0x02);
	MKRKEY128(rkeys, 3, 0x04);
	MKRKEY128(rkeys, 4, 0x08);
	MKRKEY128(rkeys, 5, 0x10);
	MKRKEY128(rkeys, 6, 0x20);
	MKRKEY128(rkeys, 7, 0x40);
	MKRKEY128(rkeys, 8, 0x80);
	MKRKEY128(rkeys, 9, 0x1b);
	MKRKEY128(rkeys, 10, 0x36);
}
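
#if 0
/*
 * Minimal self-test sketch (not compiled; kept under #if 0 and not part
 * of the original file).  It checks the first computed round key against
 * the AES-128 key expansion example in FIPS-197 Appendix A.1.  The
 * function name is illustrative only.
 */
#include <string.h>

static int
crypto_aes_key_expand_128_aesni_selftest(void)
{
	static const uint8_t key[16] = {
		0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6,
		0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c
	};
	/* Expected round-1 key: a0fafe17 88542cb1 23a33939 2a6c7605. */
	static const uint8_t rk1[16] = {
		0xa0, 0xfa, 0xfe, 0x17, 0x88, 0x54, 0x2c, 0xb1,
		0x23, 0xa3, 0x39, 0x39, 0x2a, 0x6c, 0x76, 0x05
	};
	__m128i rkeys[11];
	uint8_t buf[16];

	/* Expand the key and extract the round-1 key. */
	crypto_aes_key_expand_128_aesni(key, rkeys);
	_mm_storeu_si128((__m128i *)buf, rkeys[1]);

	/* Return nonzero on success. */
	return (memcmp(buf, rk1, 16) == 0);
}
#endif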

/* Compute an AES-256 round key. */
#define MKRKEY256(rkeys, i, shuffle, rcon)	do {		\
	__m128i _s = rkeys[i - 2];				\
	__m128i _t = rkeys[i - 1];				\
	_s = _mm_xor_si128(_s, _mm_slli_si128(_s, 4));		\
	_s = _mm_xor_si128(_s, _mm_slli_si128(_s, 8));		\
	_t = _mm_aeskeygenassist_si128(_t, rcon);		\
	_t = _mm_shuffle_epi32(_t, shuffle);			\
	rkeys[i] = _mm_xor_si128(_s, _t);			\
} while (0)
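
/*
 * For reference, _mm_aeskeygenassist_si128(x, rcon) produces, in its four
 * 32-bit lanes (low to high): SubWord(x1), RotWord(SubWord(x1)) ^ rcon,
 * SubWord(x3), RotWord(SubWord(x3)) ^ rcon, where x1 and x3 are the
 * second and fourth words of x.  The 0xff shuffle thus broadcasts the
 * rotword+subword+rcon lane, while 0xaa broadcasts the subword-only lane.
 */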

/**
 * crypto_aes_key_expand_256_aesni(key, rkeys):
 * Expand the 256-bit AES key ${key} into the 15 round keys ${rkeys}.  This
 * implementation uses x86 AESNI instructions, and should only be used if
 * CPUSUPPORT_X86_AESNI is defined and cpusupport_x86_aesni() returns nonzero.
 */
static void
crypto_aes_key_expand_256_aesni(const uint8_t key[32], __m128i rkeys[15])
{

	/* The first two round keys are just the key. */
	/**
	 * XXX Compiler breakage:
	 * The intrinsic defined by Intel for _mm_loadu_si128 defines it as
	 * taking a (const __m128i *) parameter.  This forces us to write a
	 * bug: The cast to (const __m128i *) is invalid since it increases
	 * the alignment requirement of the pointer.  Alas, until compilers
	 * get fixed intrinsics, all we can do is code the bug and require
	 * that alignment-requirement-increasing compiler warnings get
	 * disabled.
	 */
	rkeys[0] = _mm_loadu_si128((const __m128i *)&key[0]);
	rkeys[1] = _mm_loadu_si128((const __m128i *)&key[16]);

	/*
	 * Each of the remaining round keys is computed from the preceding
	 * pair of keys.  Even rounds use rotword+subword+rcon, while odd
	 * rounds just use subword; the aeskeygenassist instruction computes
	 * both, and we use 0xff or 0xaa to select the one we need.  The rcon
	 * value used is irrelevant for odd rounds since we ignore the value
	 * which it feeds into.  Unfortunately, the 'shuffle' and 'rcon'
	 * values are encoded into the instructions as immediates, so we need
	 * to write the loop out ourselves rather than allowing the compiler
	 * to expand it.
	 */
	MKRKEY256(rkeys, 2, 0xff, 0x01);
	MKRKEY256(rkeys, 3, 0xaa, 0x00);
	MKRKEY256(rkeys, 4, 0xff, 0x02);
	MKRKEY256(rkeys, 5, 0xaa, 0x00);
	MKRKEY256(rkeys, 6, 0xff, 0x04);
	MKRKEY256(rkeys, 7, 0xaa, 0x00);
	MKRKEY256(rkeys, 8, 0xff, 0x08);
	MKRKEY256(rkeys, 9, 0xaa, 0x00);
	MKRKEY256(rkeys, 10, 0xff, 0x10);
	MKRKEY256(rkeys, 11, 0xaa, 0x00);
	MKRKEY256(rkeys, 12, 0xff, 0x20);
	MKRKEY256(rkeys, 13, 0xaa, 0x00);
	MKRKEY256(rkeys, 14, 0xff, 0x40);
}
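
/*
 * Only seven rcon values (0x01 through 0x40) appear above: AES-256
 * applies rcon to even-indexed round keys only, and rounds 2 through 14
 * contain seven of those.
 */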

/**
 * crypto_aes_key_expand_aesni(key, len):
 * Expand the ${len}-byte AES key ${key} into a structure which can be passed
 * to crypto_aes_encrypt_block_aesni.  The length must be 16 or 32.  This
 * implementation uses x86 AESNI instructions, and should only be used if
 * CPUSUPPORT_X86_AESNI is defined and cpusupport_x86_aesni() returns nonzero.
 */
void *
crypto_aes_key_expand_aesni(const uint8_t * key, size_t len)
{
	struct crypto_aes_key_aesni * kexp;
	size_t rkey_offset;

	/* Allocate structure. */
	if ((kexp = malloc(sizeof(struct crypto_aes_key_aesni))) == NULL)
		goto err0;

	/*
	 * Figure out where to put the round keys: malloc is not guaranteed
	 * to return 16-byte-aligned storage, so advance to the first
	 * __m128i-aligned address within rkeys_buf.
	 */
	rkey_offset = (uintptr_t)(&kexp->rkeys_buf[0]) % sizeof(__m128i);
	rkey_offset = (sizeof(__m128i) - rkey_offset) % sizeof(__m128i);
	kexp->rkeys = (void *)&kexp->rkeys_buf[rkey_offset];

	/* Compute round keys. */
	if (len == 16) {
		kexp->nr = 10;
		crypto_aes_key_expand_128_aesni(key, kexp->rkeys);
	} else if (len == 32) {
		kexp->nr = 14;
		crypto_aes_key_expand_256_aesni(key, kexp->rkeys);
	} else {
		warn0("Unsupported AES key length: %zu bytes", len);
		goto err1;
	}

	/* Success! */
	return (kexp);

err1:
	free(kexp);
err0:
	/* Failure! */
	return (NULL);
}

/**
 * crypto_aes_encrypt_block_aesni(in, out, key):
 * Using the expanded AES key ${key}, encrypt the block ${in} and write the
 * resulting ciphertext to ${out}.  This implementation uses x86 AESNI
 * instructions, and should only be used if CPUSUPPORT_X86_AESNI is defined
 * and cpusupport_x86_aesni() returns nonzero.
 */
void
crypto_aes_encrypt_block_aesni(const uint8_t * in, uint8_t * out,
    const void * key)
{
	const struct crypto_aes_key_aesni * _key = key;
	const __m128i * aes_key = _key->rkeys;
	__m128i aes_state;
	size_t nr = _key->nr;

	aes_state = _mm_loadu_si128((const __m128i *)in);
	aes_state = _mm_xor_si128(aes_state, aes_key[0]);
	aes_state = _mm_aesenc_si128(aes_state, aes_key[1]);
	aes_state = _mm_aesenc_si128(aes_state, aes_key[2]);
	aes_state = _mm_aesenc_si128(aes_state, aes_key[3]);
	aes_state = _mm_aesenc_si128(aes_state, aes_key[4]);
	aes_state = _mm_aesenc_si128(aes_state, aes_key[5]);
	aes_state = _mm_aesenc_si128(aes_state, aes_key[6]);
	aes_state = _mm_aesenc_si128(aes_state, aes_key[7]);
	aes_state = _mm_aesenc_si128(aes_state, aes_key[8]);
	aes_state = _mm_aesenc_si128(aes_state, aes_key[9]);
	if (nr > 10) {
		aes_state = _mm_aesenc_si128(aes_state, aes_key[10]);
		aes_state = _mm_aesenc_si128(aes_state, aes_key[11]);

		if (nr > 12) {
			aes_state = _mm_aesenc_si128(aes_state, aes_key[12]);
			aes_state = _mm_aesenc_si128(aes_state, aes_key[13]);
		}
	}

	aes_state = _mm_aesenclast_si128(aes_state, aes_key[nr]);
	_mm_storeu_si128((__m128i *)out, aes_state);
}
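
#if 0
/*
 * Usage sketch (not compiled; kept under #if 0 and not part of the
 * original file): expand a key via the public API, encrypt one block, and
 * compare against the AES-128 known-answer test from FIPS-197 Appendix
 * C.1.  The function name is illustrative only.
 */
#include <string.h>

static int
crypto_aes_aesni_kat(void)
{
	static const uint8_t key[16] = {
		0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
		0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
	};
	static const uint8_t ptext[16] = {
		0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
		0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff
	};
	static const uint8_t ctext[16] = {
		0x69, 0xc4, 0xe0, 0xd8, 0x6a, 0x7b, 0x04, 0x30,
		0xd8, 0xcd, 0xb7, 0x80, 0x70, 0xb4, 0xc5, 0x5a
	};
	uint8_t buf[16];
	void * kexp;

	/* Callers must have verified cpusupport_x86_aesni() already. */
	if ((kexp = crypto_aes_key_expand_aesni(key, 16)) == NULL)
		return (0);
	crypto_aes_encrypt_block_aesni(ptext, buf, kexp);
	crypto_aes_key_free_aesni(kexp);

	/* Return nonzero on success. */
	return (memcmp(buf, ctext, 16) == 0);
}
#endif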

/**
 * crypto_aes_key_free_aesni(key):
 * Free the expanded AES key ${key}.
 */
void
crypto_aes_key_free_aesni(void * key)
{

	/* Behave consistently with free(NULL). */
	if (key == NULL)
		return;

	/*
	 * Attempt to zero the expanded key.  insecure_memzero is used
	 * instead of a plain memset, since the compiler could otherwise
	 * optimize away a memset on memory which is about to be freed.
	 */
	insecure_memzero(key, sizeof(struct crypto_aes_key_aesni));

	/* Free the key. */
	free(key);
}

#endif /* CPUSUPPORT_X86_AESNI */