/*
* SHA-512 hash in x86-64 assembly
*
* Copyright (c) 2017 Project Nayuki. (MIT License)
* https://www.nayuki.io/page/fast-sha2-hashes-in-x86-assembly
*
* Permission is hereby granted, free of charge, to any person obtaining a copy of
* this software and associated documentation files (the "Software"), to deal in
* the Software without restriction, including without limitation the rights to
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
* the Software, and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
* - The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
* - The Software is provided "as is", without warranty of any kind, express or
* implied, including but not limited to the warranties of merchantability,
* fitness for a particular purpose and noninfringement. In no event shall the
* authors or copyright holders be liable for any claim, damages or other
* liability, whether in an action of contract, tort or otherwise, arising from,
* out of or in connection with the Software or the use or other dealings in the
* Software.
*/
/*
 * void sha512_compress(uint64_t state[8], const uint8_t block[128])
 *
 * Runs the 80 SHA-512 rounds over one 128-byte message block and adds the
 * resulting working variables into state[0..7] in place.
 *
 * Syntax: GNU as, AT&T operand order, passed through the C preprocessor.
 * ABI:    System V AMD64 (arguments arrive in rdi and rsi).
 *         NOTE(review): not usable as-is under the Microsoft x64 convention,
 *         where arguments arrive in rcx/rdx and xmm6 is callee-saved.
 * In:     rdi = state, eight 64-bit words, read and then updated
 *         rsi = block, 128 bytes, only read; each 8-byte word is byte-swapped
 *               after loading (big-endian message words on little-endian x86)
 * Out:    nothing in registers; state[] is updated in memory
 * Clobb:  rax, rcx, rdx, r8, r9, xmm0-xmm6, flags
 *         (rbx and r10-r15 are used but restored before returning)
 * Stack:  128 bytes of scratch below the entry rsp; the routine makes no calls.
 */
#ifdef __APPLE__
.globl _sha512_compress
_sha512_compress:
#else
.globl sha512_compress
#ifdef __ELF__
/* Give the ELF symbol a function type so linkers, debuggers and profilers treat it as code. */
.type sha512_compress, @function
#endif
sha512_compress:
#endif
    /*
     * Storage usage:
     *   Bytes  Location  Description
     *       8  rax       Temporary for calculation per round
     *       8  rbx       Temporary for calculation per round (holds the schedule word W[i])
     *       8  rcx       Temporary for calculation per round
     *       8  rdx       Temporary for calculation per round
     *       8  rsi       Base address of block array argument (read-only)
     *       8  rdi       Base address of state array argument (register is never modified;
     *                    the array it points to is updated after the last round)
     *       8  rsp       x86-64 stack pointer
     *       8  r8        SHA-512 state variable A
     *       8  r9        SHA-512 state variable B
     *       8  r10       SHA-512 state variable C
     *       8  r11       SHA-512 state variable D
     *       8  r12       SHA-512 state variable E
     *       8  r13       SHA-512 state variable F
     *       8  r14       SHA-512 state variable G
     *       8  r15       SHA-512 state variable H
     *     128  [rsp+0]   Circular buffer of most recent 16 key schedule items, 8 bytes each
     *      16  xmm0      Caller's value of r10 (only low 64 bits are used)
     *      16  xmm1      Caller's value of r11 (only low 64 bits are used)
     *      16  xmm2      Caller's value of r12 (only low 64 bits are used)
     *      16  xmm3      Caller's value of r13 (only low 64 bits are used)
     *      16  xmm4      Caller's value of r14 (only low 64 bits are used)
     *      16  xmm5      Caller's value of r15 (only low 64 bits are used)
     *      16  xmm6      Caller's value of rbx (only low 64 bits are used)
     */

    /* Stack slot of schedule word W[i]; the index wraps modulo 16 (circular buffer). */
    #define SCHED(i)  (((i)&0xF)*8)(%rsp)

    /*
     * Rounds 0..15: W[i] comes straight from the message block.
     * Leaves W[i] in rbx for ROUNDTAIL and stores it in the circular buffer.
     */
    #define ROUNDa(i, a, b, c, d, e, f, g, h, k)  \
        movq    (i*8)(%rsi), %rbx;  \
        /* Message words are big-endian; swap to native byte order */  \
        bswapq  %rbx;               \
        movq    %rbx, SCHED(i);     \
        ROUNDTAIL(a, b, c, d, e, f, g, h, k)

    /*
     * Rounds 16..79: W[i] = W[i-16] + s0(W[i-15]) + W[i-7] + s1(W[i-2]), where
     *   s0(x) = ROTR(x,1)  ^ ROTR(x,8)  ^ SHR(x,7)
     *   s1(x) = ROTR(x,19) ^ ROTR(x,61) ^ SHR(x,6)
     * Leaves W[i] in rbx for ROUNDTAIL and overwrites the W[i-16] slot with it.
     */
    #define ROUNDb(i, a, b, c, d, e, f, g, h, k)  \
        movq  SCHED(i-15), %rax;  \
        /* rbx accumulates W[i-16] + W[i-7] */  \
        movq  SCHED(i-16), %rbx;  \
        addq  SCHED(i- 7), %rbx;  \
        /* rax = s0(W[i-15]) */   \
        movq  %rax, %rcx;         \
        movq  %rax, %rdx;         \
        rorq  $8, %rcx;           \
        shrq  $7, %rdx;           \
        rorq  $1, %rax;           \
        xorq  %rdx, %rcx;         \
        xorq  %rcx, %rax;         \
        addq  %rax, %rbx;         \
        /* rax = s1(W[i-2]) */    \
        movq  SCHED(i- 2), %rax;  \
        movq  %rax, %rcx;         \
        movq  %rax, %rdx;         \
        rorq  $61, %rcx;          \
        shrq  $6, %rdx;           \
        rorq  $19, %rax;          \
        xorq  %rdx, %rcx;         \
        xorq  %rcx, %rax;         \
        addq  %rax, %rbx;         \
        movq  %rbx, SCHED(i);     \
        ROUNDTAIL(a, b, c, d, e, f, g, h, k)

    /*
     * Common part of every round. Expects W[i] in rbx; clobbers rax, rcx, flags.
     * On exit h holds the new A value and d holds the new E value; the caller
     * rotates the register-to-variable assignment instead of moving data.
     */
    #define ROUNDTAIL(a, b, c, d, e, f, g, h, k)  \
        /* Part 0 */               \
        /* ROR transformation inspired by Intel's SHA-256 implementation */  \
        /* rax = ROTR(e,14) ^ ROTR(e,18) ^ ROTR(e,41), via nested rotates 23+4+14 */  \
        movq    %e, %rax;          \
        rorq    $23, %rax;         \
        xorq    %e, %rax;          \
        rorq    $4, %rax;          \
        xorq    %e, %rax;          \
        rorq    $14, %rax;         \
        addq    %rbx, %h;          \
        /* rcx = Ch(e,f,g) computed as ((g ^ f) & e) ^ g */  \
        movq    %g, %rcx;          \
        xorq    %f, %rcx;          \
        andq    %e, %rcx;          \
        xorq    %g, %rcx;          \
        addq    %rax, %h;          \
        /* The 64-bit round constant needs movabs; addq has no 64-bit immediate form */  \
        movabs  $k, %rax;          \
        addq    %rcx, %h;          \
        addq    %rax, %h;          \
        /* Part 1 */               \
        /* d += T1, which is the next round's E */  \
        addq    %h, %d;            \
        /* Part 2 */               \
        /* ROR transformation inspired by Intel's SHA-256 implementation */  \
        /* rax = ROTR(a,28) ^ ROTR(a,34) ^ ROTR(a,39), via nested rotates 5+6+28 */  \
        movq    %a, %rax;          \
        rorq    $5, %rax;          \
        xorq    %a, %rax;          \
        rorq    $6, %rax;          \
        xorq    %a, %rax;          \
        rorq    $28, %rax;         \
        movq    %c, %rcx;          \
        addq    %rax, %h;          \
        /* rax = Maj(a,b,c) computed as ((c | b) & a) | (c & b) */  \
        movq    %c, %rax;          \
        orq     %b, %rax;          \
        andq    %b, %rcx;          \
        andq    %a, %rax;          \
        orq     %rcx, %rax;        \
        addq    %rax, %h;

    /* Save registers, allocate scratch space */
    /* The registers are parked in xmm0-xmm6 rather than pushed on the stack. */
    movq  %r10, %xmm0
    movq  %r11, %xmm1
    movq  %r12, %xmm2
    movq  %r13, %xmm3
    movq  %r14, %xmm4
    movq  %r15, %xmm5
    movq  %rbx, %xmm6
    subq  $128, %rsp

    /* Load state */
    movq   0(%rdi), %r8   /* a */
    movq   8(%rdi), %r9   /* b */
    movq  16(%rdi), %r10  /* c */
    movq  24(%rdi), %r11  /* d */
    movq  32(%rdi), %r12  /* e */
    movq  40(%rdi), %r13  /* f */
    movq  48(%rdi), %r14  /* g */
    movq  56(%rdi), %r15  /* h */

    /* Do 80 rounds of hashing */
    /* The a..h register list shifts by one position each round and repeats every 8 rounds. */
    ROUNDa( 0, r8 , r9 , r10, r11, r12, r13, r14, r15, 0x428A2F98D728AE22)
    ROUNDa( 1, r15, r8 , r9 , r10, r11, r12, r13, r14, 0x7137449123EF65CD)
    ROUNDa( 2, r14, r15, r8 , r9 , r10, r11, r12, r13, 0xB5C0FBCFEC4D3B2F)
    ROUNDa( 3, r13, r14, r15, r8 , r9 , r10, r11, r12, 0xE9B5DBA58189DBBC)
    ROUNDa( 4, r12, r13, r14, r15, r8 , r9 , r10, r11, 0x3956C25BF348B538)
    ROUNDa( 5, r11, r12, r13, r14, r15, r8 , r9 , r10, 0x59F111F1B605D019)
    ROUNDa( 6, r10, r11, r12, r13, r14, r15, r8 , r9 , 0x923F82A4AF194F9B)
    ROUNDa( 7, r9 , r10, r11, r12, r13, r14, r15, r8 , 0xAB1C5ED5DA6D8118)
    ROUNDa( 8, r8 , r9 , r10, r11, r12, r13, r14, r15, 0xD807AA98A3030242)
    ROUNDa( 9, r15, r8 , r9 , r10, r11, r12, r13, r14, 0x12835B0145706FBE)
    ROUNDa(10, r14, r15, r8 , r9 , r10, r11, r12, r13, 0x243185BE4EE4B28C)
    ROUNDa(11, r13, r14, r15, r8 , r9 , r10, r11, r12, 0x550C7DC3D5FFB4E2)
    ROUNDa(12, r12, r13, r14, r15, r8 , r9 , r10, r11, 0x72BE5D74F27B896F)
    ROUNDa(13, r11, r12, r13, r14, r15, r8 , r9 , r10, 0x80DEB1FE3B1696B1)
    ROUNDa(14, r10, r11, r12, r13, r14, r15, r8 , r9 , 0x9BDC06A725C71235)
    ROUNDa(15, r9 , r10, r11, r12, r13, r14, r15, r8 , 0xC19BF174CF692694)
    ROUNDb(16, r8 , r9 , r10, r11, r12, r13, r14, r15, 0xE49B69C19EF14AD2)
    ROUNDb(17, r15, r8 , r9 , r10, r11, r12, r13, r14, 0xEFBE4786384F25E3)
    ROUNDb(18, r14, r15, r8 , r9 , r10, r11, r12, r13, 0x0FC19DC68B8CD5B5)
    ROUNDb(19, r13, r14, r15, r8 , r9 , r10, r11, r12, 0x240CA1CC77AC9C65)
    ROUNDb(20, r12, r13, r14, r15, r8 , r9 , r10, r11, 0x2DE92C6F592B0275)
    ROUNDb(21, r11, r12, r13, r14, r15, r8 , r9 , r10, 0x4A7484AA6EA6E483)
    ROUNDb(22, r10, r11, r12, r13, r14, r15, r8 , r9 , 0x5CB0A9DCBD41FBD4)
    ROUNDb(23, r9 , r10, r11, r12, r13, r14, r15, r8 , 0x76F988DA831153B5)
    ROUNDb(24, r8 , r9 , r10, r11, r12, r13, r14, r15, 0x983E5152EE66DFAB)
    ROUNDb(25, r15, r8 , r9 , r10, r11, r12, r13, r14, 0xA831C66D2DB43210)
    ROUNDb(26, r14, r15, r8 , r9 , r10, r11, r12, r13, 0xB00327C898FB213F)
    ROUNDb(27, r13, r14, r15, r8 , r9 , r10, r11, r12, 0xBF597FC7BEEF0EE4)
    ROUNDb(28, r12, r13, r14, r15, r8 , r9 , r10, r11, 0xC6E00BF33DA88FC2)
    ROUNDb(29, r11, r12, r13, r14, r15, r8 , r9 , r10, 0xD5A79147930AA725)
    ROUNDb(30, r10, r11, r12, r13, r14, r15, r8 , r9 , 0x06CA6351E003826F)
    ROUNDb(31, r9 , r10, r11, r12, r13, r14, r15, r8 , 0x142929670A0E6E70)
    ROUNDb(32, r8 , r9 , r10, r11, r12, r13, r14, r15, 0x27B70A8546D22FFC)
    ROUNDb(33, r15, r8 , r9 , r10, r11, r12, r13, r14, 0x2E1B21385C26C926)
    ROUNDb(34, r14, r15, r8 , r9 , r10, r11, r12, r13, 0x4D2C6DFC5AC42AED)
    ROUNDb(35, r13, r14, r15, r8 , r9 , r10, r11, r12, 0x53380D139D95B3DF)
    ROUNDb(36, r12, r13, r14, r15, r8 , r9 , r10, r11, 0x650A73548BAF63DE)
    ROUNDb(37, r11, r12, r13, r14, r15, r8 , r9 , r10, 0x766A0ABB3C77B2A8)
    ROUNDb(38, r10, r11, r12, r13, r14, r15, r8 , r9 , 0x81C2C92E47EDAEE6)
    ROUNDb(39, r9 , r10, r11, r12, r13, r14, r15, r8 , 0x92722C851482353B)
    ROUNDb(40, r8 , r9 , r10, r11, r12, r13, r14, r15, 0xA2BFE8A14CF10364)
    ROUNDb(41, r15, r8 , r9 , r10, r11, r12, r13, r14, 0xA81A664BBC423001)
    ROUNDb(42, r14, r15, r8 , r9 , r10, r11, r12, r13, 0xC24B8B70D0F89791)
    ROUNDb(43, r13, r14, r15, r8 , r9 , r10, r11, r12, 0xC76C51A30654BE30)
    ROUNDb(44, r12, r13, r14, r15, r8 , r9 , r10, r11, 0xD192E819D6EF5218)
    ROUNDb(45, r11, r12, r13, r14, r15, r8 , r9 , r10, 0xD69906245565A910)
    ROUNDb(46, r10, r11, r12, r13, r14, r15, r8 , r9 , 0xF40E35855771202A)
    ROUNDb(47, r9 , r10, r11, r12, r13, r14, r15, r8 , 0x106AA07032BBD1B8)
    ROUNDb(48, r8 , r9 , r10, r11, r12, r13, r14, r15, 0x19A4C116B8D2D0C8)
    ROUNDb(49, r15, r8 , r9 , r10, r11, r12, r13, r14, 0x1E376C085141AB53)
    ROUNDb(50, r14, r15, r8 , r9 , r10, r11, r12, r13, 0x2748774CDF8EEB99)
    ROUNDb(51, r13, r14, r15, r8 , r9 , r10, r11, r12, 0x34B0BCB5E19B48A8)
    ROUNDb(52, r12, r13, r14, r15, r8 , r9 , r10, r11, 0x391C0CB3C5C95A63)
    ROUNDb(53, r11, r12, r13, r14, r15, r8 , r9 , r10, 0x4ED8AA4AE3418ACB)
    ROUNDb(54, r10, r11, r12, r13, r14, r15, r8 , r9 , 0x5B9CCA4F7763E373)
    ROUNDb(55, r9 , r10, r11, r12, r13, r14, r15, r8 , 0x682E6FF3D6B2B8A3)
    ROUNDb(56, r8 , r9 , r10, r11, r12, r13, r14, r15, 0x748F82EE5DEFB2FC)
    ROUNDb(57, r15, r8 , r9 , r10, r11, r12, r13, r14, 0x78A5636F43172F60)
    ROUNDb(58, r14, r15, r8 , r9 , r10, r11, r12, r13, 0x84C87814A1F0AB72)
    ROUNDb(59, r13, r14, r15, r8 , r9 , r10, r11, r12, 0x8CC702081A6439EC)
    ROUNDb(60, r12, r13, r14, r15, r8 , r9 , r10, r11, 0x90BEFFFA23631E28)
    ROUNDb(61, r11, r12, r13, r14, r15, r8 , r9 , r10, 0xA4506CEBDE82BDE9)
    ROUNDb(62, r10, r11, r12, r13, r14, r15, r8 , r9 , 0xBEF9A3F7B2C67915)
    ROUNDb(63, r9 , r10, r11, r12, r13, r14, r15, r8 , 0xC67178F2E372532B)
    ROUNDb(64, r8 , r9 , r10, r11, r12, r13, r14, r15, 0xCA273ECEEA26619C)
    ROUNDb(65, r15, r8 , r9 , r10, r11, r12, r13, r14, 0xD186B8C721C0C207)
    ROUNDb(66, r14, r15, r8 , r9 , r10, r11, r12, r13, 0xEADA7DD6CDE0EB1E)
    ROUNDb(67, r13, r14, r15, r8 , r9 , r10, r11, r12, 0xF57D4F7FEE6ED178)
    ROUNDb(68, r12, r13, r14, r15, r8 , r9 , r10, r11, 0x06F067AA72176FBA)
    ROUNDb(69, r11, r12, r13, r14, r15, r8 , r9 , r10, 0x0A637DC5A2C898A6)
    ROUNDb(70, r10, r11, r12, r13, r14, r15, r8 , r9 , 0x113F9804BEF90DAE)
    ROUNDb(71, r9 , r10, r11, r12, r13, r14, r15, r8 , 0x1B710B35131C471B)
    ROUNDb(72, r8 , r9 , r10, r11, r12, r13, r14, r15, 0x28DB77F523047D84)
    ROUNDb(73, r15, r8 , r9 , r10, r11, r12, r13, r14, 0x32CAAB7B40C72493)
    ROUNDb(74, r14, r15, r8 , r9 , r10, r11, r12, r13, 0x3C9EBE0A15C9BEBC)
    ROUNDb(75, r13, r14, r15, r8 , r9 , r10, r11, r12, 0x431D67C49C100D4C)
    ROUNDb(76, r12, r13, r14, r15, r8 , r9 , r10, r11, 0x4CC5D4BECB3E42B6)
    ROUNDb(77, r11, r12, r13, r14, r15, r8 , r9 , r10, 0x597F299CFC657E2A)
    ROUNDb(78, r10, r11, r12, r13, r14, r15, r8 , r9 , 0x5FCB6FAB3AD6FAEC)
    ROUNDb(79, r9 , r10, r11, r12, r13, r14, r15, r8 , 0x6C44198C4A475817)

    /* Add to state */
    /* After 80 rounds (a multiple of 8) the variables are back in their starting registers. */
    addq  %r8 ,  0(%rdi)
    addq  %r9 ,  8(%rdi)
    addq  %r10, 16(%rdi)
    addq  %r11, 24(%rdi)
    addq  %r12, 32(%rdi)
    addq  %r13, 40(%rdi)
    addq  %r14, 48(%rdi)
    addq  %r15, 56(%rdi)

    /* Restore registers */
    movq  %xmm0, %r10
    movq  %xmm1, %r11
    movq  %xmm2, %r12
    movq  %xmm3, %r13
    movq  %xmm4, %r14
    movq  %xmm5, %r15
    movq  %xmm6, %rbx
    addq  $128, %rsp
    retq

#ifdef __ELF__
.size sha512_compress, . - sha512_compress
/*
 * Declare that this object does not need an executable stack. Without this
 * note GNU ld assumes it might, and marks the whole program's stack executable.
 * pushsection/popsection leave the current section unchanged for any code
 * that follows.
 */
.pushsection .note.GNU-stack, "", @progbits
.popsection
#endif