1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263
|
; Sha1Opt.asm -- SHA-1 optimized code for SHA-1 x86 hardware instructions
; 2024-06-16 : Igor Pavlov : Public domain
include 7zAsm.asm
MY_ASM_START
CONST SEGMENT READONLY
align 16
Reverse_Endian_Mask db 15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0
CONST ENDS
; _TEXT$SHA1OPT SEGMENT 'CODE'
ifndef x64
.686
.xmm
endif
ifdef x64
rNum equ REG_ABI_PARAM_2
if (IS_LINUX eq 0)
LOCAL_SIZE equ (16 * 2)
endif
else
rNum equ r0
LOCAL_SIZE equ (16 * 1)
endif
rState equ REG_ABI_PARAM_0
rData equ REG_ABI_PARAM_1
MY_sha1rnds4 macro a1, a2, imm
db 0fH, 03aH, 0ccH, (0c0H + a1 * 8 + a2), imm
endm
MY_SHA_INSTR macro cmd, a1, a2
db 0fH, 038H, cmd, (0c0H + a1 * 8 + a2)
endm
cmd_sha1nexte equ 0c8H
cmd_sha1msg1 equ 0c9H
cmd_sha1msg2 equ 0caH
MY_sha1nexte macro a1, a2
MY_SHA_INSTR cmd_sha1nexte, a1, a2
endm
MY_sha1msg1 macro a1, a2
MY_SHA_INSTR cmd_sha1msg1, a1, a2
endm
MY_sha1msg2 macro a1, a2
MY_SHA_INSTR cmd_sha1msg2, a1, a2
endm
MY_PROLOG macro
ifdef x64
if (IS_LINUX eq 0)
movdqa [r4 + 8], xmm6
movdqa [r4 + 8 + 16], xmm7
sub r4, LOCAL_SIZE + 8
movdqa [r4 ], xmm8
movdqa [r4 + 16], xmm9
endif
else ; x86
if (IS_CDECL gt 0)
mov rState, [r4 + REG_SIZE * 1]
mov rData, [r4 + REG_SIZE * 2]
mov rNum, [r4 + REG_SIZE * 3]
else ; fastcall
mov rNum, [r4 + REG_SIZE * 1]
endif
push r5
mov r5, r4
and r4, -16
sub r4, LOCAL_SIZE
endif
endm
MY_EPILOG macro
ifdef x64
if (IS_LINUX eq 0)
movdqa xmm8, [r4]
movdqa xmm9, [r4 + 16]
add r4, LOCAL_SIZE + 8
movdqa xmm6, [r4 + 8]
movdqa xmm7, [r4 + 8 + 16]
endif
else ; x86
mov r4, r5
pop r5
endif
MY_ENDP
endm
e0_N equ 0
e1_N equ 1
abcd_N equ 2
e0_save_N equ 3
w_regs equ 4
e0 equ @CatStr(xmm, %e0_N)
e1 equ @CatStr(xmm, %e1_N)
abcd equ @CatStr(xmm, %abcd_N)
e0_save equ @CatStr(xmm, %e0_save_N)
ifdef x64
abcd_save equ xmm8
mask2 equ xmm9
else
abcd_save equ [r4]
mask2 equ e1
endif
LOAD_MASK macro
movdqa mask2, XMMWORD PTR Reverse_Endian_Mask
endm
LOAD_W macro k:req
movdqu @CatStr(xmm, %(w_regs + k)), [rData + (16 * (k))]
pshufb @CatStr(xmm, %(w_regs + k)), mask2
endm
; pre2 can be 2 or 3 (recommended)
pre2 equ 3
pre1 equ (pre2 + 1)
NUM_ROUNDS4 equ 20
RND4 macro k
movdqa @CatStr(xmm, %(e0_N + ((k + 1) mod 2))), abcd
MY_sha1rnds4 abcd_N, (e0_N + (k mod 2)), k / 5
nextM = (w_regs + ((k + 1) mod 4))
if (k EQ NUM_ROUNDS4 - 1)
nextM = e0_save_N
endif
MY_sha1nexte (e0_N + ((k + 1) mod 2)), nextM
if (k GE (4 - pre2)) AND (k LT (NUM_ROUNDS4 - pre2))
pxor @CatStr(xmm, %(w_regs + ((k + pre2) mod 4))), @CatStr(xmm, %(w_regs + ((k + pre2 - 2) mod 4)))
endif
if (k GE (4 - pre1)) AND (k LT (NUM_ROUNDS4 - pre1))
MY_sha1msg1 (w_regs + ((k + pre1) mod 4)), (w_regs + ((k + pre1 - 3) mod 4))
endif
if (k GE (4 - pre2)) AND (k LT (NUM_ROUNDS4 - pre2))
MY_sha1msg2 (w_regs + ((k + pre2) mod 4)), (w_regs + ((k + pre2 - 1) mod 4))
endif
endm
REVERSE_STATE macro
; abcd ; dcba
; e0 ; 000e
pshufd abcd, abcd, 01bH ; abcd
pshufd e0, e0, 01bH ; e000
endm
MY_PROC Sha1_UpdateBlocks_HW, 3
MY_PROLOG
cmp rNum, 0
je end_c
movdqu abcd, [rState] ; dcba
movd e0, dword ptr [rState + 16] ; 000e
REVERSE_STATE
ifdef x64
LOAD_MASK
endif
align 16
nextBlock:
movdqa abcd_save, abcd
movdqa e0_save, e0
ifndef x64
LOAD_MASK
endif
LOAD_W 0
LOAD_W 1
LOAD_W 2
LOAD_W 3
paddd e0, @CatStr(xmm, %(w_regs))
k = 0
rept NUM_ROUNDS4
RND4 k
k = k + 1
endm
paddd abcd, abcd_save
add rData, 64
sub rNum, 1
jnz nextBlock
REVERSE_STATE
movdqu [rState], abcd
movd dword ptr [rState + 16], e0
end_c:
MY_EPILOG
; _TEXT$SHA1OPT ENDS
end
|