crypto: aesni - Move HashKey computation from stack to gcm_context
HashKey computation only needs to happen once per scatter/gather operation, so save it in the gcm_context struct between calls instead of recomputing it on the stack. Since the asm no longer stores anything on the stack, we can use %rsp directly and clean up the frame save/restore macros a bit.

Hashkeys actually only need to be calculated once per key and could be moved to when set_key is called; however, the current glue code falls back to generic AES code if the FPU is disabled.

Signed-off-by: Dave Watson <davejwatson@fb.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
commit 1476db2d12 (parent e2e34b0856)
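For orientation: the new HashKey and HashKey_*_k offsets below index into the per-request gcm_context_data structure that the glue code hands to the asm in %arg2. A rough C sketch of the layout those offsets imply (field names are illustrative rather than copied from the kernel source, which remains authoritative):

    /* Sketch only: offsets mirror the #defines in this patch
     * (AadHash 16*0, AadLen 16*1, ..., PBlockLen 16*5, HashKey 16*6 ... HashKey_4_k 16*13).
     * Field names are illustrative. */
    struct gcm_context_data_sketch {
        unsigned char      aad_hash[16];              /* 16*0: running GHASH value         */
        unsigned long long aad_length;                /* 16*1: total AAD length            */
        unsigned long long in_length;                 /* total plaintext/ciphertext bytes  */
        unsigned char      partial_block_enc_key[16]; /* 16*2                              */
        unsigned char      orig_iv[16];               /* 16*3                              */
        unsigned char      current_counter[16];       /* 16*4                              */
        unsigned long long partial_block_len;         /* 16*5                              */
        unsigned long long unused;                    /* pad to the next 16-byte slot      */
        unsigned char      hash_keys[16 * 8];         /* 16*6 .. 16*13: HashKey, HashKey_2..4,
                                                         and their Karatsuba _k halves     */
    };

Keeping the hash keys in this structure rather than in a per-call stack frame is what lets one scatter/gather operation reuse them across the GCM_INIT, GCM_ENC_DEC and GCM_COMPLETE calls.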
@@ -94,23 +94,6 @@ ALL_F:      .octa 0xffffffffffffffffffffffffffffffff
 
 
 #define STACK_OFFSET    8*3
-#define HashKey         16*0    // store HashKey <<1 mod poly here
-#define HashKey_2       16*1    // store HashKey^2 <<1 mod poly here
-#define HashKey_3       16*2    // store HashKey^3 <<1 mod poly here
-#define HashKey_4       16*3    // store HashKey^4 <<1 mod poly here
-#define HashKey_k       16*4    // store XOR of High 64 bits and Low 64
-                                // bits of HashKey <<1 mod poly here
-                                //(for Karatsuba purposes)
-#define HashKey_2_k     16*5    // store XOR of High 64 bits and Low 64
-                                // bits of HashKey^2 <<1 mod poly here
-                                // (for Karatsuba purposes)
-#define HashKey_3_k     16*6    // store XOR of High 64 bits and Low 64
-                                // bits of HashKey^3 <<1 mod poly here
-                                // (for Karatsuba purposes)
-#define HashKey_4_k     16*7    // store XOR of High 64 bits and Low 64
-                                // bits of HashKey^4 <<1 mod poly here
-                                // (for Karatsuba purposes)
-#define VARIABLE_OFFSET 16*8
 
 #define AadHash 16*0
 #define AadLen 16*1
@@ -119,6 +102,22 @@ ALL_F:      .octa 0xffffffffffffffffffffffffffffffff
 #define OrigIV 16*3
 #define CurCount 16*4
 #define PBlockLen 16*5
+#define HashKey         16*6    // store HashKey <<1 mod poly here
+#define HashKey_2       16*7    // store HashKey^2 <<1 mod poly here
+#define HashKey_3       16*8    // store HashKey^3 <<1 mod poly here
+#define HashKey_4       16*9    // store HashKey^4 <<1 mod poly here
+#define HashKey_k       16*10   // store XOR of High 64 bits and Low 64
+                                // bits of HashKey <<1 mod poly here
+                                //(for Karatsuba purposes)
+#define HashKey_2_k     16*11   // store XOR of High 64 bits and Low 64
+                                // bits of HashKey^2 <<1 mod poly here
+                                // (for Karatsuba purposes)
+#define HashKey_3_k     16*12   // store XOR of High 64 bits and Low 64
+                                // bits of HashKey^3 <<1 mod poly here
+                                // (for Karatsuba purposes)
+#define HashKey_4_k     16*13   // store XOR of High 64 bits and Low 64
+                                // bits of HashKey^4 <<1 mod poly here
+                                // (for Karatsuba purposes)
 
 #define arg1 rdi
 #define arg2 rsi
@@ -126,11 +125,11 @@ ALL_F:      .octa 0xffffffffffffffffffffffffffffffff
 #define arg4 rcx
 #define arg5 r8
 #define arg6 r9
-#define arg7 STACK_OFFSET+8(%r14)
-#define arg8 STACK_OFFSET+16(%r14)
-#define arg9 STACK_OFFSET+24(%r14)
-#define arg10 STACK_OFFSET+32(%r14)
-#define arg11 STACK_OFFSET+40(%r14)
+#define arg7 STACK_OFFSET+8(%rsp)
+#define arg8 STACK_OFFSET+16(%rsp)
+#define arg9 STACK_OFFSET+24(%rsp)
+#define arg10 STACK_OFFSET+32(%rsp)
+#define arg11 STACK_OFFSET+40(%rsp)
 #define keysize 2*15*16(%arg1)
 #endif
 
@@ -184,28 +183,79 @@ ALL_F:      .octa 0xffffffffffffffffffffffffffffffff
        push    %r12
        push    %r13
        push    %r14
-       mov     %rsp, %r14
 #
 # states of %xmm registers %xmm6:%xmm15 not saved
 # all %xmm registers are clobbered
 #
-       sub     $VARIABLE_OFFSET, %rsp
-       and     $~63, %rsp
 .endm
 
 
 .macro FUNC_RESTORE
-       mov     %r14, %rsp
        pop     %r14
        pop     %r13
        pop     %r12
 .endm
 
+# Precompute hashkeys.
+# Input: Hash subkey.
+# Output: HashKeys stored in gcm_context_data.  Only needs to be called
+# once per key.
+# clobbers r12, and tmp xmm registers.
+.macro PRECOMPUTE TMP1 TMP2 TMP3 TMP4 TMP5 TMP6 TMP7
+       mov     arg7, %r12
+       movdqu  (%r12), \TMP3
+       movdqa  SHUF_MASK(%rip), \TMP2
+       PSHUFB_XMM \TMP2, \TMP3
+
+       # precompute HashKey<<1 mod poly from the HashKey (required for GHASH)
+
+       movdqa  \TMP3, \TMP2
+       psllq   $1, \TMP3
+       psrlq   $63, \TMP2
+       movdqa  \TMP2, \TMP1
+       pslldq  $8, \TMP2
+       psrldq  $8, \TMP1
+       por     \TMP2, \TMP3
+
+       # reduce HashKey<<1
+
+       pshufd  $0x24, \TMP1, \TMP2
+       pcmpeqd TWOONE(%rip), \TMP2
+       pand    POLY(%rip), \TMP2
+       pxor    \TMP2, \TMP3
+       movdqa  \TMP3, HashKey(%arg2)
+
+       movdqa  \TMP3, \TMP5
+       pshufd  $78, \TMP3, \TMP1
+       pxor    \TMP3, \TMP1
+       movdqa  \TMP1, HashKey_k(%arg2)
+
+       GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
+# TMP5 = HashKey^2<<1 (mod poly)
+       movdqa  \TMP5, HashKey_2(%arg2)
+# HashKey_2 = HashKey^2<<1 (mod poly)
+       pshufd  $78, \TMP5, \TMP1
+       pxor    \TMP5, \TMP1
+       movdqa  \TMP1, HashKey_2_k(%arg2)
+
+       GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
+# TMP5 = HashKey^3<<1 (mod poly)
+       movdqa  \TMP5, HashKey_3(%arg2)
+       pshufd  $78, \TMP5, \TMP1
+       pxor    \TMP5, \TMP1
+       movdqa  \TMP1, HashKey_3_k(%arg2)
+
+       GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
+# TMP5 = HashKey^3<<1 (mod poly)
+       movdqa  \TMP5, HashKey_4(%arg2)
+       pshufd  $78, \TMP5, \TMP1
+       pxor    \TMP5, \TMP1
+       movdqa  \TMP1, HashKey_4_k(%arg2)
+.endm
 
 # GCM_INIT initializes a gcm_context struct to prepare for encoding/decoding.
 # Clobbers rax, r10-r13 and xmm0-xmm6, %xmm13
 .macro GCM_INIT
 
        mov     arg9, %r11
        mov     %r11, AadLen(%arg2)     # ctx_data.aad_length = aad_length
        xor     %r11, %r11
@@ -220,28 +270,8 @@ ALL_F:      .octa 0xffffffffffffffffffffffffffffffff
        PSHUFB_XMM %xmm2, %xmm0
        movdqu %xmm0, CurCount(%arg2) # ctx_data.current_counter = iv
 
-       mov     arg7, %r12
-       movdqu  (%r12), %xmm13
-       movdqa  SHUF_MASK(%rip), %xmm2
-       PSHUFB_XMM %xmm2, %xmm13
-
-       # precompute HashKey<<1 mod poly from the HashKey (required for GHASH)
-
-       movdqa  %xmm13, %xmm2
-       psllq   $1, %xmm13
-       psrlq   $63, %xmm2
-       movdqa  %xmm2, %xmm1
-       pslldq  $8, %xmm2
-       psrldq  $8, %xmm1
-       por     %xmm2, %xmm13
-
-       # reduce HashKey<<1
-
-       pshufd  $0x24, %xmm1, %xmm2
-       pcmpeqd TWOONE(%rip), %xmm2
-       pand    POLY(%rip), %xmm2
-       pxor    %xmm2, %xmm13
-       movdqa  %xmm13, HashKey(%rsp)
+       PRECOMPUTE %xmm1 %xmm2 %xmm3 %xmm4 %xmm5 %xmm6 %xmm7
+       movdqa  HashKey(%arg2), %xmm13
 
        CALC_AAD_HASH %xmm13 %xmm0 %xmm1 %xmm2 %xmm3 %xmm4 \
        %xmm5 %xmm6
@@ -253,7 +283,7 @@ ALL_F:      .octa 0xffffffffffffffffffffffffffffffff
 # Clobbers rax, r10-r13, and xmm0-xmm15
 .macro GCM_ENC_DEC operation
        movdqu AadHash(%arg2), %xmm8
-       movdqu HashKey(%rsp), %xmm13
+       movdqu HashKey(%arg2), %xmm13
        add %arg5, InLen(%arg2)
        mov %arg5, %r13         # save the number of bytes
        and $-16, %r13          # %r13 = %r13 - (%r13 mod 16)
@@ -377,7 +407,7 @@ _multiple_of_16_bytes_\@:
 # Clobbers rax, r10-r12, and xmm0, xmm1, xmm5-xmm15
 .macro GCM_COMPLETE
        movdqu AadHash(%arg2), %xmm8
-       movdqu HashKey(%rsp), %xmm13
+       movdqu HashKey(%arg2), %xmm13
 
        mov PBlockLen(%arg2), %r12
 
@@ -584,7 +614,7 @@ _get_AAD_done\@:
 * the ciphertext
 * %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers
 * are clobbered
-* arg1, %arg2, %arg3, %r14 are used as a pointer only, not modified
+* arg1, %arg2, %arg3 are used as a pointer only, not modified
 */
 
 
@@ -695,17 +725,6 @@ aes_loop_initial_\@:
        pxor    \TMP1, \XMM2
        pxor    \TMP1, \XMM3
        pxor    \TMP1, \XMM4
-       movdqa  \TMP3, \TMP5
-       pshufd  $78, \TMP3, \TMP1
-       pxor    \TMP3, \TMP1
-       movdqa  \TMP1, HashKey_k(%rsp)
-       GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
-# TMP5 = HashKey^2<<1 (mod poly)
-       movdqa  \TMP5, HashKey_2(%rsp)
-# HashKey_2 = HashKey^2<<1 (mod poly)
-       pshufd  $78, \TMP5, \TMP1
-       pxor    \TMP5, \TMP1
-       movdqa  \TMP1, HashKey_2_k(%rsp)
 .irpc index, 1234 # do 4 rounds
        movaps 0x10*\index(%arg1), \TMP1
        AESENC  \TMP1, \XMM1
@@ -713,12 +732,6 @@ aes_loop_initial_\@:
        AESENC  \TMP1, \XMM3
        AESENC  \TMP1, \XMM4
 .endr
-       GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
-# TMP5 = HashKey^3<<1 (mod poly)
-       movdqa  \TMP5, HashKey_3(%rsp)
-       pshufd  $78, \TMP5, \TMP1
-       pxor    \TMP5, \TMP1
-       movdqa  \TMP1, HashKey_3_k(%rsp)
 .irpc index, 56789 # do next 5 rounds
        movaps 0x10*\index(%arg1), \TMP1
        AESENC  \TMP1, \XMM1
@@ -726,12 +739,6 @@ aes_loop_initial_\@:
        AESENC  \TMP1, \XMM3
        AESENC  \TMP1, \XMM4
 .endr
-       GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
-# TMP5 = HashKey^3<<1 (mod poly)
-       movdqa  \TMP5, HashKey_4(%rsp)
-       pshufd  $78, \TMP5, \TMP1
-       pxor    \TMP5, \TMP1
-       movdqa  \TMP1, HashKey_4_k(%rsp)
        lea     0xa0(%arg1),%r10
        mov     keysize,%eax
        shr     $2,%eax                 # 128->4, 192->6, 256->8
@@ -816,7 +823,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
        pshufd  $78, \XMM5, \TMP6
        pxor    \XMM5, \TMP6
        paddd   ONE(%rip), \XMM0        # INCR CNT
-       movdqa  HashKey_4(%rsp), \TMP5
+       movdqa  HashKey_4(%arg2), \TMP5
        PCLMULQDQ 0x11, \TMP5, \TMP4    # TMP4 = a1*b1
        movdqa  \XMM0, \XMM1
        paddd   ONE(%rip), \XMM0        # INCR CNT
@@ -835,7 +842,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
        pxor    (%arg1), \XMM2
        pxor    (%arg1), \XMM3
        pxor    (%arg1), \XMM4
-       movdqa  HashKey_4_k(%rsp), \TMP5
+       movdqa  HashKey_4_k(%arg2), \TMP5
        PCLMULQDQ 0x00, \TMP5, \TMP6    # TMP6 = (a1+a0)*(b1+b0)
        movaps 0x10(%arg1), \TMP1
        AESENC  \TMP1, \XMM1            # Round 1
@@ -850,7 +857,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
        movdqa  \XMM6, \TMP1
        pshufd  $78, \XMM6, \TMP2
        pxor    \XMM6, \TMP2
-       movdqa  HashKey_3(%rsp), \TMP5
+       movdqa  HashKey_3(%arg2), \TMP5
        PCLMULQDQ 0x11, \TMP5, \TMP1    # TMP1 = a1 * b1
        movaps 0x30(%arg1), \TMP3
        AESENC  \TMP3, \XMM1            # Round 3
@@ -863,7 +870,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
        AESENC  \TMP3, \XMM2
        AESENC  \TMP3, \XMM3
        AESENC  \TMP3, \XMM4
-       movdqa  HashKey_3_k(%rsp), \TMP5
+       movdqa  HashKey_3_k(%arg2), \TMP5
        PCLMULQDQ 0x00, \TMP5, \TMP2    # TMP2 = (a1+a0)*(b1+b0)
        movaps 0x50(%arg1), \TMP3
        AESENC  \TMP3, \XMM1            # Round 5
@@ -877,7 +884,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
        movdqa  \XMM7, \TMP1
        pshufd  $78, \XMM7, \TMP2
        pxor    \XMM7, \TMP2
-       movdqa  HashKey_2(%rsp ), \TMP5
+       movdqa  HashKey_2(%arg2), \TMP5
 
        # Multiply TMP5 * HashKey using karatsuba
 
@@ -893,7 +900,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
        AESENC  \TMP3, \XMM2
        AESENC  \TMP3, \XMM3
        AESENC  \TMP3, \XMM4
-       movdqa  HashKey_2_k(%rsp), \TMP5
+       movdqa  HashKey_2_k(%arg2), \TMP5
        PCLMULQDQ 0x00, \TMP5, \TMP2    # TMP2 = (a1+a0)*(b1+b0)
        movaps 0x80(%arg1), \TMP3
        AESENC  \TMP3, \XMM1            # Round 8
@@ -911,7 +918,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
        movdqa  \XMM8, \TMP1
        pshufd  $78, \XMM8, \TMP2
        pxor    \XMM8, \TMP2
-       movdqa  HashKey(%rsp), \TMP5
+       movdqa  HashKey(%arg2), \TMP5
        PCLMULQDQ 0x11, \TMP5, \TMP1    # TMP1 = a1*b1
        movaps 0x90(%arg1), \TMP3
        AESENC  \TMP3, \XMM1            # Round 9
@@ -940,7 +947,7 @@ aes_loop_par_enc_done:
        AESENCLAST \TMP3, \XMM2
        AESENCLAST \TMP3, \XMM3
        AESENCLAST \TMP3, \XMM4
-       movdqa  HashKey_k(%rsp), \TMP5
+       movdqa  HashKey_k(%arg2), \TMP5
        PCLMULQDQ 0x00, \TMP5, \TMP2    # TMP2 = (a1+a0)*(b1+b0)
        movdqu  (%arg4,%r11,1), \TMP3
        pxor    \TMP3, \XMM1            # Ciphertext/Plaintext XOR EK
@@ -1024,7 +1031,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
        pshufd  $78, \XMM5, \TMP6
        pxor    \XMM5, \TMP6
        paddd   ONE(%rip), \XMM0        # INCR CNT
-       movdqa  HashKey_4(%rsp), \TMP5
+       movdqa  HashKey_4(%arg2), \TMP5
        PCLMULQDQ 0x11, \TMP5, \TMP4    # TMP4 = a1*b1
        movdqa  \XMM0, \XMM1
        paddd   ONE(%rip), \XMM0        # INCR CNT
@@ -1043,7 +1050,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
        pxor    (%arg1), \XMM2
        pxor    (%arg1), \XMM3
        pxor    (%arg1), \XMM4
-       movdqa  HashKey_4_k(%rsp), \TMP5
+       movdqa  HashKey_4_k(%arg2), \TMP5
        PCLMULQDQ 0x00, \TMP5, \TMP6    # TMP6 = (a1+a0)*(b1+b0)
        movaps 0x10(%arg1), \TMP1
        AESENC  \TMP1, \XMM1            # Round 1
@@ -1058,7 +1065,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
        movdqa  \XMM6, \TMP1
        pshufd  $78, \XMM6, \TMP2
        pxor    \XMM6, \TMP2
-       movdqa  HashKey_3(%rsp), \TMP5
+       movdqa  HashKey_3(%arg2), \TMP5
        PCLMULQDQ 0x11, \TMP5, \TMP1    # TMP1 = a1 * b1
        movaps 0x30(%arg1), \TMP3
        AESENC  \TMP3, \XMM1            # Round 3
@@ -1071,7 +1078,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
        AESENC  \TMP3, \XMM2
        AESENC  \TMP3, \XMM3
        AESENC  \TMP3, \XMM4
-       movdqa  HashKey_3_k(%rsp), \TMP5
+       movdqa  HashKey_3_k(%arg2), \TMP5
        PCLMULQDQ 0x00, \TMP5, \TMP2    # TMP2 = (a1+a0)*(b1+b0)
        movaps 0x50(%arg1), \TMP3
        AESENC  \TMP3, \XMM1            # Round 5
@@ -1085,7 +1092,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
        movdqa  \XMM7, \TMP1
        pshufd  $78, \XMM7, \TMP2
        pxor    \XMM7, \TMP2
-       movdqa  HashKey_2(%rsp ), \TMP5
+       movdqa  HashKey_2(%arg2), \TMP5
 
        # Multiply TMP5 * HashKey using karatsuba
 
@@ -1101,7 +1108,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
        AESENC  \TMP3, \XMM2
        AESENC  \TMP3, \XMM3
        AESENC  \TMP3, \XMM4
-       movdqa  HashKey_2_k(%rsp), \TMP5
+       movdqa  HashKey_2_k(%arg2), \TMP5
        PCLMULQDQ 0x00, \TMP5, \TMP2    # TMP2 = (a1+a0)*(b1+b0)
        movaps 0x80(%arg1), \TMP3
        AESENC  \TMP3, \XMM1            # Round 8
@@ -1119,7 +1126,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
        movdqa  \XMM8, \TMP1
        pshufd  $78, \XMM8, \TMP2
        pxor    \XMM8, \TMP2
-       movdqa  HashKey(%rsp), \TMP5
+       movdqa  HashKey(%arg2), \TMP5
        PCLMULQDQ 0x11, \TMP5, \TMP1    # TMP1 = a1*b1
        movaps 0x90(%arg1), \TMP3
        AESENC  \TMP3, \XMM1            # Round 9
@@ -1148,7 +1155,7 @@ aes_loop_par_dec_done:
        AESENCLAST \TMP3, \XMM2
        AESENCLAST \TMP3, \XMM3
        AESENCLAST \TMP3, \XMM4
-       movdqa  HashKey_k(%rsp), \TMP5
+       movdqa  HashKey_k(%arg2), \TMP5
        PCLMULQDQ 0x00, \TMP5, \TMP2    # TMP2 = (a1+a0)*(b1+b0)
        movdqu  (%arg4,%r11,1), \TMP3
        pxor    \TMP3, \XMM1            # Ciphertext/Plaintext XOR EK
@@ -1224,10 +1231,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
        movdqa  \XMM1, \TMP6
        pshufd  $78, \XMM1, \TMP2
        pxor    \XMM1, \TMP2
-       movdqa  HashKey_4(%rsp), \TMP5
+       movdqa  HashKey_4(%arg2), \TMP5
        PCLMULQDQ 0x11, \TMP5, \TMP6    # TMP6 = a1*b1
        PCLMULQDQ 0x00, \TMP5, \XMM1    # XMM1 = a0*b0
-       movdqa  HashKey_4_k(%rsp), \TMP4
+       movdqa  HashKey_4_k(%arg2), \TMP4
        PCLMULQDQ 0x00, \TMP4, \TMP2    # TMP2 = (a1+a0)*(b1+b0)
        movdqa  \XMM1, \XMMDst
        movdqa  \TMP2, \XMM1            # result in TMP6, XMMDst, XMM1
@@ -1237,10 +1244,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
        movdqa  \XMM2, \TMP1
        pshufd  $78, \XMM2, \TMP2
        pxor    \XMM2, \TMP2
-       movdqa  HashKey_3(%rsp), \TMP5
+       movdqa  HashKey_3(%arg2), \TMP5
        PCLMULQDQ 0x11, \TMP5, \TMP1    # TMP1 = a1*b1
        PCLMULQDQ 0x00, \TMP5, \XMM2    # XMM2 = a0*b0
-       movdqa  HashKey_3_k(%rsp), \TMP4
+       movdqa  HashKey_3_k(%arg2), \TMP4
        PCLMULQDQ 0x00, \TMP4, \TMP2    # TMP2 = (a1+a0)*(b1+b0)
        pxor    \TMP1, \TMP6
        pxor    \XMM2, \XMMDst
@@ -1252,10 +1259,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
        movdqa  \XMM3, \TMP1
        pshufd  $78, \XMM3, \TMP2
        pxor    \XMM3, \TMP2
-       movdqa  HashKey_2(%rsp), \TMP5
+       movdqa  HashKey_2(%arg2), \TMP5
        PCLMULQDQ 0x11, \TMP5, \TMP1    # TMP1 = a1*b1
        PCLMULQDQ 0x00, \TMP5, \XMM3    # XMM3 = a0*b0
-       movdqa  HashKey_2_k(%rsp), \TMP4
+       movdqa  HashKey_2_k(%arg2), \TMP4
        PCLMULQDQ 0x00, \TMP4, \TMP2    # TMP2 = (a1+a0)*(b1+b0)
        pxor    \TMP1, \TMP6
        pxor    \XMM3, \XMMDst
@@ -1265,10 +1272,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
        movdqa  \XMM4, \TMP1
        pshufd  $78, \XMM4, \TMP2
        pxor    \XMM4, \TMP2
-       movdqa  HashKey(%rsp), \TMP5
+       movdqa  HashKey(%arg2), \TMP5
        PCLMULQDQ 0x11, \TMP5, \TMP1    # TMP1 = a1*b1
        PCLMULQDQ 0x00, \TMP5, \XMM4    # XMM4 = a0*b0
-       movdqa  HashKey_k(%rsp), \TMP4
+       movdqa  HashKey_k(%arg2), \TMP4
        PCLMULQDQ 0x00, \TMP4, \TMP2    # TMP2 = (a1+a0)*(b1+b0)
        pxor    \TMP1, \TMP6
        pxor    \XMM4, \XMMDst
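One detail worth spelling out: each HashKey_N_k slot referenced above holds the XOR of the high and low 64-bit halves of HashKey^N (the "(for Karatsuba purposes)" comments in the #define hunk). A 128x128 carry-less multiply is assembled from three PCLMULQDQ products, a1*b1, a0*b0 and (a1+a0)*(b1+b0), and the key-dependent operand of the middle product is now precomputed once by PRECOMPUTE instead of being rebuilt on the stack each call. A self-contained C sketch of the identity; the bit-by-bit clmul64 helper is only there to keep the example dependency-free and is not how the kernel does it:

    #include <assert.h>
    #include <stdint.h>

    /* 64x64 -> 128 carry-less ("XOR") multiply, schoolbook version. */
    static void clmul64(uint64_t a, uint64_t b, uint64_t *hi, uint64_t *lo)
    {
        uint64_t h = 0, l = 0;
        for (int i = 0; i < 64; i++) {
            if ((b >> i) & 1) {
                l ^= a << i;
                h ^= i ? a >> (64 - i) : 0;
            }
        }
        *hi = h;
        *lo = l;
    }

    int main(void)
    {
        /* a1:a0 plays the data block, b1:b0 plays a hash key power. */
        uint64_t a1 = 0x0123456789abcdefULL, a0 = 0xfedcba9876543210ULL;
        uint64_t b1 = 0xdeadbeefcafef00dULL, b0 = 0x0f1e2d3c4b5a6978ULL;

        uint64_t bk = b1 ^ b0;  /* precomputed once, like HashKey_k */

        uint64_t hh_hi, hh_lo, ll_hi, ll_lo, mm_hi, mm_lo;
        clmul64(a1, b1, &hh_hi, &hh_lo);      /* PCLMULQDQ 0x11: a1*b1 */
        clmul64(a0, b0, &ll_hi, &ll_lo);      /* PCLMULQDQ 0x00: a0*b0 */
        clmul64(a1 ^ a0, bk, &mm_hi, &mm_lo); /* (a1+a0)*(b1+b0)       */

        /* Karatsuba: middle term = (a1^a0)(b1^b0) ^ a1*b1 ^ a0*b0 */
        uint64_t mid_hi = mm_hi ^ hh_hi ^ ll_hi;
        uint64_t mid_lo = mm_lo ^ hh_lo ^ ll_lo;

        /* Cross-check against the four-product expansion a1*b0 ^ a0*b1. */
        uint64_t x_hi, x_lo, y_hi, y_lo;
        clmul64(a1, b0, &x_hi, &x_lo);
        clmul64(a0, b1, &y_hi, &y_lo);
        assert(mid_hi == (x_hi ^ y_hi) && mid_lo == (x_lo ^ y_lo));
        return 0;
    }

Because these values now live in gcm_context_data rather than in a stack frame, they survive across the init, update and finalize steps of one scatter/gather operation, which is exactly the reuse the commit message describes.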