crypto: x86/aesni-xts - deduplicate aesni_xts_enc() and aesni_xts_dec()

Since aesni_xts_enc() and aesni_xts_dec() are very similar, generate
them from a macro that's passed an argument enc=1 or enc=0.  This
reduces the length of aesni-intel_asm.S by 112 lines while still
producing the exact same object file in both 32-bit and 64-bit mode.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
This commit is contained in:
Eric Biggers 2024-04-12 17:09:47 -07:00 committed by Herbert Xu
parent 1d27e1f5c8
commit ea9459ef36

View File

@@ -2825,28 +2825,24 @@ SYM_FUNC_END(aesni_ctr_enc)
.previous
/*
* _aesni_gf128mul_x_ble: internal ABI
* Multiply in GF(2^128) for XTS IVs
* _aesni_gf128mul_x_ble: Multiply in GF(2^128) for XTS IVs
* input:
* IV: current IV
* GF128MUL_MASK == mask with 0x87 and 0x01
* output:
* IV: next IV
* changed:
* CTR: == temporary value
* KEY: == temporary value
*/
/*
* Preprocessor form of the tweak update. Every instruction ends in ';'
* because the whole sequence expands onto a single line.
*
* Steps, in order:
*   1. pshufd copies IV dwords {3, 0, 1, 0} into KEY dwords {0, 1, 2, 3}.
*      This must happen before IV is modified.
*   2. paddq doubles each 64-bit half of IV, i.e. shifts each half left by
*      one bit; the bits shifted out of the two halves are lost here.
*   3. psrad $31 turns every KEY dword into all-ones or all-zeros according
*      to the top bit of the IV dword it was copied from.
*   4. pand keeps only the constants held in GF128MUL_MASK (presumably 0x87
*      in dword 0 and 0x01 in dword 2 - confirm against the mask's
*      definition, which is outside this view).
*   5. pxor folds those constants back into the doubled IV.
*/
#define _aesni_gf128mul_x_ble() \
pshufd $0x13, IV, KEY; \
paddq IV, IV; \
psrad $31, KEY; \
pand GF128MUL_MASK, KEY; \
pxor KEY, IV;
.macro _aesni_gf128mul_x_ble
/*
* Capture the old top bits before IV is changed: selector 0x13 copies IV
* dwords {3, 0, 1, 0} into KEY dwords {0, 1, 2, 3}.
*/
pshufd $0x13, IV, KEY
/*
* Double each 64-bit half of IV (a one-bit left shift per half). The bit
* shifted out of each half is dropped here and re-injected below.
*/
paddq IV, IV
/*
* Arithmetic shift by 31 replicates each dword's sign bit, so every KEY
* dword becomes all-ones if the captured IV dword had its top bit set,
* all-zeros otherwise.
*/
psrad $31, KEY
/*
* Keep only the constants supplied by GF128MUL_MASK. Presumably 0x87 sits
* in dword 0 (reduction when old bit 127 was set) and 0x01 in dword 2
* (carry from bit 63 into bit 64) - confirm against the mask definition,
* which is not visible here.
*/
pand GF128MUL_MASK, KEY
/* Fold the selected constants into the doubled IV, giving the next IV. */
pxor KEY, IV
.endm
/*
* void aesni_xts_enc(const struct crypto_aes_ctx *ctx, u8 *dst,
* const u8 *src, unsigned int len, le128 *iv)
*/
SYM_FUNC_START(aesni_xts_enc)
.macro _aesni_xts_crypt enc
FRAME_BEGIN
#ifndef __x86_64__
pushl IVP
@@ -2865,35 +2861,46 @@ SYM_FUNC_START(aesni_xts_enc)
movups (IVP), IV
mov 480(KEYP), KLEN
.if !\enc
add $240, KEYP
.Lxts_enc_loop4:
test $15, LEN
jz .Lxts_loop4\@
sub $16, LEN
.endif
.Lxts_loop4\@:
sub $64, LEN
jl .Lxts_enc_1x
jl .Lxts_1x\@
movdqa IV, STATE1
movdqu 0x00(INP), IN
pxor IN, STATE1
movdqu IV, 0x00(OUTP)
_aesni_gf128mul_x_ble()
_aesni_gf128mul_x_ble
movdqa IV, STATE2
movdqu 0x10(INP), IN
pxor IN, STATE2
movdqu IV, 0x10(OUTP)
_aesni_gf128mul_x_ble()
_aesni_gf128mul_x_ble
movdqa IV, STATE3
movdqu 0x20(INP), IN
pxor IN, STATE3
movdqu IV, 0x20(OUTP)
_aesni_gf128mul_x_ble()
_aesni_gf128mul_x_ble
movdqa IV, STATE4
movdqu 0x30(INP), IN
pxor IN, STATE4
movdqu IV, 0x30(OUTP)
.if \enc
call _aesni_enc4
.else
call _aesni_dec4
.endif
movdqu 0x00(OUTP), IN
pxor IN, STATE1
@@ -2911,17 +2918,17 @@ SYM_FUNC_START(aesni_xts_enc)
pxor IN, STATE4
movdqu STATE4, 0x30(OUTP)
_aesni_gf128mul_x_ble()
_aesni_gf128mul_x_ble
add $64, INP
add $64, OUTP
test LEN, LEN
jnz .Lxts_enc_loop4
jnz .Lxts_loop4\@
.Lxts_enc_ret_iv:
.Lxts_ret_iv\@:
movups IV, (IVP)
.Lxts_enc_ret:
.Lxts_ret\@:
#ifndef __x86_64__
popl KLEN
popl KEYP
@@ -2931,39 +2938,60 @@ SYM_FUNC_START(aesni_xts_enc)
FRAME_END
RET
.Lxts_enc_1x:
.Lxts_1x\@:
add $64, LEN
jz .Lxts_enc_ret_iv
jz .Lxts_ret_iv\@
.if \enc
sub $16, LEN
jl .Lxts_enc_cts4
jl .Lxts_cts4\@
.endif
.Lxts_enc_loop1:
.Lxts_loop1\@:
movdqu (INP), STATE
.if \enc
pxor IV, STATE
call _aesni_enc1
pxor IV, STATE
_aesni_gf128mul_x_ble()
test LEN, LEN
jz .Lxts_enc_out
.else
add $16, INP
sub $16, LEN
jl .Lxts_enc_cts1
jl .Lxts_cts1\@
pxor IV, STATE
call _aesni_dec1
.endif
pxor IV, STATE
_aesni_gf128mul_x_ble
test LEN, LEN
jz .Lxts_out\@
.if \enc
add $16, INP
sub $16, LEN
jl .Lxts_cts1\@
.endif
movdqu STATE, (OUTP)
add $16, OUTP
jmp .Lxts_enc_loop1
jmp .Lxts_loop1\@
.Lxts_enc_out:
.Lxts_out\@:
movdqu STATE, (OUTP)
jmp .Lxts_enc_ret_iv
jmp .Lxts_ret_iv\@
.Lxts_enc_cts4:
.if \enc
.Lxts_cts4\@:
movdqa STATE4, STATE
sub $16, OUTP
.Lxts_cts1\@:
.else
.Lxts_cts1\@:
movdqa IV, STATE4
_aesni_gf128mul_x_ble
.Lxts_enc_cts1:
pxor IV, STATE
call _aesni_dec1
pxor IV, STATE
.endif
#ifndef __x86_64__
lea .Lcts_permute_table, T1
#else
@@ -2989,12 +3017,26 @@ SYM_FUNC_START(aesni_xts_enc)
pblendvb IN2, IN1
movaps IN1, STATE
.if \enc
pxor IV, STATE
call _aesni_enc1
pxor IV, STATE
.else
pxor STATE4, STATE
call _aesni_dec1
pxor STATE4, STATE
.endif
movups STATE, (OUTP)
jmp .Lxts_enc_ret
jmp .Lxts_ret\@
.endm
/*
* void aesni_xts_enc(const struct crypto_aes_ctx *ctx, u8 *dst,
* const u8 *src, unsigned int len, le128 *iv)
*
* XTS encryption entry point. The whole body comes from the shared
* _aesni_xts_crypt macro; expanding it with enc=1 selects the
* _aesni_enc1 / _aesni_enc4 calls inside the macro.
*/
SYM_FUNC_START(aesni_xts_enc)
_aesni_xts_crypt enc=1
SYM_FUNC_END(aesni_xts_enc)
/*
@@ -3002,159 +3044,5 @@ SYM_FUNC_END(aesni_xts_enc)
* const u8 *src, unsigned int len, le128 *iv)
*/
SYM_FUNC_START(aesni_xts_dec)
FRAME_BEGIN
#ifndef __x86_64__
/*
* 32-bit build: save the four registers this routine reuses, then fetch
* the five arguments from the stack.
*/
pushl IVP
pushl LEN
pushl KEYP
pushl KLEN
movl (FRAME_OFFSET+20)(%esp), KEYP # ctx
movl (FRAME_OFFSET+24)(%esp), OUTP # dst
movl (FRAME_OFFSET+28)(%esp), INP # src
movl (FRAME_OFFSET+32)(%esp), LEN # len
movl (FRAME_OFFSET+36)(%esp), IVP # iv
movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK
#else
movdqa .Lgf128mul_x_ble_mask(%rip), GF128MUL_MASK
#endif
/* Load the starting tweak from *iv. */
movups (IVP), IV
/*
* KLEN is read from offset 480 of ctx before KEYP is advanced by 240.
* Presumably offset 240 is the decryption key schedule and 480 the key
* length field - confirm against struct crypto_aes_ctx.
*/
mov 480(KEYP), KLEN
add $240, KEYP
/*
* If len is not a multiple of 16, take one full block out of the count
* up front. That held-back block is then reached with LEN already
* negative and is handled by the .Lxts_dec_cts1 path together with the
* partial tail.
*/
test $15, LEN
jz .Lxts_dec_loop4
sub $16, LEN
.Lxts_dec_loop4:
/* Four-block path: needs at least 64 bytes left, else go block-by-block. */
sub $64, LEN
jl .Lxts_dec_1x
/*
* For each of the four blocks: STATEn = tweak ^ input block, and the
* tweak itself is parked in the matching output slot so it can be
* reloaded after the cipher call. The tweak is advanced between blocks.
*/
movdqa IV, STATE1
movdqu 0x00(INP), IN
pxor IN, STATE1
movdqu IV, 0x00(OUTP)
_aesni_gf128mul_x_ble()
movdqa IV, STATE2
movdqu 0x10(INP), IN
pxor IN, STATE2
movdqu IV, 0x10(OUTP)
_aesni_gf128mul_x_ble()
movdqa IV, STATE3
movdqu 0x20(INP), IN
pxor IN, STATE3
movdqu IV, 0x20(OUTP)
_aesni_gf128mul_x_ble()
movdqa IV, STATE4
movdqu 0x30(INP), IN
pxor IN, STATE4
movdqu IV, 0x30(OUTP)
call _aesni_dec4
/*
* Reload each parked tweak from the output buffer, XOR it into the
* decrypted state, and overwrite the slot with the finished block.
*/
movdqu 0x00(OUTP), IN
pxor IN, STATE1
movdqu STATE1, 0x00(OUTP)
movdqu 0x10(OUTP), IN
pxor IN, STATE2
movdqu STATE2, 0x10(OUTP)
movdqu 0x20(OUTP), IN
pxor IN, STATE3
movdqu STATE3, 0x20(OUTP)
movdqu 0x30(OUTP), IN
pxor IN, STATE4
movdqu STATE4, 0x30(OUTP)
/* Advance the tweak past the fourth block before the next iteration. */
_aesni_gf128mul_x_ble()
add $64, INP
add $64, OUTP
test LEN, LEN
jnz .Lxts_dec_loop4
.Lxts_dec_ret_iv:
/* Normal exit: write the updated tweak back through the iv pointer. */
movups IV, (IVP)
.Lxts_dec_ret:
#ifndef __x86_64__
/* Restore in reverse order of the pushes at entry. */
popl KLEN
popl KEYP
popl LEN
popl IVP
#endif
FRAME_END
RET
.Lxts_dec_1x:
/* Undo the failed "sub $64"; zero means there is nothing left to do. */
add $64, LEN
jz .Lxts_dec_ret_iv
.Lxts_dec_loop1:
/* One block per iteration; the pointer and count move before the cipher call. */
movdqu (INP), STATE
add $16, INP
sub $16, LEN
/*
* LEN going negative means this is the block held back at entry, so a
* partial tail follows it: switch to the ciphertext-stealing path.
*/
jl .Lxts_dec_cts1
pxor IV, STATE
call _aesni_dec1
pxor IV, STATE
_aesni_gf128mul_x_ble()
test LEN, LEN
jz .Lxts_dec_out
movdqu STATE, (OUTP)
add $16, OUTP
jmp .Lxts_dec_loop1
.Lxts_dec_out:
/* Last block of a length that is a multiple of 16: store it and return the tweak. */
movdqu STATE, (OUTP)
jmp .Lxts_dec_ret_iv
.Lxts_dec_cts1:
/*
* Tweak swap: the current tweak is saved in STATE4 for the final
* reassembled block, and the last full block is decrypted with the
* next tweak instead.
*/
movdqa IV, STATE4
_aesni_gf128mul_x_ble()
pxor IV, STATE
call _aesni_dec1
pxor IV, STATE
#ifndef __x86_64__
lea .Lcts_permute_table, T1
#else
lea .Lcts_permute_table(%rip), T1
#endif
add LEN, INP /* rewind input pointer */
add $16, LEN /* # bytes in final block */
/* 16-byte load whose end coincides with the end of the input. */
movups (INP), IN1
/*
* Build two pointers into the permute table: T1 = table + tail length,
* IVP = table + 32 - tail length. IVP is only a scratch register from
* here on; the iv pointer it held is gone.
*/
mov T1, IVP
add $32, IVP
add LEN, T1
sub LEN, IVP
/* LEN now holds the address OUTP + tail length. */
add OUTP, LEN
movups (T1), %xmm4
/* Keep an unshuffled copy of the decrypted block for the blend below. */
movaps STATE, IN2
/*
* Shuffle the decrypted block with the table entry at T1 and store 16
* bytes at OUTP + tail length. Presumably this places the short tail of
* the output; the table contents are not visible here - confirm.
*/
pshufb %xmm4, STATE
movups STATE, (LEN)
/*
* xmm0 serves both as the shuffle control for the tail input and as the
* implicit selection mask of pblendvb, which merges IN2 into IN1.
*/
movups (IVP), %xmm0
pshufb %xmm0, IN1
pblendvb IN2, IN1
movaps IN1, STATE
/* Decrypt the reassembled block with the tweak saved in STATE4. */
pxor STATE4, STATE
call _aesni_dec1
pxor STATE4, STATE
movups STATE, (OUTP)
/*
* Leave through .Lxts_dec_ret, not .Lxts_dec_ret_iv: the tweak is not
* written back on this path, and IVP no longer points at iv.
*/
jmp .Lxts_dec_ret
/*
* NOTE(review): the unconditional jmp above means execution never falls
* through to the macro expansion below. This listing looks like a
* rendered diff in which the hand-written body above is the removed text
* and the macro invocation is its replacement - confirm against the
* actual source file, where only one of the two should be present.
*/
_aesni_xts_crypt 0
SYM_FUNC_END(aesni_xts_dec)