Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto update from Herbert Xu: - Do not idle omap device between crypto operations in one session. - Added sha224/sha384 shims for SSSE3. - More optimisations for camellia-aesni-avx2. - Removed defunct blowfish/twofish AVX2 implementations. - Added unaligned buffer self-tests. - Added PCLMULQDQ optimisation for CRCT10DIF. - Added support for Freescale's DCP co-processor - Misc fixes. * git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (44 commits) crypto: testmgr - test hash implementations with unaligned buffers crypto: testmgr - test AEADs with unaligned buffers crypto: testmgr - test skciphers with unaligned buffers crypto: testmgr - check that entries in alg_test_descs are in correct order Revert "crypto: twofish - add AVX2/x86_64 assembler implementation of twofish cipher" Revert "crypto: blowfish - add AVX2/x86_64 implementation of blowfish cipher" crypto: camellia-aesni-avx2 - tune assembly code for more performance hwrng: bcm2835 - fix MODULE_LICENSE tag hwrng: nomadik - use clk_prepare_enable() crypto: picoxcell - replace strict_strtoul() with kstrtoul() crypto: dcp - Staticize local symbols crypto: dcp - Use NULL instead of 0 crypto: dcp - Use devm_* APIs crypto: dcp - Remove redundant platform_set_drvdata() hwrng: use platform_{get,set}_drvdata() crypto: omap-aes - Don't idle/start AES device between Encrypt operations crypto: crct10dif - Use PTR_RET crypto: ux500 - Cocci spatch "resource_size.spatch" crypto: sha256_ssse3 - add sha224 support crypto: sha512_ssse3 - add sha384 support ...
This commit is contained in:
commit
b2c311075d
@ -736,7 +736,7 @@
|
||||
dcp@80028000 {
|
||||
reg = <0x80028000 0x2000>;
|
||||
interrupts = <52 53 54>;
|
||||
status = "disabled";
|
||||
compatible = "fsl-dcp";
|
||||
};
|
||||
|
||||
pxp@8002a000 {
|
||||
|
@ -3,8 +3,6 @@
|
||||
#
|
||||
|
||||
avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no)
|
||||
avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
|
||||
$(comma)4)$(comma)%ymm2,yes,no)
|
||||
|
||||
obj-$(CONFIG_CRYPTO_ABLK_HELPER_X86) += ablk_helper.o
|
||||
obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
|
||||
@ -29,6 +27,7 @@ obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o
|
||||
obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o
|
||||
obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o
|
||||
obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o
|
||||
obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o
|
||||
|
||||
# These modules require assembler to support AVX.
|
||||
ifeq ($(avx_supported),yes)
|
||||
@ -42,10 +41,8 @@ endif
|
||||
|
||||
# These modules require assembler to support AVX2.
|
||||
ifeq ($(avx2_supported),yes)
|
||||
obj-$(CONFIG_CRYPTO_BLOWFISH_AVX2_X86_64) += blowfish-avx2.o
|
||||
obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o
|
||||
obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o
|
||||
obj-$(CONFIG_CRYPTO_TWOFISH_AVX2_X86_64) += twofish-avx2.o
|
||||
endif
|
||||
|
||||
aes-i586-y := aes-i586-asm_32.o aes_glue.o
|
||||
@ -73,10 +70,8 @@ ifeq ($(avx_supported),yes)
|
||||
endif
|
||||
|
||||
ifeq ($(avx2_supported),yes)
|
||||
blowfish-avx2-y := blowfish-avx2-asm_64.o blowfish_avx2_glue.o
|
||||
camellia-aesni-avx2-y := camellia-aesni-avx2-asm_64.o camellia_aesni_avx2_glue.o
|
||||
serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o
|
||||
twofish-avx2-y := twofish-avx2-asm_64.o twofish_avx2_glue.o
|
||||
endif
|
||||
|
||||
aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
|
||||
@ -87,3 +82,4 @@ crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o
|
||||
crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o
|
||||
sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ssse3_glue.o
|
||||
sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o
|
||||
crct10dif-pclmul-y := crct10dif-pcl-asm_64.o crct10dif-pclmul_glue.o
|
||||
|
@ -1,449 +0,0 @@
|
||||
/*
|
||||
* x86_64/AVX2 assembler optimized version of Blowfish
|
||||
*
|
||||
* Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
||||
.file "blowfish-avx2-asm_64.S"
|
||||
|
||||
.data
.align 32

/*
 * Eight dword offsets spaced 64 bytes apart.  Not referenced by any of the
 * code visible in this file.
 */
.Lprefetch_mask:
	.long 0*64, 1*64, 2*64, 3*64
	.long 4*64, 5*64, 6*64, 7*64

/* vpshufb control: reverse the byte order inside every 32-bit word. */
.Lbswap32_mask:
	.long 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f

/* vpshufb control: reverse the byte order of a whole 128-bit lane. */
.Lbswap128_mask:
	.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0

/*
 * vpshufb control: byte-reverse the low 64 bits of a lane and place the
 * result in both 64-bit halves of that lane.
 */
.Lbswap_iv_mask:
	.byte 7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0

.text
|
||||
/*
 * Byte offsets into the crypto context: 16 + 2 32-bit P entries come first,
 * followed by four tables of 256 32-bit entries each.
 */
#define p	0
#define s0	((16 + 2) * 4)
#define s1	((16 + 2 + (1 * 256)) * 4)
#define s2	((16 + 2 + (2 * 256)) * 4)
#define s3	((16 + 2 + (3 * 256)) * 4)

/* general purpose registers */
#define CTX	%rdi
#define RIO	%rdx

/* base addresses of the four tables, set up by init_round_constants() */
#define RS0	%rax
#define RS1	%r8
#define RS2	%r9
#define RS3	%r10

/* round-key index used by the encrypt/decrypt loops */
#define RLOOP	%r11
#define RLOOPd	%r11d

/* block data: four "right" and four "left" vectors */
#define RXr0	%ymm8
#define RXr1	%ymm9
#define RXr2	%ymm10
#define RXr3	%ymm11
#define RXl0	%ymm12
#define RXl1	%ymm13
#define RXl2	%ymm14
#define RXl3	%ymm15

/* temporaries */
#define RT0	%ymm0
#define RT0x	%xmm0
#define RT1	%ymm1
#define RT1x	%xmm1
#define RIDX0	%ymm2
#define RIDX1	%ymm3
#define RIDX1x	%xmm3
#define RIDX2	%ymm4
#define RIDX3	%ymm5

/* all-ones vector: used as vpgatherdd mask and as the constant '-1' */
#define RNOT	%ymm6

/* per-dword byte mask, (-1 >> 24) */
#define RBYTE	%ymm7
|
||||
|
||||
/***********************************************************************
|
||||
* 32-way AVX2 blowfish
|
||||
***********************************************************************/
|
||||
/*
 * F(xl, xr): one Feistel function application on eight 32-bit lanes.
 *
 * The four bytes of every dword in xl (most significant first) index the
 * tables at RS0..RS3; the looked-up words are combined as
 *	((T0 + T1) ^ T2) + T3
 * and the result is XORed into xr.
 *
 * Expects RNOT to be all ones and RBYTE to hold the per-dword byte mask.
 * vpgatherdd consumes its mask register, so the mask is rebuilt with
 * vpcmpeqd after use; RIDX0 and RIDX1 are recycled as masks once their
 * indices have been gathered.  RNOT is all ones again on exit.
 * Clobbers RT0, RT1 and RIDX0..RIDX3.
 */
#define F(xl, xr) \
	vpsrld $24, xl, RIDX0; \
	vpsrld $16, xl, RIDX1; \
	vpsrld $8, xl, RIDX2; \
	vpand RBYTE, RIDX1, RIDX1; \
	vpand RBYTE, RIDX2, RIDX2; \
	vpand RBYTE, xl, RIDX3; \
	\
	vpgatherdd RNOT, (RS0, RIDX0, 4), RT0; \
	vpcmpeqd RNOT, RNOT, RNOT; \
	vpcmpeqd RIDX0, RIDX0, RIDX0; \
	\
	vpgatherdd RNOT, (RS1, RIDX1, 4), RT1; \
	vpcmpeqd RIDX1, RIDX1, RIDX1; \
	vpaddd RT0, RT1, RT0; \
	\
	vpgatherdd RIDX0, (RS2, RIDX2, 4), RT1; \
	vpxor RT0, RT1, RT0; \
	\
	vpgatherdd RIDX1, (RS3, RIDX3, 4), RT1; \
	vpcmpeqd RNOT, RNOT, RNOT; \
	vpaddd RT0, RT1, RT0; \
	\
	vpxor RT0, xr, xr;
|
||||
|
||||
/*
 * add_roundkey(xl, nmem): broadcast the 32-bit word at nmem to every lane
 * of RT0 and XOR it into the four registers xl0..xl3.  Clobbers RT0.
 */
#define add_roundkey(xl, nmem) \
	vpbroadcastd nmem, RT0; \
	vpxor RT0, xl ## 0, xl ## 0; \
	vpxor RT0, xl ## 1, xl ## 1; \
	vpxor RT0, xl ## 2, xl ## 2; \
	vpxor RT0, xl ## 3, xl ## 3;
|
||||
|
||||
/*
 * round_enc(): two encryption rounds on all 32 blocks.  Uses the round keys
 * at word index RLOOP and RLOOP + 1 of the context.
 */
#define round_enc() \
	add_roundkey(RXr, p(CTX,RLOOP,4)); \
	F(RXl0, RXr0); \
	F(RXl1, RXr1); \
	F(RXl2, RXr2); \
	F(RXl3, RXr3); \
	\
	add_roundkey(RXl, p+4(CTX,RLOOP,4)); \
	F(RXr0, RXl0); \
	F(RXr1, RXl1); \
	F(RXr2, RXl2); \
	F(RXr3, RXl3);
|
||||
|
||||
/*
 * round_dec(): two decryption rounds on all 32 blocks.  Uses the round keys
 * at word index RLOOP + 2 and then RLOOP + 1 of the context.
 */
#define round_dec() \
	add_roundkey(RXr, p+4*2(CTX,RLOOP,4)); \
	F(RXl0, RXr0); \
	F(RXl1, RXr1); \
	F(RXl2, RXr2); \
	F(RXl3, RXr3); \
	\
	add_roundkey(RXl, p+4(CTX,RLOOP,4)); \
	F(RXr0, RXl0); \
	F(RXr1, RXl1); \
	F(RXr2, RXl2); \
	F(RXr3, RXl3);
|
||||
|
||||
/*
 * init_round_constants(): establish the state that F() relies on.
 * RNOT becomes all ones, RS0..RS3 point at the four tables inside the
 * context, and RBYTE becomes all-ones shifted right by 24 in every dword.
 */
#define init_round_constants() \
	vpcmpeqd RNOT, RNOT, RNOT; \
	leaq s0(CTX), RS0; \
	leaq s1(CTX), RS1; \
	leaq s2(CTX), RS2; \
	leaq s3(CTX), RS3; \
	vpsrld $24, RNOT, RBYTE;
|
||||
|
||||
/*
 * transpose_2x2(x0, x1, t0): regroup the 32-bit words of x0 and x1 within
 * each 128-bit lane.  Afterwards x0 holds the even-indexed dwords of both
 * inputs and x1 the odd-indexed ones.  t0 is scratch.
 */
#define transpose_2x2(x0, x1, t0) \
	vpunpckldq x0, x1, t0; \
	vpunpckhdq x0, x1, x1; \
	\
	vpunpcklqdq t0, x1, x0; \
	vpunpckhqdq t0, x1, x1;
|
||||
|
||||
/*
 * read_block(xl, xr): convert freshly loaded data into working form.
 * Every 32-bit word of xl0..3 / xr0..3 is byte-swapped, then each xlN/xrN
 * pair is run through transpose_2x2().  Clobbers RT0 and RT1.
 */
#define read_block(xl, xr) \
	vbroadcasti128 .Lbswap32_mask, RT1; \
	\
	vpshufb RT1, xl ## 0, xl ## 0; \
	vpshufb RT1, xr ## 0, xr ## 0; \
	vpshufb RT1, xl ## 1, xl ## 1; \
	vpshufb RT1, xr ## 1, xr ## 1; \
	vpshufb RT1, xl ## 2, xl ## 2; \
	vpshufb RT1, xr ## 2, xr ## 2; \
	vpshufb RT1, xl ## 3, xl ## 3; \
	vpshufb RT1, xr ## 3, xr ## 3; \
	\
	transpose_2x2(xl ## 0, xr ## 0, RT0); \
	transpose_2x2(xl ## 1, xr ## 1, RT0); \
	transpose_2x2(xl ## 2, xr ## 2, RT0); \
	transpose_2x2(xl ## 3, xr ## 3, RT0);
|
||||
|
||||
/*
 * write_block(xl, xr): counterpart of read_block().  Each xlN/xrN pair is
 * run through transpose_2x2() first, then every 32-bit word is
 * byte-swapped.  Clobbers RT0 and RT1.
 */
#define write_block(xl, xr) \
	vbroadcasti128 .Lbswap32_mask, RT1; \
	\
	transpose_2x2(xl ## 0, xr ## 0, RT0); \
	transpose_2x2(xl ## 1, xr ## 1, RT0); \
	transpose_2x2(xl ## 2, xr ## 2, RT0); \
	transpose_2x2(xl ## 3, xr ## 3, RT0); \
	\
	vpshufb RT1, xl ## 0, xl ## 0; \
	vpshufb RT1, xr ## 0, xr ## 0; \
	vpshufb RT1, xl ## 1, xl ## 1; \
	vpshufb RT1, xr ## 1, xr ## 1; \
	vpshufb RT1, xl ## 2, xl ## 2; \
	vpshufb RT1, xr ## 2, xr ## 2; \
	vpshufb RT1, xl ## 3, xl ## 3; \
	vpshufb RT1, xr ## 3, xr ## 3;
|
||||
|
||||
.align 8
__blowfish_enc_blk32:
	/*
	 * Encrypt the 32 blocks currently held in registers.
	 *
	 * input:
	 *	%rdi: ctx, CTX
	 *	RXl0..3, RXr0..3: plaintext
	 * output:
	 *	RXl0..3, RXr0..3: ciphertext (RXl <=> RXr swapped)
	 */
	init_round_constants()

	read_block(RXl, RXr)

	/* P[0] is applied up front; the loop then consumes P[1]..P[16]. */
	movl $1, RLOOPd
	add_roundkey(RXl, p+4*(0)(CTX))

.align 4
.L__enc_loop:
	round_enc()

	/* RLOOP runs 1, 3, ..., 15: two rounds per iteration. */
	leal 2(RLOOPd), RLOOPd
	cmpl $17, RLOOPd
	jne .L__enc_loop

	/* final key word P[17] */
	add_roundkey(RXr, p+4*(17)(CTX))

	write_block(RXl, RXr)

	ret
ENDPROC(__blowfish_enc_blk32)
|
||||
|
||||
.align 8
__blowfish_dec_blk32:
	/*
	 * Decrypt the 32 blocks currently held in registers.
	 *
	 * input:
	 *	%rdi: ctx, CTX
	 *	RXl0..3, RXr0..3: ciphertext
	 * output:
	 *	RXl0..3, RXr0..3: plaintext (RXl <=> RXr swapped)
	 */
	init_round_constants()

	read_block(RXl, RXr)

	/* P[17] is applied up front; the loop then consumes P[16]..P[1]. */
	movl $14, RLOOPd
	add_roundkey(RXl, p+4*(17)(CTX))

.align 4
.L__dec_loop:
	round_dec()

	/* RLOOP runs 14, 12, ..., 0; leave once it turns negative. */
	addl $-2, RLOOPd
	jns .L__dec_loop

	/* final key word P[0] */
	add_roundkey(RXr, p+4*(0)(CTX))

	write_block(RXl, RXr)

	ret
ENDPROC(__blowfish_dec_blk32)
|
||||
|
||||
ENTRY(blowfish_ecb_enc_32way)
	/*
	 * ECB-encrypt 8 * 32 bytes from src to dst.
	 *
	 * input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 */

	vzeroupper

	/* unaligned loads: src carries no alignment requirement here */
	vmovdqu 0*32(%rdx), RXl0
	vmovdqu 1*32(%rdx), RXr0
	vmovdqu 2*32(%rdx), RXl1
	vmovdqu 3*32(%rdx), RXr1
	vmovdqu 4*32(%rdx), RXl2
	vmovdqu 5*32(%rdx), RXr2
	vmovdqu 6*32(%rdx), RXl3
	vmovdqu 7*32(%rdx), RXr3

	call __blowfish_enc_blk32

	/* the core returns with RXl/RXr exchanged, so store RXr first */
	vmovdqu RXr0, 0*32(%rsi)
	vmovdqu RXl0, 1*32(%rsi)
	vmovdqu RXr1, 2*32(%rsi)
	vmovdqu RXl1, 3*32(%rsi)
	vmovdqu RXr2, 4*32(%rsi)
	vmovdqu RXl2, 5*32(%rsi)
	vmovdqu RXr3, 6*32(%rsi)
	vmovdqu RXl3, 7*32(%rsi)

	vzeroupper

	ret
ENDPROC(blowfish_ecb_enc_32way)
|
||||
|
||||
ENTRY(blowfish_ecb_dec_32way)
	/*
	 * ECB-decrypt 8 * 32 bytes from src to dst.
	 *
	 * input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 */

	vzeroupper

	/* unaligned loads: src carries no alignment requirement here */
	vmovdqu 0*32(%rdx), RXl0
	vmovdqu 1*32(%rdx), RXr0
	vmovdqu 2*32(%rdx), RXl1
	vmovdqu 3*32(%rdx), RXr1
	vmovdqu 4*32(%rdx), RXl2
	vmovdqu 5*32(%rdx), RXr2
	vmovdqu 6*32(%rdx), RXl3
	vmovdqu 7*32(%rdx), RXr3

	call __blowfish_dec_blk32

	/* the core returns with RXl/RXr exchanged, so store RXr first */
	vmovdqu RXr0, 0*32(%rsi)
	vmovdqu RXl0, 1*32(%rsi)
	vmovdqu RXr1, 2*32(%rsi)
	vmovdqu RXl1, 3*32(%rsi)
	vmovdqu RXr2, 4*32(%rsi)
	vmovdqu RXl2, 5*32(%rsi)
	vmovdqu RXr3, 6*32(%rsi)
	vmovdqu RXl3, 7*32(%rsi)

	vzeroupper

	ret
ENDPROC(blowfish_ecb_dec_32way)
|
||||
|
||||
ENTRY(blowfish_cbc_dec_32way)
	/*
	 * CBC-decrypt 8 * 32 bytes from src to dst.  The first 8-byte block
	 * is only decrypted, not chained: nothing is XORed into it here.
	 *
	 * input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 */

	vzeroupper

	vmovdqu 0*32(%rdx), RXl0
	vmovdqu 1*32(%rdx), RXr0
	vmovdqu 2*32(%rdx), RXl1
	vmovdqu 3*32(%rdx), RXr1
	vmovdqu 4*32(%rdx), RXl2
	vmovdqu 5*32(%rdx), RXr2
	vmovdqu 6*32(%rdx), RXl3
	vmovdqu 7*32(%rdx), RXr3

	call __blowfish_dec_blk32

	/*
	 * xor with src.  All XOR operands are read from src before any
	 * store to dst below.
	 *
	 * For the first vector build { 0, src[0], src[1], src[2] } in
	 * 64-bit units: vmovq leaves the upper half zero, vpshufd moves the
	 * loaded qword to the high half, vinserti128 supplies the next two.
	 */
	vmovq (%rdx), RT0x
	vpshufd $0x4f, RT0x, RT0x
	vinserti128 $1, 8(%rdx), RT0, RT0
	vpxor RT0, RXr0, RXr0
	/* remaining vectors: src shifted back by one 8-byte block */
	vpxor 0*32+24(%rdx), RXl0, RXl0
	vpxor 1*32+24(%rdx), RXr1, RXr1
	vpxor 2*32+24(%rdx), RXl1, RXl1
	vpxor 3*32+24(%rdx), RXr2, RXr2
	vpxor 4*32+24(%rdx), RXl2, RXl2
	vpxor 5*32+24(%rdx), RXr3, RXr3
	vpxor 6*32+24(%rdx), RXl3, RXl3

	vmovdqu RXr0, (0*32)(%rsi)
	vmovdqu RXl0, (1*32)(%rsi)
	vmovdqu RXr1, (2*32)(%rsi)
	vmovdqu RXl1, (3*32)(%rsi)
	vmovdqu RXr2, (4*32)(%rsi)
	vmovdqu RXl2, (5*32)(%rsi)
	vmovdqu RXr3, (6*32)(%rsi)
	vmovdqu RXl3, (7*32)(%rsi)

	vzeroupper

	ret
ENDPROC(blowfish_cbc_dec_32way)
|
||||
|
||||
ENTRY(blowfish_ctr_32way)
	/*
	 * CTR mode on 8 * 32 bytes: encrypt 32 consecutive counter values
	 * starting at *iv, XOR them with src into dst, and write the counter
	 * advanced by 32 back to *iv.
	 *
	 * input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 *	%rcx: iv (big endian, 64bit)
	 *
	 * In the comments below a..d name the four 64-bit elements of a
	 * vector, lowest first.
	 */

	vzeroupper

	vpcmpeqd RT0, RT0, RT0
	vpsrldq $8, RT0, RT0			/* a: -1, b: 0, c: -1, d: 0 */

	vpcmpeqd RT1x, RT1x, RT1x
	vpaddq RT1x, RT1x, RT1x			/* a: -2, b: -2 */
	vpxor RIDX0, RIDX0, RIDX0
	vinserti128 $1, RT1x, RIDX0, RIDX0	/* a: 0, b: 0, c: -2, d: -2 */

	vpaddq RIDX0, RT0, RT0			/* a: -1, b: 0, c: -3, d: -2 */

	vpcmpeqd RT1, RT1, RT1
	vpaddq RT1, RT1, RT1			/* a: -2, b: -2, c: -2, d: -2 */
	vpaddq RT1, RT1, RIDX2			/* a: -4, b: -4, c: -4, d: -4 */

	vbroadcasti128 .Lbswap_iv_mask, RIDX0
	vbroadcasti128 .Lbswap128_mask, RIDX1

	/* load IV and byteswap */
	vmovq (%rcx), RT1x
	vinserti128 $1, RT1x, RT1, RT1		/* a: BE, b: 0, c: BE, d: 0 */
	vpshufb RIDX0, RT1, RT1			/* a: LE, b: LE, c: LE, d: LE */

	/*
	 * construct IVs: subtracting the negative constants adds the
	 * offsets; each further step advances all four counters by 4
	 */
	vpsubq RT0, RT1, RT1			/* a: le1, b: le0, c: le3, d: le2 */
	vpshufb RIDX1, RT1, RXl0		/* a: be0, b: be1, c: be2, d: be3 */
	vpsubq RIDX2, RT1, RT1			/* le5, le4, le7, le6 */
	vpshufb RIDX1, RT1, RXr0		/* be4, be5, be6, be7 */
	vpsubq RIDX2, RT1, RT1
	vpshufb RIDX1, RT1, RXl1
	vpsubq RIDX2, RT1, RT1
	vpshufb RIDX1, RT1, RXr1
	vpsubq RIDX2, RT1, RT1
	vpshufb RIDX1, RT1, RXl2
	vpsubq RIDX2, RT1, RT1
	vpshufb RIDX1, RT1, RXr2
	vpsubq RIDX2, RT1, RT1
	vpshufb RIDX1, RT1, RXl3
	vpsubq RIDX2, RT1, RT1
	vpshufb RIDX1, RT1, RXr3

	/* store last IV */
	vpsubq RIDX2, RT1, RT1			/* a: le33, b: le32, ... */
	vpshufb RIDX1x, RT1x, RT1x		/* a: be32, ... */
	vmovq RT1x, (%rcx)

	call __blowfish_enc_blk32

	/* dst = src ^ iv */
	vpxor 0*32(%rdx), RXr0, RXr0
	vpxor 1*32(%rdx), RXl0, RXl0
	vpxor 2*32(%rdx), RXr1, RXr1
	vpxor 3*32(%rdx), RXl1, RXl1
	vpxor 4*32(%rdx), RXr2, RXr2
	vpxor 5*32(%rdx), RXl2, RXl2
	vpxor 6*32(%rdx), RXr3, RXr3
	vpxor 7*32(%rdx), RXl3, RXl3
	vmovdqu RXr0, (0*32)(%rsi)
	vmovdqu RXl0, (1*32)(%rsi)
	vmovdqu RXr1, (2*32)(%rsi)
	vmovdqu RXl1, (3*32)(%rsi)
	vmovdqu RXr2, (4*32)(%rsi)
	vmovdqu RXl2, (5*32)(%rsi)
	vmovdqu RXr3, (6*32)(%rsi)
	vmovdqu RXl3, (7*32)(%rsi)

	vzeroupper

	ret
ENDPROC(blowfish_ctr_32way)
|
@ -1,585 +0,0 @@
|
||||
/*
|
||||
* Glue Code for x86_64/AVX2 assembler optimized version of Blowfish
|
||||
*
|
||||
* Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
|
||||
*
|
||||
* CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
|
||||
* Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
|
||||
* CTR part based on code (crypto/ctr.c) by:
|
||||
* (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/err.h>
|
||||
#include <crypto/algapi.h>
|
||||
#include <crypto/blowfish.h>
|
||||
#include <crypto/cryptd.h>
|
||||
#include <crypto/ctr.h>
|
||||
#include <asm/i387.h>
|
||||
#include <asm/xcr.h>
|
||||
#include <asm/xsave.h>
|
||||
#include <asm/crypto/blowfish.h>
|
||||
#include <asm/crypto/ablk_helper.h>
|
||||
#include <crypto/scatterwalk.h>
|
||||
|
||||
#define BF_AVX2_PARALLEL_BLOCKS 32
|
||||
|
||||
/* 32-way AVX2 parallel cipher functions */
|
||||
asmlinkage void blowfish_ecb_enc_32way(struct bf_ctx *ctx, u8 *dst,
|
||||
const u8 *src);
|
||||
asmlinkage void blowfish_ecb_dec_32way(struct bf_ctx *ctx, u8 *dst,
|
||||
const u8 *src);
|
||||
asmlinkage void blowfish_cbc_dec_32way(struct bf_ctx *ctx, u8 *dst,
|
||||
const u8 *src);
|
||||
asmlinkage void blowfish_ctr_32way(struct bf_ctx *ctx, u8 *dst, const u8 *src,
|
||||
__be64 *iv);
|
||||
|
||||
/*
 * Enter a kernel FPU section on demand.
 *
 * @fpu_enabled: whether an earlier call already entered the FPU section
 * @nbytes:      size of the chunk about to be processed
 *
 * The FPU is only claimed once a chunk is big enough for the 32-way AVX2
 * code; once claimed it stays claimed.  Returns the new "FPU in use" state,
 * which the caller must feed back into the next call and into bf_fpu_end().
 */
static inline bool bf_fpu_begin(bool fpu_enabled, unsigned int nbytes)
{
	const unsigned int avx2_len = BF_BLOCK_SIZE * BF_AVX2_PARALLEL_BLOCKS;

	if (!fpu_enabled && nbytes >= avx2_len) {
		kernel_fpu_begin();
		fpu_enabled = true;
	}

	return fpu_enabled;
}
|
||||
|
||||
/*
 * Leave the kernel FPU section if bf_fpu_begin() entered one.
 *
 * @fpu_enabled: state last returned by bf_fpu_begin()
 */
static inline void bf_fpu_end(bool fpu_enabled)
{
	if (!fpu_enabled)
		return;

	kernel_fpu_end();
}
|
||||
|
||||
static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
|
||||
bool enc)
|
||||
{
|
||||
bool fpu_enabled = false;
|
||||
struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
const unsigned int bsize = BF_BLOCK_SIZE;
|
||||
unsigned int nbytes;
|
||||
int err;
|
||||
|
||||
err = blkcipher_walk_virt(desc, walk);
|
||||
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
|
||||
|
||||
while ((nbytes = walk->nbytes)) {
|
||||
u8 *wsrc = walk->src.virt.addr;
|
||||
u8 *wdst = walk->dst.virt.addr;
|
||||
|
||||
fpu_enabled = bf_fpu_begin(fpu_enabled, nbytes);
|
||||
|
||||
/* Process multi-block AVX2 batch */
|
||||
if (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS) {
|
||||
do {
|
||||
if (enc)
|
||||
blowfish_ecb_enc_32way(ctx, wdst, wsrc);
|
||||
else
|
||||
blowfish_ecb_dec_32way(ctx, wdst, wsrc);
|
||||
|
||||
wsrc += bsize * BF_AVX2_PARALLEL_BLOCKS;
|
||||
wdst += bsize * BF_AVX2_PARALLEL_BLOCKS;
|
||||
nbytes -= bsize * BF_AVX2_PARALLEL_BLOCKS;
|
||||
} while (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS);
|
||||
|
||||
if (nbytes < bsize)
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* Process multi-block batch */
|
||||
if (nbytes >= bsize * BF_PARALLEL_BLOCKS) {
|
||||
do {
|
||||
if (enc)
|
||||
blowfish_enc_blk_4way(ctx, wdst, wsrc);
|
||||
else
|
||||
blowfish_dec_blk_4way(ctx, wdst, wsrc);
|
||||
|
||||
wsrc += bsize * BF_PARALLEL_BLOCKS;
|
||||
wdst += bsize * BF_PARALLEL_BLOCKS;
|
||||
nbytes -= bsize * BF_PARALLEL_BLOCKS;
|
||||
} while (nbytes >= bsize * BF_PARALLEL_BLOCKS);
|
||||
|
||||
if (nbytes < bsize)
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* Handle leftovers */
|
||||
do {
|
||||
if (enc)
|
||||
blowfish_enc_blk(ctx, wdst, wsrc);
|
||||
else
|
||||
blowfish_dec_blk(ctx, wdst, wsrc);
|
||||
|
||||
wsrc += bsize;
|
||||
wdst += bsize;
|
||||
nbytes -= bsize;
|
||||
} while (nbytes >= bsize);
|
||||
|
||||
done:
|
||||
err = blkcipher_walk_done(desc, walk, nbytes);
|
||||
}
|
||||
|
||||
bf_fpu_end(fpu_enabled);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct blkcipher_walk walk;
|
||||
|
||||
blkcipher_walk_init(&walk, dst, src, nbytes);
|
||||
return ecb_crypt(desc, &walk, true);
|
||||
}
|
||||
|
||||
static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct blkcipher_walk walk;
|
||||
|
||||
blkcipher_walk_init(&walk, dst, src, nbytes);
|
||||
return ecb_crypt(desc, &walk, false);
|
||||
}
|
||||
|
||||
static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
|
||||
struct blkcipher_walk *walk)
|
||||
{
|
||||
struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
unsigned int bsize = BF_BLOCK_SIZE;
|
||||
unsigned int nbytes = walk->nbytes;
|
||||
u64 *src = (u64 *)walk->src.virt.addr;
|
||||
u64 *dst = (u64 *)walk->dst.virt.addr;
|
||||
u64 *iv = (u64 *)walk->iv;
|
||||
|
||||
do {
|
||||
*dst = *src ^ *iv;
|
||||
blowfish_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
|
||||
iv = dst;
|
||||
|
||||
src += 1;
|
||||
dst += 1;
|
||||
nbytes -= bsize;
|
||||
} while (nbytes >= bsize);
|
||||
|
||||
*(u64 *)walk->iv = *iv;
|
||||
return nbytes;
|
||||
}
|
||||
|
||||
static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct blkcipher_walk walk;
|
||||
int err;
|
||||
|
||||
blkcipher_walk_init(&walk, dst, src, nbytes);
|
||||
err = blkcipher_walk_virt(desc, &walk);
|
||||
|
||||
while ((nbytes = walk.nbytes)) {
|
||||
nbytes = __cbc_encrypt(desc, &walk);
|
||||
err = blkcipher_walk_done(desc, &walk, nbytes);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
|
||||
struct blkcipher_walk *walk)
|
||||
{
|
||||
struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
const unsigned int bsize = BF_BLOCK_SIZE;
|
||||
unsigned int nbytes = walk->nbytes;
|
||||
u64 *src = (u64 *)walk->src.virt.addr;
|
||||
u64 *dst = (u64 *)walk->dst.virt.addr;
|
||||
u64 last_iv;
|
||||
int i;
|
||||
|
||||
/* Start of the last block. */
|
||||
src += nbytes / bsize - 1;
|
||||
dst += nbytes / bsize - 1;
|
||||
|
||||
last_iv = *src;
|
||||
|
||||
/* Process multi-block AVX2 batch */
|
||||
if (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS) {
|
||||
do {
|
||||
nbytes -= bsize * (BF_AVX2_PARALLEL_BLOCKS - 1);
|
||||
src -= BF_AVX2_PARALLEL_BLOCKS - 1;
|
||||
dst -= BF_AVX2_PARALLEL_BLOCKS - 1;
|
||||
|
||||
blowfish_cbc_dec_32way(ctx, (u8 *)dst, (u8 *)src);
|
||||
|
||||
nbytes -= bsize;
|
||||
if (nbytes < bsize)
|
||||
goto done;
|
||||
|
||||
*dst ^= *(src - 1);
|
||||
src -= 1;
|
||||
dst -= 1;
|
||||
} while (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS);
|
||||
|
||||
if (nbytes < bsize)
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* Process multi-block batch */
|
||||
if (nbytes >= bsize * BF_PARALLEL_BLOCKS) {
|
||||
u64 ivs[BF_PARALLEL_BLOCKS - 1];
|
||||
|
||||
do {
|
||||
nbytes -= bsize * (BF_PARALLEL_BLOCKS - 1);
|
||||
src -= BF_PARALLEL_BLOCKS - 1;
|
||||
dst -= BF_PARALLEL_BLOCKS - 1;
|
||||
|
||||
for (i = 0; i < BF_PARALLEL_BLOCKS - 1; i++)
|
||||
ivs[i] = src[i];
|
||||
|
||||
blowfish_dec_blk_4way(ctx, (u8 *)dst, (u8 *)src);
|
||||
|
||||
for (i = 0; i < BF_PARALLEL_BLOCKS - 1; i++)
|
||||
dst[i + 1] ^= ivs[i];
|
||||
|
||||
nbytes -= bsize;
|
||||
if (nbytes < bsize)
|
||||
goto done;
|
||||
|
||||
*dst ^= *(src - 1);
|
||||
src -= 1;
|
||||
dst -= 1;
|
||||
} while (nbytes >= bsize * BF_PARALLEL_BLOCKS);
|
||||
|
||||
if (nbytes < bsize)
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* Handle leftovers */
|
||||
for (;;) {
|
||||
blowfish_dec_blk(ctx, (u8 *)dst, (u8 *)src);
|
||||
|
||||
nbytes -= bsize;
|
||||
if (nbytes < bsize)
|
||||
break;
|
||||
|
||||
*dst ^= *(src - 1);
|
||||
src -= 1;
|
||||
dst -= 1;
|
||||
}
|
||||
|
||||
done:
|
||||
*dst ^= *(u64 *)walk->iv;
|
||||
*(u64 *)walk->iv = last_iv;
|
||||
|
||||
return nbytes;
|
||||
}
|
||||
|
||||
static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
bool fpu_enabled = false;
|
||||
struct blkcipher_walk walk;
|
||||
int err;
|
||||
|
||||
blkcipher_walk_init(&walk, dst, src, nbytes);
|
||||
err = blkcipher_walk_virt(desc, &walk);
|
||||
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
|
||||
|
||||
while ((nbytes = walk.nbytes)) {
|
||||
fpu_enabled = bf_fpu_begin(fpu_enabled, nbytes);
|
||||
nbytes = __cbc_decrypt(desc, &walk);
|
||||
err = blkcipher_walk_done(desc, &walk, nbytes);
|
||||
}
|
||||
|
||||
bf_fpu_end(fpu_enabled);
|
||||
return err;
|
||||
}
|
||||
|
||||
static void ctr_crypt_final(struct blkcipher_desc *desc,
|
||||
struct blkcipher_walk *walk)
|
||||
{
|
||||
struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
u8 *ctrblk = walk->iv;
|
||||
u8 keystream[BF_BLOCK_SIZE];
|
||||
u8 *src = walk->src.virt.addr;
|
||||
u8 *dst = walk->dst.virt.addr;
|
||||
unsigned int nbytes = walk->nbytes;
|
||||
|
||||
blowfish_enc_blk(ctx, keystream, ctrblk);
|
||||
crypto_xor(keystream, src, nbytes);
|
||||
memcpy(dst, keystream, nbytes);
|
||||
|
||||
crypto_inc(ctrblk, BF_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
|
||||
struct blkcipher_walk *walk)
|
||||
{
|
||||
struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
unsigned int bsize = BF_BLOCK_SIZE;
|
||||
unsigned int nbytes = walk->nbytes;
|
||||
u64 *src = (u64 *)walk->src.virt.addr;
|
||||
u64 *dst = (u64 *)walk->dst.virt.addr;
|
||||
int i;
|
||||
|
||||
/* Process multi-block AVX2 batch */
|
||||
if (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS) {
|
||||
do {
|
||||
blowfish_ctr_32way(ctx, (u8 *)dst, (u8 *)src,
|
||||
(__be64 *)walk->iv);
|
||||
|
||||
src += BF_AVX2_PARALLEL_BLOCKS;
|
||||
dst += BF_AVX2_PARALLEL_BLOCKS;
|
||||
nbytes -= bsize * BF_AVX2_PARALLEL_BLOCKS;
|
||||
} while (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS);
|
||||
|
||||
if (nbytes < bsize)
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* Process four block batch */
|
||||
if (nbytes >= bsize * BF_PARALLEL_BLOCKS) {
|
||||
__be64 ctrblocks[BF_PARALLEL_BLOCKS];
|
||||
u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv);
|
||||
|
||||
do {
|
||||
/* create ctrblks for parallel encrypt */
|
||||
for (i = 0; i < BF_PARALLEL_BLOCKS; i++) {
|
||||
if (dst != src)
|
||||
dst[i] = src[i];
|
||||
|
||||
ctrblocks[i] = cpu_to_be64(ctrblk++);
|
||||
}
|
||||
|
||||
blowfish_enc_blk_xor_4way(ctx, (u8 *)dst,
|
||||
(u8 *)ctrblocks);
|
||||
|
||||
src += BF_PARALLEL_BLOCKS;
|
||||
dst += BF_PARALLEL_BLOCKS;
|
||||
nbytes -= bsize * BF_PARALLEL_BLOCKS;
|
||||
} while (nbytes >= bsize * BF_PARALLEL_BLOCKS);
|
||||
|
||||
*(__be64 *)walk->iv = cpu_to_be64(ctrblk);
|
||||
|
||||
if (nbytes < bsize)
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* Handle leftovers */
|
||||
do {
|
||||
u64 ctrblk;
|
||||
|
||||
if (dst != src)
|
||||
*dst = *src;
|
||||
|
||||
ctrblk = *(u64 *)walk->iv;
|
||||
be64_add_cpu((__be64 *)walk->iv, 1);
|
||||
|
||||
blowfish_enc_blk_xor(ctx, (u8 *)dst, (u8 *)&ctrblk);
|
||||
|
||||
src += 1;
|
||||
dst += 1;
|
||||
} while ((nbytes -= bsize) >= bsize);
|
||||
|
||||
done:
|
||||
return nbytes;
|
||||
}
|
||||
|
||||
static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
bool fpu_enabled = false;
|
||||
struct blkcipher_walk walk;
|
||||
int err;
|
||||
|
||||
blkcipher_walk_init(&walk, dst, src, nbytes);
|
||||
err = blkcipher_walk_virt_block(desc, &walk, BF_BLOCK_SIZE);
|
||||
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
|
||||
|
||||
while ((nbytes = walk.nbytes) >= BF_BLOCK_SIZE) {
|
||||
fpu_enabled = bf_fpu_begin(fpu_enabled, nbytes);
|
||||
nbytes = __ctr_crypt(desc, &walk);
|
||||
err = blkcipher_walk_done(desc, &walk, nbytes);
|
||||
}
|
||||
|
||||
bf_fpu_end(fpu_enabled);
|
||||
|
||||
if (walk.nbytes) {
|
||||
ctr_crypt_final(desc, &walk);
|
||||
err = blkcipher_walk_done(desc, &walk, 0);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static struct crypto_alg bf_algs[6] = { {
|
||||
.cra_name = "__ecb-blowfish-avx2",
|
||||
.cra_driver_name = "__driver-ecb-blowfish-avx2",
|
||||
.cra_priority = 0,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
|
||||
.cra_blocksize = BF_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct bf_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_blkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u = {
|
||||
.blkcipher = {
|
||||
.min_keysize = BF_MIN_KEY_SIZE,
|
||||
.max_keysize = BF_MAX_KEY_SIZE,
|
||||
.setkey = blowfish_setkey,
|
||||
.encrypt = ecb_encrypt,
|
||||
.decrypt = ecb_decrypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
.cra_name = "__cbc-blowfish-avx2",
|
||||
.cra_driver_name = "__driver-cbc-blowfish-avx2",
|
||||
.cra_priority = 0,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
|
||||
.cra_blocksize = BF_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct bf_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_blkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u = {
|
||||
.blkcipher = {
|
||||
.min_keysize = BF_MIN_KEY_SIZE,
|
||||
.max_keysize = BF_MAX_KEY_SIZE,
|
||||
.setkey = blowfish_setkey,
|
||||
.encrypt = cbc_encrypt,
|
||||
.decrypt = cbc_decrypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
.cra_name = "__ctr-blowfish-avx2",
|
||||
.cra_driver_name = "__driver-ctr-blowfish-avx2",
|
||||
.cra_priority = 0,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct bf_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_blkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u = {
|
||||
.blkcipher = {
|
||||
.min_keysize = BF_MIN_KEY_SIZE,
|
||||
.max_keysize = BF_MAX_KEY_SIZE,
|
||||
.ivsize = BF_BLOCK_SIZE,
|
||||
.setkey = blowfish_setkey,
|
||||
.encrypt = ctr_crypt,
|
||||
.decrypt = ctr_crypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
.cra_name = "ecb(blowfish)",
|
||||
.cra_driver_name = "ecb-blowfish-avx2",
|
||||
.cra_priority = 400,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
|
||||
.cra_blocksize = BF_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct async_helper_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_ablkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_init = ablk_init,
|
||||
.cra_exit = ablk_exit,
|
||||
.cra_u = {
|
||||
.ablkcipher = {
|
||||
.min_keysize = BF_MIN_KEY_SIZE,
|
||||
.max_keysize = BF_MAX_KEY_SIZE,
|
||||
.setkey = ablk_set_key,
|
||||
.encrypt = ablk_encrypt,
|
||||
.decrypt = ablk_decrypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
.cra_name = "cbc(blowfish)",
|
||||
.cra_driver_name = "cbc-blowfish-avx2",
|
||||
.cra_priority = 400,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
|
||||
.cra_blocksize = BF_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct async_helper_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_ablkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_init = ablk_init,
|
||||
.cra_exit = ablk_exit,
|
||||
.cra_u = {
|
||||
.ablkcipher = {
|
||||
.min_keysize = BF_MIN_KEY_SIZE,
|
||||
.max_keysize = BF_MAX_KEY_SIZE,
|
||||
.ivsize = BF_BLOCK_SIZE,
|
||||
.setkey = ablk_set_key,
|
||||
.encrypt = __ablk_encrypt,
|
||||
.decrypt = ablk_decrypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
.cra_name = "ctr(blowfish)",
|
||||
.cra_driver_name = "ctr-blowfish-avx2",
|
||||
.cra_priority = 400,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct async_helper_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_ablkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_init = ablk_init,
|
||||
.cra_exit = ablk_exit,
|
||||
.cra_u = {
|
||||
.ablkcipher = {
|
||||
.min_keysize = BF_MIN_KEY_SIZE,
|
||||
.max_keysize = BF_MAX_KEY_SIZE,
|
||||
.ivsize = BF_BLOCK_SIZE,
|
||||
.setkey = ablk_set_key,
|
||||
.encrypt = ablk_encrypt,
|
||||
.decrypt = ablk_encrypt,
|
||||
.geniv = "chainiv",
|
||||
},
|
||||
},
|
||||
} };
|
||||
|
||||
|
||||
/*
 * Module entry point for the Blowfish AVX2 glue.
 *
 * Registers every entry of bf_algs, but only after two checks pass:
 *   1. cpu_has_avx2 and cpu_has_osxsave are both set;
 *   2. the value read with xgetbv(XCR_XFEATURE_ENABLED_MASK) has both
 *      XSTATE_SSE and XSTATE_YMM set.
 *
 * Returns -ENODEV (after logging with pr_info) when either check fails,
 * otherwise whatever crypto_register_algs() returns.
 */
static int __init init(void)
|
||||
{
|
||||
u64 xcr0;
|
||||
|
||||
if (!cpu_has_avx2 || !cpu_has_osxsave) {
|
||||
pr_info("AVX2 instructions are not detected.\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
/* NOTE(review): presumably checks that the OS enabled SSE+YMM state handling - confirm */
xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
|
||||
if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
|
||||
pr_info("AVX detected but unusable.\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
return crypto_register_algs(bf_algs, ARRAY_SIZE(bf_algs));
|
||||
}
|
||||
|
||||
/*
 * Module exit point: unregisters the same bf_algs array, with the same
 * element count, that init() passes to crypto_register_algs().
 */
static void __exit fini(void)
|
||||
{
|
||||
crypto_unregister_algs(bf_algs, ARRAY_SIZE(bf_algs));
|
||||
}
|
||||
|
||||
module_init(init);
|
||||
module_exit(fini);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("Blowfish Cipher Algorithm, AVX2 optimized");
|
||||
MODULE_ALIAS("blowfish");
|
||||
MODULE_ALIAS("blowfish-asm");
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
* Glue Code for assembler optimized version of Blowfish
|
||||
*
|
||||
* Copyright © 2011-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
|
||||
* Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
|
||||
*
|
||||
* CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
|
||||
* Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
|
||||
@ -32,24 +32,40 @@
|
||||
#include <linux/module.h>
|
||||
#include <linux/types.h>
|
||||
#include <crypto/algapi.h>
|
||||
#include <asm/crypto/blowfish.h>
|
||||
|
||||
/* regular block cipher functions */
|
||||
asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src,
|
||||
bool xor);
|
||||
EXPORT_SYMBOL_GPL(__blowfish_enc_blk);
|
||||
|
||||
asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src);
|
||||
EXPORT_SYMBOL_GPL(blowfish_dec_blk);
|
||||
|
||||
/* 4-way parallel cipher functions */
|
||||
asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
|
||||
const u8 *src, bool xor);
|
||||
EXPORT_SYMBOL_GPL(__blowfish_enc_blk_4way);
|
||||
|
||||
asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst,
|
||||
const u8 *src);
|
||||
EXPORT_SYMBOL_GPL(blowfish_dec_blk_4way);
|
||||
|
||||
/*
 * Convenience wrapper: calls __blowfish_enc_blk() with its 'xor' argument
 * fixed to false.
 * NOTE(review): presumably this means dst is overwritten with the encrypted
 * block rather than XORed with it - confirm against the assembler routine.
 */
static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src)
|
||||
{
|
||||
__blowfish_enc_blk(ctx, dst, src, false);
|
||||
}
|
||||
|
||||
/*
 * Convenience wrapper: calls __blowfish_enc_blk() with its 'xor' argument
 * fixed to true.
 * NOTE(review): presumably the encrypted block is XORed into dst instead of
 * replacing it - confirm against the assembler routine.
 */
static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst,
|
||||
const u8 *src)
|
||||
{
|
||||
__blowfish_enc_blk(ctx, dst, src, true);
|
||||
}
|
||||
|
||||
/*
 * Convenience wrapper: calls the 4-way routine __blowfish_enc_blk_4way()
 * with its 'xor' argument fixed to false.
 * NOTE(review): src/dst presumably hold four consecutive blocks - confirm
 * against the assembler routine.
 */
static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
|
||||
const u8 *src)
|
||||
{
|
||||
__blowfish_enc_blk_4way(ctx, dst, src, false);
|
||||
}
|
||||
|
||||
/*
 * Convenience wrapper: calls the 4-way routine __blowfish_enc_blk_4way()
 * with its 'xor' argument fixed to true.
 * NOTE(review): presumably XORs four encrypted blocks into dst - confirm
 * against the assembler routine.
 */
static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst,
|
||||
const u8 *src)
|
||||
{
|
||||
__blowfish_enc_blk_4way(ctx, dst, src, true);
|
||||
}
|
||||
|
||||
static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
|
||||
{
|
||||
|
@ -51,16 +51,6 @@
|
||||
#define ymm14_x xmm14
|
||||
#define ymm15_x xmm15
|
||||
|
||||
/*
|
||||
* AES-NI instructions do not support ymmX registers, so we need splitting and
|
||||
* merging.
|
||||
*/
|
||||
#define vaesenclast256(zero, yreg, tmp) \
|
||||
vextracti128 $1, yreg, tmp##_x; \
|
||||
vaesenclast zero##_x, yreg##_x, yreg##_x; \
|
||||
vaesenclast zero##_x, tmp##_x, tmp##_x; \
|
||||
vinserti128 $1, tmp##_x, yreg, yreg;
|
||||
|
||||
/**********************************************************************
|
||||
32-way camellia
|
||||
**********************************************************************/
|
||||
@ -79,46 +69,70 @@
|
||||
* S-function with AES subbytes \
|
||||
*/ \
|
||||
vbroadcasti128 .Linv_shift_row, t4; \
|
||||
vpbroadcastb .L0f0f0f0f, t7; \
|
||||
vbroadcasti128 .Lpre_tf_lo_s1, t0; \
|
||||
vbroadcasti128 .Lpre_tf_hi_s1, t1; \
|
||||
vpbroadcastd .L0f0f0f0f, t7; \
|
||||
vbroadcasti128 .Lpre_tf_lo_s1, t5; \
|
||||
vbroadcasti128 .Lpre_tf_hi_s1, t6; \
|
||||
vbroadcasti128 .Lpre_tf_lo_s4, t2; \
|
||||
vbroadcasti128 .Lpre_tf_hi_s4, t3; \
|
||||
\
|
||||
/* AES inverse shift rows */ \
|
||||
vpshufb t4, x0, x0; \
|
||||
vpshufb t4, x7, x7; \
|
||||
vpshufb t4, x1, x1; \
|
||||
vpshufb t4, x4, x4; \
|
||||
vpshufb t4, x2, x2; \
|
||||
vpshufb t4, x5, x5; \
|
||||
vpshufb t4, x3, x3; \
|
||||
vpshufb t4, x6, x6; \
|
||||
vpshufb t4, x2, x2; \
|
||||
vpshufb t4, x5, x5; \
|
||||
vpshufb t4, x1, x1; \
|
||||
vpshufb t4, x4, x4; \
|
||||
\
|
||||
/* prefilter sboxes 1, 2 and 3 */ \
|
||||
vbroadcasti128 .Lpre_tf_lo_s4, t2; \
|
||||
vbroadcasti128 .Lpre_tf_hi_s4, t3; \
|
||||
filter_8bit(x0, t0, t1, t7, t6); \
|
||||
filter_8bit(x7, t0, t1, t7, t6); \
|
||||
filter_8bit(x1, t0, t1, t7, t6); \
|
||||
filter_8bit(x4, t0, t1, t7, t6); \
|
||||
filter_8bit(x2, t0, t1, t7, t6); \
|
||||
filter_8bit(x5, t0, t1, t7, t6); \
|
||||
\
|
||||
/* prefilter sbox 4 */ \
|
||||
filter_8bit(x0, t5, t6, t7, t4); \
|
||||
filter_8bit(x7, t5, t6, t7, t4); \
|
||||
vextracti128 $1, x0, t0##_x; \
|
||||
vextracti128 $1, x7, t1##_x; \
|
||||
filter_8bit(x3, t2, t3, t7, t4); \
|
||||
filter_8bit(x6, t2, t3, t7, t4); \
|
||||
vextracti128 $1, x3, t3##_x; \
|
||||
vextracti128 $1, x6, t2##_x; \
|
||||
filter_8bit(x2, t5, t6, t7, t4); \
|
||||
filter_8bit(x5, t5, t6, t7, t4); \
|
||||
filter_8bit(x1, t5, t6, t7, t4); \
|
||||
filter_8bit(x4, t5, t6, t7, t4); \
|
||||
\
|
||||
vpxor t4##_x, t4##_x, t4##_x; \
|
||||
filter_8bit(x3, t2, t3, t7, t6); \
|
||||
filter_8bit(x6, t2, t3, t7, t6); \
|
||||
\
|
||||
/* AES subbytes + AES shift rows */ \
|
||||
vextracti128 $1, x2, t6##_x; \
|
||||
vextracti128 $1, x5, t5##_x; \
|
||||
vaesenclast t4##_x, x0##_x, x0##_x; \
|
||||
vaesenclast t4##_x, t0##_x, t0##_x; \
|
||||
vinserti128 $1, t0##_x, x0, x0; \
|
||||
vaesenclast t4##_x, x7##_x, x7##_x; \
|
||||
vaesenclast t4##_x, t1##_x, t1##_x; \
|
||||
vinserti128 $1, t1##_x, x7, x7; \
|
||||
vaesenclast t4##_x, x3##_x, x3##_x; \
|
||||
vaesenclast t4##_x, t3##_x, t3##_x; \
|
||||
vinserti128 $1, t3##_x, x3, x3; \
|
||||
vaesenclast t4##_x, x6##_x, x6##_x; \
|
||||
vaesenclast t4##_x, t2##_x, t2##_x; \
|
||||
vinserti128 $1, t2##_x, x6, x6; \
|
||||
vextracti128 $1, x1, t3##_x; \
|
||||
vextracti128 $1, x4, t2##_x; \
|
||||
vbroadcasti128 .Lpost_tf_lo_s1, t0; \
|
||||
vbroadcasti128 .Lpost_tf_hi_s1, t1; \
|
||||
vaesenclast256(t4, x0, t5); \
|
||||
vaesenclast256(t4, x7, t5); \
|
||||
vaesenclast256(t4, x1, t5); \
|
||||
vaesenclast256(t4, x4, t5); \
|
||||
vaesenclast256(t4, x2, t5); \
|
||||
vaesenclast256(t4, x5, t5); \
|
||||
vaesenclast256(t4, x3, t5); \
|
||||
vaesenclast256(t4, x6, t5); \
|
||||
vaesenclast t4##_x, x2##_x, x2##_x; \
|
||||
vaesenclast t4##_x, t6##_x, t6##_x; \
|
||||
vinserti128 $1, t6##_x, x2, x2; \
|
||||
vaesenclast t4##_x, x5##_x, x5##_x; \
|
||||
vaesenclast t4##_x, t5##_x, t5##_x; \
|
||||
vinserti128 $1, t5##_x, x5, x5; \
|
||||
vaesenclast t4##_x, x1##_x, x1##_x; \
|
||||
vaesenclast t4##_x, t3##_x, t3##_x; \
|
||||
vinserti128 $1, t3##_x, x1, x1; \
|
||||
vaesenclast t4##_x, x4##_x, x4##_x; \
|
||||
vaesenclast t4##_x, t2##_x, t2##_x; \
|
||||
vinserti128 $1, t2##_x, x4, x4; \
|
||||
\
|
||||
/* postfilter sboxes 1 and 4 */ \
|
||||
vbroadcasti128 .Lpost_tf_lo_s3, t2; \
|
||||
@ -139,22 +153,12 @@
|
||||
/* postfilter sbox 2 */ \
|
||||
filter_8bit(x1, t4, t5, t7, t2); \
|
||||
filter_8bit(x4, t4, t5, t7, t2); \
|
||||
vpxor t7, t7, t7; \
|
||||
\
|
||||
vpsrldq $1, t0, t1; \
|
||||
vpsrldq $2, t0, t2; \
|
||||
vpshufb t7, t1, t1; \
|
||||
vpsrldq $3, t0, t3; \
|
||||
vpsrldq $4, t0, t4; \
|
||||
vpsrldq $5, t0, t5; \
|
||||
vpsrldq $6, t0, t6; \
|
||||
vpsrldq $7, t0, t7; \
|
||||
vpbroadcastb t0##_x, t0; \
|
||||
vpbroadcastb t1##_x, t1; \
|
||||
vpbroadcastb t2##_x, t2; \
|
||||
vpbroadcastb t3##_x, t3; \
|
||||
vpbroadcastb t4##_x, t4; \
|
||||
vpbroadcastb t6##_x, t6; \
|
||||
vpbroadcastb t5##_x, t5; \
|
||||
vpbroadcastb t7##_x, t7; \
|
||||
\
|
||||
/* P-function */ \
|
||||
vpxor x5, x0, x0; \
|
||||
@ -162,11 +166,21 @@
|
||||
vpxor x7, x2, x2; \
|
||||
vpxor x4, x3, x3; \
|
||||
\
|
||||
vpshufb t7, t2, t2; \
|
||||
vpsrldq $4, t0, t4; \
|
||||
vpshufb t7, t3, t3; \
|
||||
vpsrldq $5, t0, t5; \
|
||||
vpshufb t7, t4, t4; \
|
||||
\
|
||||
vpxor x2, x4, x4; \
|
||||
vpxor x3, x5, x5; \
|
||||
vpxor x0, x6, x6; \
|
||||
vpxor x1, x7, x7; \
|
||||
\
|
||||
vpsrldq $6, t0, t6; \
|
||||
vpshufb t7, t5, t5; \
|
||||
vpshufb t7, t6, t6; \
|
||||
\
|
||||
vpxor x7, x0, x0; \
|
||||
vpxor x4, x1, x1; \
|
||||
vpxor x5, x2, x2; \
|
||||
@ -179,12 +193,16 @@
|
||||
\
|
||||
/* Add key material and result to CD (x becomes new CD) */ \
|
||||
\
|
||||
vpxor t7, x0, x0; \
|
||||
vpxor 4 * 32(mem_cd), x0, x0; \
|
||||
\
|
||||
vpxor t6, x1, x1; \
|
||||
vpxor 5 * 32(mem_cd), x1, x1; \
|
||||
\
|
||||
vpsrldq $7, t0, t6; \
|
||||
vpshufb t7, t0, t0; \
|
||||
vpshufb t7, t6, t7; \
|
||||
\
|
||||
vpxor t7, x0, x0; \
|
||||
vpxor 4 * 32(mem_cd), x0, x0; \
|
||||
\
|
||||
vpxor t5, x2, x2; \
|
||||
vpxor 6 * 32(mem_cd), x2, x2; \
|
||||
\
|
||||
@ -204,7 +222,7 @@
|
||||
vpxor 3 * 32(mem_cd), x7, x7;
|
||||
|
||||
/*
|
||||
* Size optimization... with inlined roundsm16 binary would be over 5 times
|
||||
* Size optimization... with inlined roundsm32 binary would be over 5 times
|
||||
* larger and would only be marginally faster.
|
||||
*/
|
||||
.align 8
|
||||
@ -324,13 +342,13 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
|
||||
*/ \
|
||||
vpbroadcastd kll, t0; /* only lowest 32-bit used */ \
|
||||
vpxor tt0, tt0, tt0; \
|
||||
vpbroadcastb t0##_x, t3; \
|
||||
vpshufb tt0, t0, t3; \
|
||||
vpsrldq $1, t0, t0; \
|
||||
vpbroadcastb t0##_x, t2; \
|
||||
vpshufb tt0, t0, t2; \
|
||||
vpsrldq $1, t0, t0; \
|
||||
vpbroadcastb t0##_x, t1; \
|
||||
vpshufb tt0, t0, t1; \
|
||||
vpsrldq $1, t0, t0; \
|
||||
vpbroadcastb t0##_x, t0; \
|
||||
vpshufb tt0, t0, t0; \
|
||||
\
|
||||
vpand l0, t0, t0; \
|
||||
vpand l1, t1, t1; \
|
||||
@ -340,6 +358,7 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
|
||||
rol32_1_32(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \
|
||||
\
|
||||
vpxor l4, t0, l4; \
|
||||
vpbroadcastd krr, t0; /* only lowest 32-bit used */ \
|
||||
vmovdqu l4, 4 * 32(l); \
|
||||
vpxor l5, t1, l5; \
|
||||
vmovdqu l5, 5 * 32(l); \
|
||||
@ -354,14 +373,13 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
|
||||
* rl ^= t2; \
|
||||
*/ \
|
||||
\
|
||||
vpbroadcastd krr, t0; /* only lowest 32-bit used */ \
|
||||
vpbroadcastb t0##_x, t3; \
|
||||
vpshufb tt0, t0, t3; \
|
||||
vpsrldq $1, t0, t0; \
|
||||
vpbroadcastb t0##_x, t2; \
|
||||
vpshufb tt0, t0, t2; \
|
||||
vpsrldq $1, t0, t0; \
|
||||
vpbroadcastb t0##_x, t1; \
|
||||
vpshufb tt0, t0, t1; \
|
||||
vpsrldq $1, t0, t0; \
|
||||
vpbroadcastb t0##_x, t0; \
|
||||
vpshufb tt0, t0, t0; \
|
||||
\
|
||||
vpor 4 * 32(r), t0, t0; \
|
||||
vpor 5 * 32(r), t1, t1; \
|
||||
@ -373,6 +391,7 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
|
||||
vpxor 2 * 32(r), t2, t2; \
|
||||
vpxor 3 * 32(r), t3, t3; \
|
||||
vmovdqu t0, 0 * 32(r); \
|
||||
vpbroadcastd krl, t0; /* only lowest 32-bit used */ \
|
||||
vmovdqu t1, 1 * 32(r); \
|
||||
vmovdqu t2, 2 * 32(r); \
|
||||
vmovdqu t3, 3 * 32(r); \
|
||||
@ -382,14 +401,13 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
|
||||
* t2 &= rl; \
|
||||
* rr ^= rol32(t2, 1); \
|
||||
*/ \
|
||||
vpbroadcastd krl, t0; /* only lowest 32-bit used */ \
|
||||
vpbroadcastb t0##_x, t3; \
|
||||
vpshufb tt0, t0, t3; \
|
||||
vpsrldq $1, t0, t0; \
|
||||
vpbroadcastb t0##_x, t2; \
|
||||
vpshufb tt0, t0, t2; \
|
||||
vpsrldq $1, t0, t0; \
|
||||
vpbroadcastb t0##_x, t1; \
|
||||
vpshufb tt0, t0, t1; \
|
||||
vpsrldq $1, t0, t0; \
|
||||
vpbroadcastb t0##_x, t0; \
|
||||
vpshufb tt0, t0, t0; \
|
||||
\
|
||||
vpand 0 * 32(r), t0, t0; \
|
||||
vpand 1 * 32(r), t1, t1; \
|
||||
@ -403,6 +421,7 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
|
||||
vpxor 6 * 32(r), t2, t2; \
|
||||
vpxor 7 * 32(r), t3, t3; \
|
||||
vmovdqu t0, 4 * 32(r); \
|
||||
vpbroadcastd klr, t0; /* only lowest 32-bit used */ \
|
||||
vmovdqu t1, 5 * 32(r); \
|
||||
vmovdqu t2, 6 * 32(r); \
|
||||
vmovdqu t3, 7 * 32(r); \
|
||||
@ -413,14 +432,13 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
|
||||
* ll ^= t0; \
|
||||
*/ \
|
||||
\
|
||||
vpbroadcastd klr, t0; /* only lowest 32-bit used */ \
|
||||
vpbroadcastb t0##_x, t3; \
|
||||
vpshufb tt0, t0, t3; \
|
||||
vpsrldq $1, t0, t0; \
|
||||
vpbroadcastb t0##_x, t2; \
|
||||
vpshufb tt0, t0, t2; \
|
||||
vpsrldq $1, t0, t0; \
|
||||
vpbroadcastb t0##_x, t1; \
|
||||
vpshufb tt0, t0, t1; \
|
||||
vpsrldq $1, t0, t0; \
|
||||
vpbroadcastb t0##_x, t0; \
|
||||
vpshufb tt0, t0, t0; \
|
||||
\
|
||||
vpor l4, t0, t0; \
|
||||
vpor l5, t1, t1; \
|
||||
|
643
arch/x86/crypto/crct10dif-pcl-asm_64.S
Normal file
643
arch/x86/crypto/crct10dif-pcl-asm_64.S
Normal file
@ -0,0 +1,643 @@
|
||||
########################################################################
|
||||
# Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions
|
||||
#
|
||||
# Copyright (c) 2013, Intel Corporation
|
||||
#
|
||||
# Authors:
|
||||
# Erdinc Ozturk <erdinc.ozturk@intel.com>
|
||||
# Vinodh Gopal <vinodh.gopal@intel.com>
|
||||
# James Guilford <james.guilford@intel.com>
|
||||
# Tim Chen <tim.c.chen@linux.intel.com>
|
||||
#
|
||||
# This software is available to you under a choice of one of two
|
||||
# licenses. You may choose to be licensed under the terms of the GNU
|
||||
# General Public License (GPL) Version 2, available from the file
|
||||
# COPYING in the main directory of this source tree, or the
|
||||
# OpenIB.org BSD license below:
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
#
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the
|
||||
# distribution.
|
||||
#
|
||||
# * Neither the name of the Intel Corporation nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY
|
||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
|
||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
########################################################################
|
||||
# Function API:
|
||||
# UINT16 crc_t10dif_pcl(
|
||||
# UINT16 init_crc, //initial CRC value, 16 bits
|
||||
# const unsigned char *buf, //buffer pointer to calculate CRC on
|
||||
# UINT64 len //buffer length in bytes (64-bit data)
|
||||
# );
|
||||
#
|
||||
# Reference paper titled "Fast CRC Computation for Generic
|
||||
# Polynomials Using PCLMULQDQ Instruction"
|
||||
# URL: http://www.intel.com/content/dam/www/public/us/en/documents
|
||||
# /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
|
||||
#
|
||||
#
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
||||
.text
|
||||
|
||||
#define arg1 %rdi
|
||||
#define arg2 %rsi
|
||||
#define arg3 %rdx
|
||||
|
||||
#define arg1_low32 %edi
|
||||
|
||||
ENTRY(crc_t10dif_pcl)
|
||||
.align 16
|
||||
|
||||
# adjust the 16-bit initial_crc value, scale it to 32 bits
|
||||
shl $16, arg1_low32
|
||||
|
||||
# Allocate Stack Space
|
||||
mov %rsp, %rcx
|
||||
sub $16*2, %rsp
|
||||
# align stack to 16 byte boundary
|
||||
and $~(0x10 - 1), %rsp
|
||||
|
||||
# check if smaller than 256
|
||||
cmp $256, arg3
|
||||
|
||||
# for sizes less than 256, we can't fold 128B at a time...
|
||||
jl _less_than_128
|
||||
|
||||
|
||||
# load the initial crc value
|
||||
movd arg1_low32, %xmm10 # initial crc
|
||||
|
||||
# crc value does not need to be byte-reflected, but it needs
|
||||
# to be moved to the high part of the register.
|
||||
# because data will be byte-reflected and will align with
|
||||
# initial crc at correct place.
|
||||
pslldq $12, %xmm10
|
||||
|
||||
movdqa SHUF_MASK(%rip), %xmm11
|
||||
# receive the initial 128B data, xor the initial crc value
|
||||
movdqu 16*0(arg2), %xmm0
|
||||
movdqu 16*1(arg2), %xmm1
|
||||
movdqu 16*2(arg2), %xmm2
|
||||
movdqu 16*3(arg2), %xmm3
|
||||
movdqu 16*4(arg2), %xmm4
|
||||
movdqu 16*5(arg2), %xmm5
|
||||
movdqu 16*6(arg2), %xmm6
|
||||
movdqu 16*7(arg2), %xmm7
|
||||
|
||||
pshufb %xmm11, %xmm0
|
||||
# XOR the initial_crc value
|
||||
pxor %xmm10, %xmm0
|
||||
pshufb %xmm11, %xmm1
|
||||
pshufb %xmm11, %xmm2
|
||||
pshufb %xmm11, %xmm3
|
||||
pshufb %xmm11, %xmm4
|
||||
pshufb %xmm11, %xmm5
|
||||
pshufb %xmm11, %xmm6
|
||||
pshufb %xmm11, %xmm7
|
||||
|
||||
movdqa rk3(%rip), %xmm10 #xmm10 has rk3 and rk4
|
||||
#imm value of pclmulqdq instruction
|
||||
#will determine which constant to use
|
||||
|
||||
#################################################################
|
||||
# we subtract 256 instead of 128 to save one instruction from the loop
|
||||
sub $256, arg3
|
||||
|
||||
# at this section of the code, there is 128*x+y (0<=y<128) bytes of
|
||||
# buffer. The _fold_64_B_loop will fold 128B at a time
|
||||
# until we have 128+y Bytes of buffer
|
||||
|
||||
|
||||
# fold 128B at a time. This section of the code folds 8 xmm
|
||||
# registers in parallel
|
||||
_fold_64_B_loop:
|
||||
|
||||
# update the buffer pointer
|
||||
add $128, arg2 # buf += 64#
|
||||
|
||||
movdqu 16*0(arg2), %xmm9
|
||||
movdqu 16*1(arg2), %xmm12
|
||||
pshufb %xmm11, %xmm9
|
||||
pshufb %xmm11, %xmm12
|
||||
movdqa %xmm0, %xmm8
|
||||
movdqa %xmm1, %xmm13
|
||||
pclmulqdq $0x0 , %xmm10, %xmm0
|
||||
pclmulqdq $0x11, %xmm10, %xmm8
|
||||
pclmulqdq $0x0 , %xmm10, %xmm1
|
||||
pclmulqdq $0x11, %xmm10, %xmm13
|
||||
pxor %xmm9 , %xmm0
|
||||
xorps %xmm8 , %xmm0
|
||||
pxor %xmm12, %xmm1
|
||||
xorps %xmm13, %xmm1
|
||||
|
||||
movdqu 16*2(arg2), %xmm9
|
||||
movdqu 16*3(arg2), %xmm12
|
||||
pshufb %xmm11, %xmm9
|
||||
pshufb %xmm11, %xmm12
|
||||
movdqa %xmm2, %xmm8
|
||||
movdqa %xmm3, %xmm13
|
||||
pclmulqdq $0x0, %xmm10, %xmm2
|
||||
pclmulqdq $0x11, %xmm10, %xmm8
|
||||
pclmulqdq $0x0, %xmm10, %xmm3
|
||||
pclmulqdq $0x11, %xmm10, %xmm13
|
||||
pxor %xmm9 , %xmm2
|
||||
xorps %xmm8 , %xmm2
|
||||
pxor %xmm12, %xmm3
|
||||
xorps %xmm13, %xmm3
|
||||
|
||||
movdqu 16*4(arg2), %xmm9
|
||||
movdqu 16*5(arg2), %xmm12
|
||||
pshufb %xmm11, %xmm9
|
||||
pshufb %xmm11, %xmm12
|
||||
movdqa %xmm4, %xmm8
|
||||
movdqa %xmm5, %xmm13
|
||||
pclmulqdq $0x0, %xmm10, %xmm4
|
||||
pclmulqdq $0x11, %xmm10, %xmm8
|
||||
pclmulqdq $0x0, %xmm10, %xmm5
|
||||
pclmulqdq $0x11, %xmm10, %xmm13
|
||||
pxor %xmm9 , %xmm4
|
||||
xorps %xmm8 , %xmm4
|
||||
pxor %xmm12, %xmm5
|
||||
xorps %xmm13, %xmm5
|
||||
|
||||
movdqu 16*6(arg2), %xmm9
|
||||
movdqu 16*7(arg2), %xmm12
|
||||
pshufb %xmm11, %xmm9
|
||||
pshufb %xmm11, %xmm12
|
||||
movdqa %xmm6 , %xmm8
|
||||
movdqa %xmm7 , %xmm13
|
||||
pclmulqdq $0x0 , %xmm10, %xmm6
|
||||
pclmulqdq $0x11, %xmm10, %xmm8
|
||||
pclmulqdq $0x0 , %xmm10, %xmm7
|
||||
pclmulqdq $0x11, %xmm10, %xmm13
|
||||
pxor %xmm9 , %xmm6
|
||||
xorps %xmm8 , %xmm6
|
||||
pxor %xmm12, %xmm7
|
||||
xorps %xmm13, %xmm7
|
||||
|
||||
sub $128, arg3
|
||||
|
||||
# check if there is another 128B in the buffer to be able to fold
|
||||
jge _fold_64_B_loop
|
||||
##################################################################
|
||||
|
||||
|
||||
add $128, arg2
|
||||
# at this point, the buffer pointer is pointing at the last y Bytes
|
||||
# of the buffer. The 128B of folded data is in 8 of the xmm
|
||||
# registers: xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
|
||||
|
||||
|
||||
# fold the 8 xmm registers to 1 xmm register with different constants
|
||||
|
||||
movdqa rk9(%rip), %xmm10
|
||||
movdqa %xmm0, %xmm8
|
||||
pclmulqdq $0x11, %xmm10, %xmm0
|
||||
pclmulqdq $0x0 , %xmm10, %xmm8
|
||||
pxor %xmm8, %xmm7
|
||||
xorps %xmm0, %xmm7
|
||||
|
||||
movdqa rk11(%rip), %xmm10
|
||||
movdqa %xmm1, %xmm8
|
||||
pclmulqdq $0x11, %xmm10, %xmm1
|
||||
pclmulqdq $0x0 , %xmm10, %xmm8
|
||||
pxor %xmm8, %xmm7
|
||||
xorps %xmm1, %xmm7
|
||||
|
||||
movdqa rk13(%rip), %xmm10
|
||||
movdqa %xmm2, %xmm8
|
||||
pclmulqdq $0x11, %xmm10, %xmm2
|
||||
pclmulqdq $0x0 , %xmm10, %xmm8
|
||||
pxor %xmm8, %xmm7
|
||||
pxor %xmm2, %xmm7
|
||||
|
||||
movdqa rk15(%rip), %xmm10
|
||||
movdqa %xmm3, %xmm8
|
||||
pclmulqdq $0x11, %xmm10, %xmm3
|
||||
pclmulqdq $0x0 , %xmm10, %xmm8
|
||||
pxor %xmm8, %xmm7
|
||||
xorps %xmm3, %xmm7
|
||||
|
||||
movdqa rk17(%rip), %xmm10
|
||||
movdqa %xmm4, %xmm8
|
||||
pclmulqdq $0x11, %xmm10, %xmm4
|
||||
pclmulqdq $0x0 , %xmm10, %xmm8
|
||||
pxor %xmm8, %xmm7
|
||||
pxor %xmm4, %xmm7
|
||||
|
||||
movdqa rk19(%rip), %xmm10
|
||||
movdqa %xmm5, %xmm8
|
||||
pclmulqdq $0x11, %xmm10, %xmm5
|
||||
pclmulqdq $0x0 , %xmm10, %xmm8
|
||||
pxor %xmm8, %xmm7
|
||||
xorps %xmm5, %xmm7
|
||||
|
||||
movdqa rk1(%rip), %xmm10 #xmm10 has rk1 and rk2
|
||||
#imm value of pclmulqdq instruction
|
||||
#will determine which constant to use
|
||||
movdqa %xmm6, %xmm8
|
||||
pclmulqdq $0x11, %xmm10, %xmm6
|
||||
pclmulqdq $0x0 , %xmm10, %xmm8
|
||||
pxor %xmm8, %xmm7
|
||||
pxor %xmm6, %xmm7
|
||||
|
||||
|
||||
# instead of 128, we add 112 (128-16) to the loop counter to save 1
|
||||
# instruction from the loop. Instead of a cmp instruction, we use the
|
||||
# negative flag with the jl instruction
|
||||
add $128-16, arg3
|
||||
jl _final_reduction_for_128
|
||||
|
||||
# now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7
|
||||
# and the rest is in memory. We can fold 16 bytes at a time if y>=16
|
||||
# continue folding 16B at a time
|
||||
|
||||
_16B_reduction_loop:
|
||||
movdqa %xmm7, %xmm8
|
||||
pclmulqdq $0x11, %xmm10, %xmm7
|
||||
pclmulqdq $0x0 , %xmm10, %xmm8
|
||||
pxor %xmm8, %xmm7
|
||||
movdqu (arg2), %xmm0
|
||||
pshufb %xmm11, %xmm0
|
||||
pxor %xmm0 , %xmm7
|
||||
add $16, arg2
|
||||
sub $16, arg3
|
||||
# instead of a cmp instruction, we utilize the flags with the
|
||||
# jge instruction equivalent of: cmp arg3, 16-16
|
||||
# check if there is any more 16B in the buffer to be able to fold
|
||||
jge _16B_reduction_loop
|
||||
|
||||
#now we have 16+z bytes left to reduce, where 0<= z < 16.
|
||||
#first, we reduce the data in the xmm7 register
|
||||
|
||||
|
||||
_final_reduction_for_128:
|
||||
# check if any more data to fold. If not, compute the CRC of
|
||||
# the final 128 bits
|
||||
add $16, arg3
|
||||
je _128_done
|
||||
|
||||
# here we are getting data that is less than 16 bytes.
|
||||
# since we know that there was data before the pointer, we can
|
||||
# offset the input pointer before the actual point, to receive
|
||||
# exactly 16 bytes. after that the registers need to be adjusted.
|
||||
_get_last_two_xmms:
|
||||
movdqa %xmm7, %xmm2
|
||||
|
||||
movdqu -16(arg2, arg3), %xmm1
|
||||
pshufb %xmm11, %xmm1
|
||||
|
||||
# get rid of the extra data that was loaded before
|
||||
# load the shift constant
|
||||
lea pshufb_shf_table+16(%rip), %rax
|
||||
sub arg3, %rax
|
||||
movdqu (%rax), %xmm0
|
||||
|
||||
# shift xmm2 to the left by arg3 bytes
|
||||
pshufb %xmm0, %xmm2
|
||||
|
||||
# shift xmm7 to the right by 16-arg3 bytes
|
||||
pxor mask1(%rip), %xmm0
|
||||
pshufb %xmm0, %xmm7
|
||||
pblendvb %xmm2, %xmm1 #xmm0 is implicit
|
||||
|
||||
# fold 16 Bytes
|
||||
movdqa %xmm1, %xmm2
|
||||
movdqa %xmm7, %xmm8
|
||||
pclmulqdq $0x11, %xmm10, %xmm7
|
||||
pclmulqdq $0x0 , %xmm10, %xmm8
|
||||
pxor %xmm8, %xmm7
|
||||
pxor %xmm2, %xmm7
|
||||
|
||||
_128_done:
|
||||
# compute crc of a 128-bit value
|
||||
movdqa rk5(%rip), %xmm10 # rk5 and rk6 in xmm10
|
||||
movdqa %xmm7, %xmm0
|
||||
|
||||
#64b fold
|
||||
pclmulqdq $0x1, %xmm10, %xmm7
|
||||
pslldq $8 , %xmm0
|
||||
pxor %xmm0, %xmm7
|
||||
|
||||
#32b fold
|
||||
movdqa %xmm7, %xmm0
|
||||
|
||||
pand mask2(%rip), %xmm0
|
||||
|
||||
psrldq $12, %xmm7
|
||||
pclmulqdq $0x10, %xmm10, %xmm7
|
||||
pxor %xmm0, %xmm7
|
||||
|
||||
#barrett reduction
|
||||
_barrett:
|
||||
movdqa rk7(%rip), %xmm10 # rk7 and rk8 in xmm10
|
||||
movdqa %xmm7, %xmm0
|
||||
pclmulqdq $0x01, %xmm10, %xmm7
|
||||
pslldq $4, %xmm7
|
||||
pclmulqdq $0x11, %xmm10, %xmm7
|
||||
|
||||
pslldq $4, %xmm7
|
||||
pxor %xmm0, %xmm7
|
||||
pextrd $1, %xmm7, %eax
|
||||
|
||||
_cleanup:
|
||||
# scale the result back to 16 bits
|
||||
shr $16, %eax
|
||||
mov %rcx, %rsp
|
||||
ret
|
||||
|
||||
########################################################################
|
||||
|
||||
.align 16
|
||||
_less_than_128:
|
||||
|
||||
# check if there is enough buffer to be able to fold 16B at a time
|
||||
cmp $32, arg3
|
||||
jl _less_than_32
|
||||
movdqa SHUF_MASK(%rip), %xmm11
|
||||
|
||||
# now if there is, load the constants
|
||||
movdqa rk1(%rip), %xmm10 # rk1 and rk2 in xmm10
|
||||
|
||||
movd arg1_low32, %xmm0 # get the initial crc value
|
||||
pslldq $12, %xmm0 # align it to its correct place
|
||||
movdqu (arg2), %xmm7 # load the plaintext
|
||||
pshufb %xmm11, %xmm7 # byte-reflect the plaintext
|
||||
pxor %xmm0, %xmm7
|
||||
|
||||
|
||||
# update the buffer pointer
|
||||
add $16, arg2
|
||||
|
||||
# update the counter. subtract 32 instead of 16 to save one
|
||||
# instruction from the loop
|
||||
sub $32, arg3
|
||||
|
||||
jmp _16B_reduction_loop
|
||||
|
||||
|
||||
.align 16
|
||||
_less_than_32:
|
||||
# mov initial crc to the return value. this is necessary for
|
||||
# zero-length buffers.
|
||||
mov arg1_low32, %eax
|
||||
test arg3, arg3
|
||||
je _cleanup
|
||||
|
||||
movdqa SHUF_MASK(%rip), %xmm11
|
||||
|
||||
movd arg1_low32, %xmm0 # get the initial crc value
|
||||
pslldq $12, %xmm0 # align it to its correct place
|
||||
|
||||
cmp $16, arg3
|
||||
je _exact_16_left
|
||||
jl _less_than_16_left
|
||||
|
||||
movdqu (arg2), %xmm7 # load the plaintext
|
||||
pshufb %xmm11, %xmm7 # byte-reflect the plaintext
|
||||
pxor %xmm0 , %xmm7 # xor the initial crc value
|
||||
add $16, arg2
|
||||
sub $16, arg3
|
||||
movdqa rk1(%rip), %xmm10 # rk1 and rk2 in xmm10
|
||||
jmp _get_last_two_xmms
|
||||
|
||||
|
||||
.align 16
|
||||
_less_than_16_left:
|
||||
# use stack space to load data less than 16 bytes, zero-out
|
||||
# the 16B in memory first.
|
||||
|
||||
pxor %xmm1, %xmm1
|
||||
mov %rsp, %r11
|
||||
movdqa %xmm1, (%r11)
|
||||
|
||||
cmp $4, arg3
|
||||
jl _only_less_than_4
|
||||
|
||||
# backup the counter value
|
||||
mov arg3, %r9
|
||||
cmp $8, arg3
|
||||
jl _less_than_8_left
|
||||
|
||||
# load 8 Bytes
|
||||
mov (arg2), %rax
|
||||
mov %rax, (%r11)
|
||||
add $8, %r11
|
||||
sub $8, arg3
|
||||
add $8, arg2
|
||||
_less_than_8_left:
|
||||
|
||||
cmp $4, arg3
|
||||
jl _less_than_4_left
|
||||
|
||||
# load 4 Bytes
|
||||
mov (arg2), %eax
|
||||
mov %eax, (%r11)
|
||||
add $4, %r11
|
||||
sub $4, arg3
|
||||
add $4, arg2
|
||||
_less_than_4_left:
|
||||
|
||||
cmp $2, arg3
|
||||
jl _less_than_2_left
|
||||
|
||||
# load 2 Bytes
|
||||
mov (arg2), %ax
|
||||
mov %ax, (%r11)
|
||||
add $2, %r11
|
||||
sub $2, arg3
|
||||
add $2, arg2
|
||||
_less_than_2_left:
|
||||
cmp $1, arg3
|
||||
jl _zero_left
|
||||
|
||||
# load 1 Byte
|
||||
mov (arg2), %al
|
||||
mov %al, (%r11)
|
||||
_zero_left:
|
||||
movdqa (%rsp), %xmm7
|
||||
pshufb %xmm11, %xmm7
|
||||
pxor %xmm0 , %xmm7 # xor the initial crc value
|
||||
|
||||
# shl r9, 4
|
||||
lea pshufb_shf_table+16(%rip), %rax
|
||||
sub %r9, %rax
|
||||
movdqu (%rax), %xmm0
|
||||
pxor mask1(%rip), %xmm0
|
||||
|
||||
pshufb %xmm0, %xmm7
|
||||
jmp _128_done
|
||||
|
||||
.align 16
|
||||
_exact_16_left:
|
||||
movdqu (arg2), %xmm7
|
||||
pshufb %xmm11, %xmm7
|
||||
pxor %xmm0 , %xmm7 # xor the initial crc value
|
||||
|
||||
jmp _128_done
|
||||
|
||||
_only_less_than_4:
|
||||
cmp $3, arg3
|
||||
jl _only_less_than_3
|
||||
|
||||
# load 3 Bytes
|
||||
mov (arg2), %al
|
||||
mov %al, (%r11)
|
||||
|
||||
mov 1(arg2), %al
|
||||
mov %al, 1(%r11)
|
||||
|
||||
mov 2(arg2), %al
|
||||
mov %al, 2(%r11)
|
||||
|
||||
movdqa (%rsp), %xmm7
|
||||
pshufb %xmm11, %xmm7
|
||||
pxor %xmm0 , %xmm7 # xor the initial crc value
|
||||
|
||||
psrldq $5, %xmm7
|
||||
|
||||
jmp _barrett
|
||||
_only_less_than_3:
|
||||
cmp $2, arg3
|
||||
jl _only_less_than_2
|
||||
|
||||
# load 2 Bytes
|
||||
mov (arg2), %al
|
||||
mov %al, (%r11)
|
||||
|
||||
mov 1(arg2), %al
|
||||
mov %al, 1(%r11)
|
||||
|
||||
movdqa (%rsp), %xmm7
|
||||
pshufb %xmm11, %xmm7
|
||||
pxor %xmm0 , %xmm7 # xor the initial crc value
|
||||
|
||||
psrldq $6, %xmm7
|
||||
|
||||
jmp _barrett
|
||||
_only_less_than_2:
|
||||
|
||||
# load 1 Byte
|
||||
mov (arg2), %al
|
||||
mov %al, (%r11)
|
||||
|
||||
movdqa (%rsp), %xmm7
|
||||
pshufb %xmm11, %xmm7
|
||||
pxor %xmm0 , %xmm7 # xor the initial crc value
|
||||
|
||||
psrldq $7, %xmm7
|
||||
|
||||
jmp _barrett
|
||||
|
||||
ENDPROC(crc_t10dif_pcl)
|
||||
|
||||
.data
|
||||
|
||||
# precomputed constants
|
||||
# these constants are precomputed from the poly:
|
||||
# 0x8bb70000 (0x8bb7 scaled to 32 bits)
|
||||
.align 16
|
||||
# Q = 0x18BB70000
|
||||
# rk1 = 2^(32*3) mod Q << 32
|
||||
# rk2 = 2^(32*5) mod Q << 32
|
||||
# rk3 = 2^(32*15) mod Q << 32
|
||||
# rk4 = 2^(32*17) mod Q << 32
|
||||
# rk5 = 2^(32*3) mod Q << 32
|
||||
# rk6 = 2^(32*2) mod Q << 32
|
||||
# rk7 = floor(2^64/Q)
|
||||
# rk8 = Q
|
||||
rk1:
|
||||
.quad 0x2d56000000000000
|
||||
rk2:
|
||||
.quad 0x06df000000000000
|
||||
rk3:
|
||||
.quad 0x9d9d000000000000
|
||||
rk4:
|
||||
.quad 0x7cf5000000000000
|
||||
rk5:
|
||||
.quad 0x2d56000000000000
|
||||
rk6:
|
||||
.quad 0x1368000000000000
|
||||
rk7:
|
||||
.quad 0x00000001f65a57f8
|
||||
rk8:
|
||||
.quad 0x000000018bb70000
|
||||
|
||||
rk9:
|
||||
.quad 0xceae000000000000
|
||||
rk10:
|
||||
.quad 0xbfd6000000000000
|
||||
rk11:
|
||||
.quad 0x1e16000000000000
|
||||
rk12:
|
||||
.quad 0x713c000000000000
|
||||
rk13:
|
||||
.quad 0xf7f9000000000000
|
||||
rk14:
|
||||
.quad 0x80a6000000000000
|
||||
rk15:
|
||||
.quad 0x044c000000000000
|
||||
rk16:
|
||||
.quad 0xe658000000000000
|
||||
rk17:
|
||||
.quad 0xad18000000000000
|
||||
rk18:
|
||||
.quad 0xa497000000000000
|
||||
rk19:
|
||||
.quad 0x6ee3000000000000
|
||||
rk20:
|
||||
.quad 0xe7b5000000000000
|
||||
|
||||
|
||||
|
||||
mask1:
|
||||
.octa 0x80808080808080808080808080808080
|
||||
mask2:
|
||||
.octa 0x00000000FFFFFFFFFFFFFFFFFFFFFFFF
|
||||
|
||||
SHUF_MASK:
|
||||
.octa 0x000102030405060708090A0B0C0D0E0F
|
||||
|
||||
pshufb_shf_table:
|
||||
# use these values for shift constants for the pshufb instruction
|
||||
# different alignments result in values as shown:
|
||||
# DDQ 0x008f8e8d8c8b8a898887868584838281 # shl 15 (16-1) / shr1
|
||||
# DDQ 0x01008f8e8d8c8b8a8988878685848382 # shl 14 (16-2) / shr2
|
||||
# DDQ 0x0201008f8e8d8c8b8a89888786858483 # shl 13 (16-3) / shr3
|
||||
# DDQ 0x030201008f8e8d8c8b8a898887868584 # shl 12 (16-4) / shr4
|
||||
# DDQ 0x04030201008f8e8d8c8b8a8988878685 # shl 11 (16-5) / shr5
|
||||
# DDQ 0x0504030201008f8e8d8c8b8a89888786 # shl 10 (16-6) / shr6
|
||||
# DDQ 0x060504030201008f8e8d8c8b8a898887 # shl 9 (16-7) / shr7
|
||||
# DDQ 0x07060504030201008f8e8d8c8b8a8988 # shl 8 (16-8) / shr8
|
||||
# DDQ 0x0807060504030201008f8e8d8c8b8a89 # shl 7 (16-9) / shr9
|
||||
# DDQ 0x090807060504030201008f8e8d8c8b8a # shl 6 (16-10) / shr10
|
||||
# DDQ 0x0a090807060504030201008f8e8d8c8b # shl 5 (16-11) / shr11
|
||||
# DDQ 0x0b0a090807060504030201008f8e8d8c # shl 4 (16-12) / shr12
|
||||
# DDQ 0x0c0b0a090807060504030201008f8e8d # shl 3 (16-13) / shr13
|
||||
# DDQ 0x0d0c0b0a090807060504030201008f8e # shl 2 (16-14) / shr14
|
||||
# DDQ 0x0e0d0c0b0a090807060504030201008f # shl 1 (16-15) / shr15
|
||||
.octa 0x8f8e8d8c8b8a89888786858483828100
|
||||
.octa 0x000e0d0c0b0a09080706050403020100
|
151
arch/x86/crypto/crct10dif-pclmul_glue.c
Normal file
151
arch/x86/crypto/crct10dif-pclmul_glue.c
Normal file
@ -0,0 +1,151 @@
|
||||
/*
|
||||
* Cryptographic API.
|
||||
*
|
||||
* T10 Data Integrity Field CRC16 Crypto Transform using PCLMULQDQ Instructions
|
||||
*
|
||||
* Copyright (C) 2013 Intel Corporation
|
||||
* Author: Tim Chen <tim.c.chen@linux.intel.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation; either version 2 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/crc-t10dif.h>
|
||||
#include <crypto/internal/hash.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <asm/i387.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/cpu_device_id.h>
|
||||
|
||||
asmlinkage __u16 crc_t10dif_pcl(__u16 crc, const unsigned char *buf,
|
||||
size_t len);
|
||||
|
||||
struct chksum_desc_ctx {
|
||||
__u16 crc;
|
||||
};
|
||||
|
||||
/*
|
||||
* Steps through buffer one byte at a time, calculates reflected
|
||||
* crc using table.
|
||||
*/
|
||||
|
||||
static int chksum_init(struct shash_desc *desc)
|
||||
{
|
||||
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
|
||||
|
||||
ctx->crc = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int chksum_update(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int length)
|
||||
{
|
||||
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
|
||||
|
||||
if (irq_fpu_usable()) {
|
||||
kernel_fpu_begin();
|
||||
ctx->crc = crc_t10dif_pcl(ctx->crc, data, length);
|
||||
kernel_fpu_end();
|
||||
} else
|
||||
ctx->crc = crc_t10dif_generic(ctx->crc, data, length);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * chksum_final - write the running CRC to @out.
 *
 * Stores the 16-bit CRC from the descriptor context through @out, which is
 * accessed as a __u16.  Always returns 0.
 */
static int chksum_final(struct shash_desc *desc, u8 *out)
{
	struct chksum_desc_ctx *state = shash_desc_ctx(desc);
	__u16 *digest = (__u16 *)out;

	*digest = state->crc;
	return 0;
}
|
||||
|
||||
/*
 * __chksum_finup - compute the CRC of @len bytes at @data, seeded with
 * *@crcp, and store the 16-bit result through @out.
 *
 * *@crcp is only read, never updated.  The PCLMULQDQ routine is used when
 * irq_fpu_usable() allows it, otherwise the generic implementation.
 * Always returns 0.
 */
static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len,
			  u8 *out)
{
	__u16 *digest = (__u16 *)out;

	if (!irq_fpu_usable()) {
		*digest = crc_t10dif_generic(*crcp, data, len);
		return 0;
	}

	kernel_fpu_begin();
	*digest = crc_t10dif_pcl(*crcp, data, len);
	kernel_fpu_end();

	return 0;
}
|
||||
|
||||
/*
 * chksum_finup - process the last @len bytes at @data and emit the digest.
 *
 * Continues from the CRC held in the descriptor context and writes the
 * result to @out via __chksum_finup().
 */
static int chksum_finup(struct shash_desc *desc, const u8 *data,
			unsigned int len, u8 *out)
{
	struct chksum_desc_ctx *state = shash_desc_ctx(desc);
	__u16 *running = &state->crc;

	return __chksum_finup(running, data, len, out);
}
|
||||
|
||||
/*
 * chksum_digest - one-shot CRC of @length bytes at @data, written to @out.
 *
 * In this file the context's crc field is only written by chksum_init() and
 * chksum_update(), and this function calls neither.  Seeding from the
 * context would therefore mix whatever the descriptor happened to contain
 * into the result.  Start from the same zero seed chksum_init() uses.
 *
 * @desc is kept for the shash .digest signature but is not read.
 * Always returns 0.
 */
static int chksum_digest(struct shash_desc *desc, const u8 *data,
			 unsigned int length, u8 *out)
{
	__u16 seed = 0;	/* same initial value chksum_init() stores */

	return __chksum_finup(&seed, data, length, out);
}
|
||||
|
||||
static struct shash_alg alg = {
|
||||
.digestsize = CRC_T10DIF_DIGEST_SIZE,
|
||||
.init = chksum_init,
|
||||
.update = chksum_update,
|
||||
.final = chksum_final,
|
||||
.finup = chksum_finup,
|
||||
.digest = chksum_digest,
|
||||
.descsize = sizeof(struct chksum_desc_ctx),
|
||||
.base = {
|
||||
.cra_name = "crct10dif",
|
||||
.cra_driver_name = "crct10dif-pclmul",
|
||||
.cra_priority = 200,
|
||||
.cra_blocksize = CRC_T10DIF_BLOCK_SIZE,
|
||||
.cra_module = THIS_MODULE,
|
||||
}
|
||||
};
|
||||
|
||||
static const struct x86_cpu_id crct10dif_cpu_id[] = {
|
||||
X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ),
|
||||
{}
|
||||
};
|
||||
MODULE_DEVICE_TABLE(x86cpu, crct10dif_cpu_id);
|
||||
|
||||
/*
 * Module init: register the shash only when the running CPU matches
 * crct10dif_cpu_id; otherwise fail the load with -ENODEV.
 */
static int __init crct10dif_intel_mod_init(void)
{
	if (x86_match_cpu(crct10dif_cpu_id))
		return crypto_register_shash(&alg);

	return -ENODEV;
}
|
||||
|
||||
/* Module exit: unregister the shash registered by crct10dif_intel_mod_init(). */
static void __exit crct10dif_intel_mod_fini(void)
{
	struct shash_alg *registered = &alg;

	crypto_unregister_shash(registered);
}
|
||||
|
||||
module_init(crct10dif_intel_mod_init);
|
||||
module_exit(crct10dif_intel_mod_fini);
|
||||
|
||||
MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>");
|
||||
MODULE_DESCRIPTION("T10 DIF CRC calculation accelerated with PCLMULQDQ.");
|
||||
MODULE_LICENSE("GPL");
|
||||
|
||||
MODULE_ALIAS("crct10dif");
|
||||
MODULE_ALIAS("crct10dif-pclmul");
|
@ -187,7 +187,36 @@ static int sha256_ssse3_import(struct shash_desc *desc, const void *in)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct shash_alg alg = {
|
||||
static int sha224_ssse3_init(struct shash_desc *desc)
|
||||
{
|
||||
struct sha256_state *sctx = shash_desc_ctx(desc);
|
||||
|
||||
sctx->state[0] = SHA224_H0;
|
||||
sctx->state[1] = SHA224_H1;
|
||||
sctx->state[2] = SHA224_H2;
|
||||
sctx->state[3] = SHA224_H3;
|
||||
sctx->state[4] = SHA224_H4;
|
||||
sctx->state[5] = SHA224_H5;
|
||||
sctx->state[6] = SHA224_H6;
|
||||
sctx->state[7] = SHA224_H7;
|
||||
sctx->count = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * sha224_ssse3_final - finish the hash and emit the SHA-224 digest.
 *
 * Runs sha256_ssse3_final() into a scratch buffer of SHA256_DIGEST_SIZE
 * bytes, copies the first SHA224_DIGEST_SIZE bytes to @hash, then clears
 * the scratch buffer.  Always returns 0.
 */
static int sha224_ssse3_final(struct shash_desc *desc, u8 *hash)
{
	u8 full_digest[SHA256_DIGEST_SIZE];

	sha256_ssse3_final(desc, full_digest);

	/* only the leading part of the full digest is handed to the caller */
	memcpy(hash, full_digest, SHA224_DIGEST_SIZE);
	memset(full_digest, 0, sizeof(full_digest));

	return 0;
}
|
||||
|
||||
static struct shash_alg algs[] = { {
|
||||
.digestsize = SHA256_DIGEST_SIZE,
|
||||
.init = sha256_ssse3_init,
|
||||
.update = sha256_ssse3_update,
|
||||
@ -204,7 +233,24 @@ static struct shash_alg alg = {
|
||||
.cra_blocksize = SHA256_BLOCK_SIZE,
|
||||
.cra_module = THIS_MODULE,
|
||||
}
|
||||
};
|
||||
}, {
|
||||
.digestsize = SHA224_DIGEST_SIZE,
|
||||
.init = sha224_ssse3_init,
|
||||
.update = sha256_ssse3_update,
|
||||
.final = sha224_ssse3_final,
|
||||
.export = sha256_ssse3_export,
|
||||
.import = sha256_ssse3_import,
|
||||
.descsize = sizeof(struct sha256_state),
|
||||
.statesize = sizeof(struct sha256_state),
|
||||
.base = {
|
||||
.cra_name = "sha224",
|
||||
.cra_driver_name = "sha224-ssse3",
|
||||
.cra_priority = 150,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
|
||||
.cra_blocksize = SHA224_BLOCK_SIZE,
|
||||
.cra_module = THIS_MODULE,
|
||||
}
|
||||
} };
|
||||
|
||||
#ifdef CONFIG_AS_AVX
|
||||
static bool __init avx_usable(void)
|
||||
@ -227,7 +273,7 @@ static bool __init avx_usable(void)
|
||||
|
||||
static int __init sha256_ssse3_mod_init(void)
|
||||
{
|
||||
/* test for SSE3 first */
|
||||
/* test for SSSE3 first */
|
||||
if (cpu_has_ssse3)
|
||||
sha256_transform_asm = sha256_transform_ssse3;
|
||||
|
||||
@ -254,7 +300,7 @@ static int __init sha256_ssse3_mod_init(void)
|
||||
else
|
||||
#endif
|
||||
pr_info("Using SSSE3 optimized SHA-256 implementation\n");
|
||||
return crypto_register_shash(&alg);
|
||||
return crypto_register_shashes(algs, ARRAY_SIZE(algs));
|
||||
}
|
||||
pr_info("Neither AVX nor SSSE3 is available/usable.\n");
|
||||
|
||||
@ -263,7 +309,7 @@ static int __init sha256_ssse3_mod_init(void)
|
||||
|
||||
static void __exit sha256_ssse3_mod_fini(void)
|
||||
{
|
||||
crypto_unregister_shash(&alg);
|
||||
crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
|
||||
}
|
||||
|
||||
module_init(sha256_ssse3_mod_init);
|
||||
@ -273,3 +319,4 @@ MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, Supplemental SSE3 accelerated");
|
||||
|
||||
MODULE_ALIAS("sha256");
|
||||
/* second algorithm this module registers in algs[] is "sha224", not "sha384" */
MODULE_ALIAS("sha224");
|
||||
|
@ -194,7 +194,37 @@ static int sha512_ssse3_import(struct shash_desc *desc, const void *in)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct shash_alg alg = {
|
||||
static int sha384_ssse3_init(struct shash_desc *desc)
|
||||
{
|
||||
struct sha512_state *sctx = shash_desc_ctx(desc);
|
||||
|
||||
sctx->state[0] = SHA384_H0;
|
||||
sctx->state[1] = SHA384_H1;
|
||||
sctx->state[2] = SHA384_H2;
|
||||
sctx->state[3] = SHA384_H3;
|
||||
sctx->state[4] = SHA384_H4;
|
||||
sctx->state[5] = SHA384_H5;
|
||||
sctx->state[6] = SHA384_H6;
|
||||
sctx->state[7] = SHA384_H7;
|
||||
|
||||
sctx->count[0] = sctx->count[1] = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * sha384_ssse3_final - finish the hash and emit the SHA-384 digest.
 *
 * Runs sha512_ssse3_final() into a scratch buffer of SHA512_DIGEST_SIZE
 * bytes, copies the first SHA384_DIGEST_SIZE bytes to @hash, then clears
 * the scratch buffer.  Always returns 0.
 */
static int sha384_ssse3_final(struct shash_desc *desc, u8 *hash)
{
	u8 full_digest[SHA512_DIGEST_SIZE];

	sha512_ssse3_final(desc, full_digest);

	/* only the leading part of the full digest is handed to the caller */
	memcpy(hash, full_digest, SHA384_DIGEST_SIZE);
	memset(full_digest, 0, sizeof(full_digest));

	return 0;
}
|
||||
|
||||
static struct shash_alg algs[] = { {
|
||||
.digestsize = SHA512_DIGEST_SIZE,
|
||||
.init = sha512_ssse3_init,
|
||||
.update = sha512_ssse3_update,
|
||||
@ -211,7 +241,24 @@ static struct shash_alg alg = {
|
||||
.cra_blocksize = SHA512_BLOCK_SIZE,
|
||||
.cra_module = THIS_MODULE,
|
||||
}
|
||||
};
|
||||
}, {
|
||||
.digestsize = SHA384_DIGEST_SIZE,
|
||||
.init = sha384_ssse3_init,
|
||||
.update = sha512_ssse3_update,
|
||||
.final = sha384_ssse3_final,
|
||||
.export = sha512_ssse3_export,
|
||||
.import = sha512_ssse3_import,
|
||||
.descsize = sizeof(struct sha512_state),
|
||||
.statesize = sizeof(struct sha512_state),
|
||||
.base = {
|
||||
.cra_name = "sha384",
|
||||
.cra_driver_name = "sha384-ssse3",
|
||||
.cra_priority = 150,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
|
||||
.cra_blocksize = SHA384_BLOCK_SIZE,
|
||||
.cra_module = THIS_MODULE,
|
||||
}
|
||||
} };
|
||||
|
||||
#ifdef CONFIG_AS_AVX
|
||||
static bool __init avx_usable(void)
|
||||
@ -234,7 +281,7 @@ static bool __init avx_usable(void)
|
||||
|
||||
static int __init sha512_ssse3_mod_init(void)
|
||||
{
|
||||
/* test for SSE3 first */
|
||||
/* test for SSSE3 first */
|
||||
if (cpu_has_ssse3)
|
||||
sha512_transform_asm = sha512_transform_ssse3;
|
||||
|
||||
@ -261,7 +308,7 @@ static int __init sha512_ssse3_mod_init(void)
|
||||
else
|
||||
#endif
|
||||
pr_info("Using SSSE3 optimized SHA-512 implementation\n");
|
||||
return crypto_register_shash(&alg);
|
||||
return crypto_register_shashes(algs, ARRAY_SIZE(algs));
|
||||
}
|
||||
pr_info("Neither AVX nor SSSE3 is available/usable.\n");
|
||||
|
||||
@ -270,7 +317,7 @@ static int __init sha512_ssse3_mod_init(void)
|
||||
|
||||
static void __exit sha512_ssse3_mod_fini(void)
|
||||
{
|
||||
crypto_unregister_shash(&alg);
|
||||
crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
|
||||
}
|
||||
|
||||
module_init(sha512_ssse3_mod_init);
|
||||
@ -280,3 +327,4 @@ MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, Supplemental SSE3 accelerated");
|
||||
|
||||
MODULE_ALIAS("sha512");
|
||||
MODULE_ALIAS("sha384");
|
||||
|
@ -1,600 +0,0 @@
|
||||
/*
|
||||
* x86_64/AVX2 assembler optimized version of Twofish
|
||||
*
|
||||
* Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include "glue_helper-asm-avx2.S"
|
||||
|
||||
.file "twofish-avx2-asm_64.S"
|
||||
|
||||
.data
|
||||
.align 16
|
||||
|
||||
.Lvpshufb_mask0:
|
||||
.long 0x80808000
|
||||
.long 0x80808004
|
||||
.long 0x80808008
|
||||
.long 0x8080800c
|
||||
|
||||
.Lbswap128_mask:
|
||||
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
|
||||
.Lxts_gf128mul_and_shl1_mask_0:
|
||||
.byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
|
||||
.Lxts_gf128mul_and_shl1_mask_1:
|
||||
.byte 0x0e, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0
|
||||
|
||||
.text
|
||||
|
||||
/* structure of crypto context */
|
||||
#define s0 0
|
||||
#define s1 1024
|
||||
#define s2 2048
|
||||
#define s3 3072
|
||||
#define w 4096
|
||||
#define k 4128
|
||||
|
||||
/* register macros */
|
||||
#define CTX %rdi
|
||||
|
||||
#define RS0 CTX
|
||||
#define RS1 %r8
|
||||
#define RS2 %r9
|
||||
#define RS3 %r10
|
||||
#define RK %r11
|
||||
#define RW %rax
|
||||
#define RROUND %r12
|
||||
#define RROUNDd %r12d
|
||||
|
||||
#define RA0 %ymm8
|
||||
#define RB0 %ymm9
|
||||
#define RC0 %ymm10
|
||||
#define RD0 %ymm11
|
||||
#define RA1 %ymm12
|
||||
#define RB1 %ymm13
|
||||
#define RC1 %ymm14
|
||||
#define RD1 %ymm15
|
||||
|
||||
/* temp regs */
|
||||
#define RX0 %ymm0
|
||||
#define RY0 %ymm1
|
||||
#define RX1 %ymm2
|
||||
#define RY1 %ymm3
|
||||
#define RT0 %ymm4
|
||||
#define RIDX %ymm5
|
||||
|
||||
#define RX0x %xmm0
|
||||
#define RY0x %xmm1
|
||||
#define RX1x %xmm2
|
||||
#define RY1x %xmm3
|
||||
#define RT0x %xmm4
|
||||
|
||||
/* vpgatherdd mask and '-1' */
|
||||
#define RNOT %ymm6
|
||||
|
||||
/* byte mask, (-1 >> 24) */
|
||||
#define RBYTE %ymm7
|
||||
|
||||
/**********************************************************************
|
||||
16-way AVX2 twofish
|
||||
**********************************************************************/
|
||||
/*
 * Set up the values the round macros rely on:
 *   RNOT  - every bit set (a register compared for equality with itself)
 *   RBYTE - RNOT shifted right by 24 in each dword, i.e. a low-byte mask
 *   RK, RW, RS1..RS3 - addresses of the k, w, s1, s2, s3 members of CTX
 *                      (offsets defined under "structure of crypto context")
 * RS0 needs no setup because it is defined as CTX itself.
 */
#define init_round_constants() \
|
||||
vpcmpeqd RNOT, RNOT, RNOT; \
|
||||
vpsrld $24, RNOT, RBYTE; \
|
||||
leaq k(CTX), RK; \
|
||||
leaq w(CTX), RW; \
|
||||
leaq s1(CTX), RS1; \
|
||||
leaq s2(CTX), RS2; \
|
||||
leaq s3(CTX), RS3; \
|
||||
|
||||
#define g16(ab, rs0, rs1, rs2, rs3, xy) \
|
||||
vpand RBYTE, ab ## 0, RIDX; \
|
||||
vpgatherdd RNOT, (rs0, RIDX, 4), xy ## 0; \
|
||||
vpcmpeqd RNOT, RNOT, RNOT; \
|
||||
\
|
||||
vpand RBYTE, ab ## 1, RIDX; \
|
||||
vpgatherdd RNOT, (rs0, RIDX, 4), xy ## 1; \
|
||||
vpcmpeqd RNOT, RNOT, RNOT; \
|
||||
\
|
||||
vpsrld $8, ab ## 0, RIDX; \
|
||||
vpand RBYTE, RIDX, RIDX; \
|
||||
vpgatherdd RNOT, (rs1, RIDX, 4), RT0; \
|
||||
vpcmpeqd RNOT, RNOT, RNOT; \
|
||||
vpxor RT0, xy ## 0, xy ## 0; \
|
||||
\
|
||||
vpsrld $8, ab ## 1, RIDX; \
|
||||
vpand RBYTE, RIDX, RIDX; \
|
||||
vpgatherdd RNOT, (rs1, RIDX, 4), RT0; \
|
||||
vpcmpeqd RNOT, RNOT, RNOT; \
|
||||
vpxor RT0, xy ## 1, xy ## 1; \
|
||||
\
|
||||
vpsrld $16, ab ## 0, RIDX; \
|
||||
vpand RBYTE, RIDX, RIDX; \
|
||||
vpgatherdd RNOT, (rs2, RIDX, 4), RT0; \
|
||||
vpcmpeqd RNOT, RNOT, RNOT; \
|
||||
vpxor RT0, xy ## 0, xy ## 0; \
|
||||
\
|
||||
vpsrld $16, ab ## 1, RIDX; \
|
||||
vpand RBYTE, RIDX, RIDX; \
|
||||
vpgatherdd RNOT, (rs2, RIDX, 4), RT0; \
|
||||
vpcmpeqd RNOT, RNOT, RNOT; \
|
||||
vpxor RT0, xy ## 1, xy ## 1; \
|
||||
\
|
||||
vpsrld $24, ab ## 0, RIDX; \
|
||||
vpgatherdd RNOT, (rs3, RIDX, 4), RT0; \
|
||||
vpcmpeqd RNOT, RNOT, RNOT; \
|
||||
vpxor RT0, xy ## 0, xy ## 0; \
|
||||
\
|
||||
vpsrld $24, ab ## 1, RIDX; \
|
||||
vpgatherdd RNOT, (rs3, RIDX, 4), RT0; \
|
||||
vpcmpeqd RNOT, RNOT, RNOT; \
|
||||
vpxor RT0, xy ## 1, xy ## 1;
|
||||
|
||||
#define g1_16(a, x) \
|
||||
g16(a, RS0, RS1, RS2, RS3, x);
|
||||
|
||||
#define g2_16(b, y) \
|
||||
g16(b, RS1, RS2, RS3, RS0, y);
|
||||
|
||||
#define encrypt_round_end16(a, b, c, d, nk) \
|
||||
vpaddd RY0, RX0, RX0; \
|
||||
vpaddd RX0, RY0, RY0; \
|
||||
vpbroadcastd nk(RK,RROUND,8), RT0; \
|
||||
vpaddd RT0, RX0, RX0; \
|
||||
vpbroadcastd 4+nk(RK,RROUND,8), RT0; \
|
||||
vpaddd RT0, RY0, RY0; \
|
||||
\
|
||||
vpxor RY0, d ## 0, d ## 0; \
|
||||
\
|
||||
vpxor RX0, c ## 0, c ## 0; \
|
||||
vpsrld $1, c ## 0, RT0; \
|
||||
vpslld $31, c ## 0, c ## 0; \
|
||||
vpor RT0, c ## 0, c ## 0; \
|
||||
\
|
||||
vpaddd RY1, RX1, RX1; \
|
||||
vpaddd RX1, RY1, RY1; \
|
||||
vpbroadcastd nk(RK,RROUND,8), RT0; \
|
||||
vpaddd RT0, RX1, RX1; \
|
||||
vpbroadcastd 4+nk(RK,RROUND,8), RT0; \
|
||||
vpaddd RT0, RY1, RY1; \
|
||||
\
|
||||
vpxor RY1, d ## 1, d ## 1; \
|
||||
\
|
||||
vpxor RX1, c ## 1, c ## 1; \
|
||||
vpsrld $1, c ## 1, RT0; \
|
||||
vpslld $31, c ## 1, c ## 1; \
|
||||
vpor RT0, c ## 1, c ## 1; \
|
||||
|
||||
#define encrypt_round16(a, b, c, d, nk) \
|
||||
g2_16(b, RY); \
|
||||
\
|
||||
vpslld $1, b ## 0, RT0; \
|
||||
vpsrld $31, b ## 0, b ## 0; \
|
||||
vpor RT0, b ## 0, b ## 0; \
|
||||
\
|
||||
vpslld $1, b ## 1, RT0; \
|
||||
vpsrld $31, b ## 1, b ## 1; \
|
||||
vpor RT0, b ## 1, b ## 1; \
|
||||
\
|
||||
g1_16(a, RX); \
|
||||
\
|
||||
encrypt_round_end16(a, b, c, d, nk);
|
||||
|
||||
#define encrypt_round_first16(a, b, c, d, nk) \
|
||||
vpslld $1, d ## 0, RT0; \
|
||||
vpsrld $31, d ## 0, d ## 0; \
|
||||
vpor RT0, d ## 0, d ## 0; \
|
||||
\
|
||||
vpslld $1, d ## 1, RT0; \
|
||||
vpsrld $31, d ## 1, d ## 1; \
|
||||
vpor RT0, d ## 1, d ## 1; \
|
||||
\
|
||||
encrypt_round16(a, b, c, d, nk);
|
||||
|
||||
#define encrypt_round_last16(a, b, c, d, nk) \
|
||||
g2_16(b, RY); \
|
||||
\
|
||||
g1_16(a, RX); \
|
||||
\
|
||||
encrypt_round_end16(a, b, c, d, nk);
|
||||
|
||||
#define decrypt_round_end16(a, b, c, d, nk) \
|
||||
vpaddd RY0, RX0, RX0; \
|
||||
vpaddd RX0, RY0, RY0; \
|
||||
vpbroadcastd nk(RK,RROUND,8), RT0; \
|
||||
vpaddd RT0, RX0, RX0; \
|
||||
vpbroadcastd 4+nk(RK,RROUND,8), RT0; \
|
||||
vpaddd RT0, RY0, RY0; \
|
||||
\
|
||||
vpxor RX0, c ## 0, c ## 0; \
|
||||
\
|
||||
vpxor RY0, d ## 0, d ## 0; \
|
||||
vpsrld $1, d ## 0, RT0; \
|
||||
vpslld $31, d ## 0, d ## 0; \
|
||||
vpor RT0, d ## 0, d ## 0; \
|
||||
\
|
||||
vpaddd RY1, RX1, RX1; \
|
||||
vpaddd RX1, RY1, RY1; \
|
||||
vpbroadcastd nk(RK,RROUND,8), RT0; \
|
||||
vpaddd RT0, RX1, RX1; \
|
||||
vpbroadcastd 4+nk(RK,RROUND,8), RT0; \
|
||||
vpaddd RT0, RY1, RY1; \
|
||||
\
|
||||
vpxor RX1, c ## 1, c ## 1; \
|
||||
\
|
||||
vpxor RY1, d ## 1, d ## 1; \
|
||||
vpsrld $1, d ## 1, RT0; \
|
||||
vpslld $31, d ## 1, d ## 1; \
|
||||
vpor RT0, d ## 1, d ## 1;
|
||||
|
||||
#define decrypt_round16(a, b, c, d, nk) \
|
||||
g1_16(a, RX); \
|
||||
\
|
||||
vpslld $1, a ## 0, RT0; \
|
||||
vpsrld $31, a ## 0, a ## 0; \
|
||||
vpor RT0, a ## 0, a ## 0; \
|
||||
\
|
||||
vpslld $1, a ## 1, RT0; \
|
||||
vpsrld $31, a ## 1, a ## 1; \
|
||||
vpor RT0, a ## 1, a ## 1; \
|
||||
\
|
||||
g2_16(b, RY); \
|
||||
\
|
||||
decrypt_round_end16(a, b, c, d, nk);
|
||||
|
||||
#define decrypt_round_first16(a, b, c, d, nk) \
|
||||
vpslld $1, c ## 0, RT0; \
|
||||
vpsrld $31, c ## 0, c ## 0; \
|
||||
vpor RT0, c ## 0, c ## 0; \
|
||||
\
|
||||
vpslld $1, c ## 1, RT0; \
|
||||
vpsrld $31, c ## 1, c ## 1; \
|
||||
vpor RT0, c ## 1, c ## 1; \
|
||||
\
|
||||
decrypt_round16(a, b, c, d, nk)
|
||||
|
||||
#define decrypt_round_last16(a, b, c, d, nk) \
|
||||
g1_16(a, RX); \
|
||||
\
|
||||
g2_16(b, RY); \
|
||||
\
|
||||
decrypt_round_end16(a, b, c, d, nk);
|
||||
|
||||
#define encrypt_cycle16() \
|
||||
encrypt_round16(RA, RB, RC, RD, 0); \
|
||||
encrypt_round16(RC, RD, RA, RB, 8);
|
||||
|
||||
#define encrypt_cycle_first16() \
|
||||
encrypt_round_first16(RA, RB, RC, RD, 0); \
|
||||
encrypt_round16(RC, RD, RA, RB, 8);
|
||||
|
||||
#define encrypt_cycle_last16() \
|
||||
encrypt_round16(RA, RB, RC, RD, 0); \
|
||||
encrypt_round_last16(RC, RD, RA, RB, 8);
|
||||
|
||||
#define decrypt_cycle16(n) \
|
||||
decrypt_round16(RC, RD, RA, RB, 8); \
|
||||
decrypt_round16(RA, RB, RC, RD, 0);
|
||||
|
||||
#define decrypt_cycle_first16(n) \
|
||||
decrypt_round_first16(RC, RD, RA, RB, 8); \
|
||||
decrypt_round16(RA, RB, RC, RD, 0);
|
||||
|
||||
#define decrypt_cycle_last16(n) \
|
||||
decrypt_round16(RC, RD, RA, RB, 8); \
|
||||
decrypt_round_last16(RA, RB, RC, RD, 0);
|
||||
|
||||
#define transpose_4x4(x0,x1,x2,x3,t1,t2) \
|
||||
vpunpckhdq x1, x0, t2; \
|
||||
vpunpckldq x1, x0, x0; \
|
||||
\
|
||||
vpunpckldq x3, x2, t1; \
|
||||
vpunpckhdq x3, x2, x2; \
|
||||
\
|
||||
vpunpckhqdq t1, x0, x1; \
|
||||
vpunpcklqdq t1, x0, x0; \
|
||||
\
|
||||
vpunpckhqdq x2, t2, x3; \
|
||||
vpunpcklqdq x2, t2, x2;
|
||||
|
||||
#define read_blocks8(offs,a,b,c,d) \
|
||||
transpose_4x4(a, b, c, d, RX0, RY0);
|
||||
|
||||
#define write_blocks8(offs,a,b,c,d) \
|
||||
transpose_4x4(a, b, c, d, RX0, RY0);
|
||||
|
||||
#define inpack_enc8(a,b,c,d) \
|
||||
vpbroadcastd 4*0(RW), RT0; \
|
||||
vpxor RT0, a, a; \
|
||||
\
|
||||
vpbroadcastd 4*1(RW), RT0; \
|
||||
vpxor RT0, b, b; \
|
||||
\
|
||||
vpbroadcastd 4*2(RW), RT0; \
|
||||
vpxor RT0, c, c; \
|
||||
\
|
||||
vpbroadcastd 4*3(RW), RT0; \
|
||||
vpxor RT0, d, d;
|
||||
|
||||
#define outunpack_enc8(a,b,c,d) \
|
||||
vpbroadcastd 4*4(RW), RX0; \
|
||||
vpbroadcastd 4*5(RW), RY0; \
|
||||
vpxor RX0, c, RX0; \
|
||||
vpxor RY0, d, RY0; \
|
||||
\
|
||||
vpbroadcastd 4*6(RW), RT0; \
|
||||
vpxor RT0, a, c; \
|
||||
vpbroadcastd 4*7(RW), RT0; \
|
||||
vpxor RT0, b, d; \
|
||||
\
|
||||
vmovdqa RX0, a; \
|
||||
vmovdqa RY0, b;
|
||||
|
||||
#define inpack_dec8(a,b,c,d) \
|
||||
vpbroadcastd 4*4(RW), RX0; \
|
||||
vpbroadcastd 4*5(RW), RY0; \
|
||||
vpxor RX0, a, RX0; \
|
||||
vpxor RY0, b, RY0; \
|
||||
\
|
||||
vpbroadcastd 4*6(RW), RT0; \
|
||||
vpxor RT0, c, a; \
|
||||
vpbroadcastd 4*7(RW), RT0; \
|
||||
vpxor RT0, d, b; \
|
||||
\
|
||||
vmovdqa RX0, c; \
|
||||
vmovdqa RY0, d;
|
||||
|
||||
#define outunpack_dec8(a,b,c,d) \
|
||||
vpbroadcastd 4*0(RW), RT0; \
|
||||
vpxor RT0, a, a; \
|
||||
\
|
||||
vpbroadcastd 4*1(RW), RT0; \
|
||||
vpxor RT0, b, b; \
|
||||
\
|
||||
vpbroadcastd 4*2(RW), RT0; \
|
||||
vpxor RT0, c, c; \
|
||||
\
|
||||
vpbroadcastd 4*3(RW), RT0; \
|
||||
vpxor RT0, d, d;
|
||||
|
||||
#define read_blocks16(a,b,c,d) \
|
||||
read_blocks8(0, a ## 0, b ## 0, c ## 0, d ## 0); \
|
||||
read_blocks8(8, a ## 1, b ## 1, c ## 1, d ## 1);
|
||||
|
||||
#define write_blocks16(a,b,c,d) \
|
||||
write_blocks8(0, a ## 0, b ## 0, c ## 0, d ## 0); \
|
||||
write_blocks8(8, a ## 1, b ## 1, c ## 1, d ## 1);
|
||||
|
||||
/*
 * NOTE(review): xor_blocks8 is not defined anywhere in this file, and no
 * code in this file expands xor_blocks16.  This looks like a dead macro;
 * confirm that the included glue_helper-asm-avx2.S does not provide
 * xor_blocks8 before relying on it.
 */
#define xor_blocks16(a,b,c,d) \
|
||||
xor_blocks8(0, a ## 0, b ## 0, c ## 0, d ## 0); \
|
||||
xor_blocks8(8, a ## 1, b ## 1, c ## 1, d ## 1);
|
||||
|
||||
#define inpack_enc16(a,b,c,d) \
|
||||
inpack_enc8(a ## 0, b ## 0, c ## 0, d ## 0); \
|
||||
inpack_enc8(a ## 1, b ## 1, c ## 1, d ## 1);
|
||||
|
||||
#define outunpack_enc16(a,b,c,d) \
|
||||
outunpack_enc8(a ## 0, b ## 0, c ## 0, d ## 0); \
|
||||
outunpack_enc8(a ## 1, b ## 1, c ## 1, d ## 1);
|
||||
|
||||
#define inpack_dec16(a,b,c,d) \
|
||||
inpack_dec8(a ## 0, b ## 0, c ## 0, d ## 0); \
|
||||
inpack_dec8(a ## 1, b ## 1, c ## 1, d ## 1);
|
||||
|
||||
#define outunpack_dec16(a,b,c,d) \
|
||||
outunpack_dec8(a ## 0, b ## 0, c ## 0, d ## 0); \
|
||||
outunpack_dec8(a ## 1, b ## 1, c ## 1, d ## 1);
|
||||
|
||||
.align 8
|
||||
/*
 * Internal 16-block encryption core, reached by "call" from the *_16way
 * entry points in this file (no ENTRY() of its own).
 *
 * RROUND (%r12) is the round index passed to the cycle macros; it is
 * overwritten here and not restored, which is why the entry points push and
 * pop %r12 around the call.  Each cycle macro expands to two rounds, so the
 * first cycle (index 0), the loop (indexes 2..12) and the last cycle
 * (index 14) together make 8 cycles.
 */
__twofish_enc_blk16:
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: plaintext
|
||||
* output:
|
||||
* RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: ciphertext
|
||||
*/
|
||||
init_round_constants();
|
||||
|
||||
read_blocks16(RA, RB, RC, RD);
|
||||
inpack_enc16(RA, RB, RC, RD);
|
||||
|
||||
xorl RROUNDd, RROUNDd; /* round index 0 for the first cycle */
|
||||
encrypt_cycle_first16();
|
||||
movl $2, RROUNDd;
|
||||
|
||||
.align 4
|
||||
.L__enc_loop:
|
||||
encrypt_cycle16();
|
||||
|
||||
addl $2, RROUNDd;
|
||||
cmpl $14, RROUNDd; /* leave the loop with index 14 for the last cycle */
|
||||
jne .L__enc_loop;
|
||||
|
||||
encrypt_cycle_last16();
|
||||
|
||||
outunpack_enc16(RA, RB, RC, RD);
|
||||
write_blocks16(RA, RB, RC, RD);
|
||||
|
||||
ret;
|
||||
ENDPROC(__twofish_enc_blk16)
|
||||
|
||||
.align 8
|
||||
/*
 * Internal 16-block decryption core, reached by "call" from the *_16way
 * entry points in this file (no ENTRY() of its own).
 *
 * Mirrors __twofish_enc_blk16 with the round index running downwards:
 * first cycle at index 14, the loop at indexes 12..2, last cycle at index 0.
 * RROUND (%r12) is overwritten and not restored here; the entry points
 * push and pop %r12 around the call.
 */
__twofish_dec_blk16:
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: ciphertext
|
||||
* output:
|
||||
* RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: plaintext
|
||||
*/
|
||||
init_round_constants();
|
||||
|
||||
read_blocks16(RA, RB, RC, RD);
|
||||
inpack_dec16(RA, RB, RC, RD);
|
||||
|
||||
movl $14, RROUNDd;
|
||||
decrypt_cycle_first16();
|
||||
movl $12, RROUNDd;
|
||||
|
||||
.align 4
|
||||
.L__dec_loop:
|
||||
decrypt_cycle16();
|
||||
|
||||
addl $-2, RROUNDd; /* sets ZF when the index reaches 0 */
|
||||
jnz .L__dec_loop;
|
||||
|
||||
decrypt_cycle_last16();
|
||||
|
||||
outunpack_dec16(RA, RB, RC, RD);
|
||||
write_blocks16(RA, RB, RC, RD);
|
||||
|
||||
ret;
|
||||
ENDPROC(__twofish_dec_blk16)
|
||||
|
||||
/*
 * ECB encryption entry point: load_16way() from src, run
 * __twofish_enc_blk16, store_16way() to dst.  %r12 is saved and restored
 * because the core uses it as RROUND.  load_16way/store_16way are not
 * defined in this file (presumably glue_helper-asm-avx2.S; confirm).
 */
ENTRY(twofish_ecb_enc_16way)
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst
|
||||
* %rdx: src
|
||||
*/
|
||||
|
||||
vzeroupper;
|
||||
pushq %r12;
|
||||
|
||||
load_16way(%rdx, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);
|
||||
|
||||
call __twofish_enc_blk16;
|
||||
|
||||
store_16way(%rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);
|
||||
|
||||
popq %r12;
|
||||
vzeroupper;
|
||||
|
||||
ret;
|
||||
ENDPROC(twofish_ecb_enc_16way)
|
||||
|
||||
/*
 * ECB decryption entry point: load_16way() from src, run
 * __twofish_dec_blk16, store_16way() to dst.  %r12 is saved and restored
 * because the core uses it as RROUND.  load_16way/store_16way are not
 * defined in this file (presumably glue_helper-asm-avx2.S; confirm).
 */
ENTRY(twofish_ecb_dec_16way)
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst
|
||||
* %rdx: src
|
||||
*/
|
||||
|
||||
vzeroupper;
|
||||
pushq %r12;
|
||||
|
||||
load_16way(%rdx, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);
|
||||
|
||||
call __twofish_dec_blk16;
|
||||
|
||||
store_16way(%rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);
|
||||
|
||||
popq %r12;
|
||||
vzeroupper;
|
||||
|
||||
ret;
|
||||
ENDPROC(twofish_ecb_dec_16way)
|
||||
|
||||
/*
 * CBC decryption entry point: load_16way() from src, run
 * __twofish_dec_blk16, then store_cbc_16way(), which is given both src and
 * dst.  %r12 is saved and restored because the core uses it as RROUND.
 * load_16way/store_cbc_16way are not defined in this file (presumably
 * glue_helper-asm-avx2.S; confirm how the chaining XOR is applied there).
 */
ENTRY(twofish_cbc_dec_16way)
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst
|
||||
* %rdx: src
|
||||
*/
|
||||
|
||||
vzeroupper;
|
||||
pushq %r12;
|
||||
|
||||
load_16way(%rdx, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);
|
||||
|
||||
call __twofish_dec_blk16;
|
||||
|
||||
store_cbc_16way(%rdx, %rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1,
|
||||
RX0);
|
||||
|
||||
popq %r12;
|
||||
vzeroupper;
|
||||
|
||||
ret;
|
||||
ENDPROC(twofish_cbc_dec_16way)
|
||||
|
||||
/*
 * CTR entry point: load_ctr_16way() fills the block registers from the iv
 * (with .Lbswap128_mask passed as the byte-swap mask), the encryption core
 * is run on them, and store_ctr_16way() is given src and dst.  %r12 is
 * saved and restored because the core uses it as RROUND.
 * load_ctr_16way/store_ctr_16way are not defined in this file (presumably
 * glue_helper-asm-avx2.S; confirm).
 */
ENTRY(twofish_ctr_16way)
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst (16 blocks)
|
||||
* %rdx: src (16 blocks)
|
||||
* %rcx: iv (little endian, 128bit)
|
||||
*/
|
||||
|
||||
vzeroupper;
|
||||
pushq %r12;
|
||||
|
||||
load_ctr_16way(%rcx, .Lbswap128_mask, RA0, RB0, RC0, RD0, RA1, RB1, RC1,
|
||||
RD1, RX0, RX0x, RX1, RX1x, RY0, RY0x, RY1, RY1x, RNOT,
|
||||
RBYTE);
|
||||
|
||||
call __twofish_enc_blk16;
|
||||
|
||||
store_ctr_16way(%rdx, %rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);
|
||||
|
||||
popq %r12;
|
||||
vzeroupper;
|
||||
|
||||
ret;
|
||||
ENDPROC(twofish_ctr_16way)
|
||||
|
||||
.align 8
|
||||
/*
 * Shared XTS body, reached by "jmp" from twofish_xts_enc_16way and
 * twofish_xts_dec_16way with the address of the block core in %r8.
 *
 * %r8 is also RS1, which init_round_constants() overwrites inside the core;
 * that happens only after the indirect call has been taken.  %r12 is saved
 * and restored because the core uses it as RROUND.
 * load_xts_16way/store_xts_16way are not defined in this file (presumably
 * glue_helper-asm-avx2.S; confirm).
 */
twofish_xts_crypt_16way:
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst (16 blocks)
|
||||
* %rdx: src (16 blocks)
|
||||
* %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
|
||||
* %r8: pointer to __twofish_enc_blk16 or __twofish_dec_blk16
|
||||
*/
|
||||
|
||||
vzeroupper;
|
||||
pushq %r12;
|
||||
|
||||
load_xts_16way(%rcx, %rdx, %rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1,
|
||||
RD1, RX0, RX0x, RX1, RX1x, RY0, RY0x, RY1, RY1x, RNOT,
|
||||
.Lxts_gf128mul_and_shl1_mask_0,
|
||||
.Lxts_gf128mul_and_shl1_mask_1);
|
||||
|
||||
call *%r8;
|
||||
|
||||
store_xts_16way(%rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);
|
||||
|
||||
popq %r12;
|
||||
vzeroupper;
|
||||
|
||||
ret;
|
||||
ENDPROC(twofish_xts_crypt_16way)
|
||||
|
||||
/*
 * XTS encryption entry point: selects __twofish_enc_blk16 as the block
 * core (address in %r8) and tail-jumps to the shared
 * twofish_xts_crypt_16way body, which returns to this routine's caller.
 */
ENTRY(twofish_xts_enc_16way)
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst (16 blocks)
|
||||
* %rdx: src (16 blocks)
|
||||
* %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
|
||||
*/
|
||||
leaq __twofish_enc_blk16, %r8;
|
||||
jmp twofish_xts_crypt_16way;
|
||||
ENDPROC(twofish_xts_enc_16way)
|
||||
|
||||
/*
 * XTS decryption entry point: selects __twofish_dec_blk16 as the block
 * core (address in %r8) and tail-jumps to the shared
 * twofish_xts_crypt_16way body, which returns to this routine's caller.
 */
ENTRY(twofish_xts_dec_16way)
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst (16 blocks)
|
||||
* %rdx: src (16 blocks)
|
||||
* %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
|
||||
*/
|
||||
leaq __twofish_dec_blk16, %r8;
|
||||
jmp twofish_xts_crypt_16way;
|
||||
ENDPROC(twofish_xts_dec_16way)
|
@ -1,584 +0,0 @@
|
||||
/*
|
||||
* Glue Code for x86_64/AVX2 assembler optimized version of Twofish
|
||||
*
|
||||
* Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/err.h>
|
||||
#include <crypto/algapi.h>
|
||||
#include <crypto/ctr.h>
|
||||
#include <crypto/twofish.h>
|
||||
#include <crypto/lrw.h>
|
||||
#include <crypto/xts.h>
|
||||
#include <asm/xcr.h>
|
||||
#include <asm/xsave.h>
|
||||
#include <asm/crypto/twofish.h>
|
||||
#include <asm/crypto/ablk_helper.h>
|
||||
#include <asm/crypto/glue_helper.h>
|
||||
#include <crypto/scatterwalk.h>
|
||||
|
||||
#define TF_AVX2_PARALLEL_BLOCKS 16
|
||||
|
||||
/* 16-way AVX2 parallel cipher functions */
|
||||
asmlinkage void twofish_ecb_enc_16way(struct twofish_ctx *ctx, u8 *dst,
|
||||
const u8 *src);
|
||||
asmlinkage void twofish_ecb_dec_16way(struct twofish_ctx *ctx, u8 *dst,
|
||||
const u8 *src);
|
||||
asmlinkage void twofish_cbc_dec_16way(void *ctx, u128 *dst, const u128 *src);
|
||||
|
||||
asmlinkage void twofish_ctr_16way(void *ctx, u128 *dst, const u128 *src,
|
||||
le128 *iv);
|
||||
|
||||
asmlinkage void twofish_xts_enc_16way(struct twofish_ctx *ctx, u8 *dst,
|
||||
const u8 *src, le128 *iv);
|
||||
asmlinkage void twofish_xts_dec_16way(struct twofish_ctx *ctx, u8 *dst,
|
||||
const u8 *src, le128 *iv);
|
||||
|
||||
/*
 * Three-argument wrapper around __twofish_enc_blk_3way() that always passes
 * false as the final argument, so it can be used as an ECB entry in the
 * glue tables.
 */
static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
					const u8 *src)
{
	const bool last_arg = false;

	__twofish_enc_blk_3way(ctx, dst, src, last_arg);
}
|
||||
|
||||
static const struct common_glue_ctx twofish_enc = {
|
||||
.num_funcs = 4,
|
||||
.fpu_blocks_limit = 8,
|
||||
|
||||
.funcs = { {
|
||||
.num_blocks = 16,
|
||||
.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_enc_16way) }
|
||||
}, {
|
||||
.num_blocks = 8,
|
||||
.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_enc_8way) }
|
||||
}, {
|
||||
.num_blocks = 3,
|
||||
.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) }
|
||||
}, {
|
||||
.num_blocks = 1,
|
||||
.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) }
|
||||
} }
|
||||
};
|
||||
|
||||
static const struct common_glue_ctx twofish_ctr = {
|
||||
.num_funcs = 4,
|
||||
.fpu_blocks_limit = 8,
|
||||
|
||||
.funcs = { {
|
||||
.num_blocks = 16,
|
||||
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_ctr_16way) }
|
||||
}, {
|
||||
.num_blocks = 8,
|
||||
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_ctr_8way) }
|
||||
}, {
|
||||
.num_blocks = 3,
|
||||
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_3way) }
|
||||
}, {
|
||||
.num_blocks = 1,
|
||||
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr) }
|
||||
} }
|
||||
};
|
||||
|
||||
static const struct common_glue_ctx twofish_enc_xts = {
|
||||
.num_funcs = 3,
|
||||
.fpu_blocks_limit = 8,
|
||||
|
||||
.funcs = { {
|
||||
.num_blocks = 16,
|
||||
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc_16way) }
|
||||
}, {
|
||||
.num_blocks = 8,
|
||||
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc_8way) }
|
||||
}, {
|
||||
.num_blocks = 1,
|
||||
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc) }
|
||||
} }
|
||||
};
|
||||
|
||||
static const struct common_glue_ctx twofish_dec = {
|
||||
.num_funcs = 4,
|
||||
.fpu_blocks_limit = 8,
|
||||
|
||||
.funcs = { {
|
||||
.num_blocks = 16,
|
||||
.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_dec_16way) }
|
||||
}, {
|
||||
.num_blocks = 8,
|
||||
.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_dec_8way) }
|
||||
}, {
|
||||
.num_blocks = 3,
|
||||
.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) }
|
||||
}, {
|
||||
.num_blocks = 1,
|
||||
.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) }
|
||||
} }
|
||||
};
|
||||
|
||||
static const struct common_glue_ctx twofish_dec_cbc = {
|
||||
.num_funcs = 4,
|
||||
.fpu_blocks_limit = 8,
|
||||
|
||||
.funcs = { {
|
||||
.num_blocks = 16,
|
||||
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_cbc_dec_16way) }
|
||||
}, {
|
||||
.num_blocks = 8,
|
||||
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_cbc_dec_8way) }
|
||||
}, {
|
||||
.num_blocks = 3,
|
||||
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) }
|
||||
}, {
|
||||
.num_blocks = 1,
|
||||
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) }
|
||||
} }
|
||||
};
|
||||
|
||||
static const struct common_glue_ctx twofish_dec_xts = {
|
||||
.num_funcs = 3,
|
||||
.fpu_blocks_limit = 8,
|
||||
|
||||
.funcs = { {
|
||||
.num_blocks = 16,
|
||||
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec_16way) }
|
||||
}, {
|
||||
.num_blocks = 8,
|
||||
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec_8way) }
|
||||
}, {
|
||||
.num_blocks = 1,
|
||||
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec) }
|
||||
} }
|
||||
};
|
||||
|
||||
static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes);
|
||||
}
|
||||
|
||||
static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes);
|
||||
}
|
||||
|
||||
static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc,
|
||||
dst, src, nbytes);
|
||||
}
|
||||
|
||||
static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src,
|
||||
nbytes);
|
||||
}
|
||||
|
||||
static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes);
|
||||
}
|
||||
|
||||
/*
 * Wrapper around glue_fpu_begin() for the LRW callbacks.  Returns the
 * value glue_fpu_begin() reports, which callers store back as their
 * fpu_enabled state.
 */
static inline bool twofish_fpu_begin(bool fpu_enabled, unsigned int nbytes)
{
	/* the AVX functions are reused, so FPU use starts at 8 parallel blocks */
	const int fpu_blocks_limit = 8;

	return glue_fpu_begin(TF_BLOCK_SIZE, fpu_blocks_limit, NULL,
			      fpu_enabled, nbytes);
}
|
||||
|
||||
/* Counterpart of twofish_fpu_begin(): forwards the flag to glue_fpu_end(). */
static inline void twofish_fpu_end(bool fpu_enabled)
{
	glue_fpu_end(fpu_enabled);
}
|
||||
|
||||
struct crypt_priv {
|
||||
struct twofish_ctx *ctx;
|
||||
bool fpu_enabled;
|
||||
};
|
||||
|
||||
/*
 * LRW encryption callback: encrypts srcdst in place.
 *
 * @priv:   struct crypt_priv carrying the twofish context and FPU flag
 * @srcdst: buffer that is both input and output
 * @nbytes: number of bytes in @srcdst
 *
 * The buffer is consumed greedily with the widest routine that still
 * fits (16, 8, 3, then 1 block); a trailing partial block is left
 * untouched.
 */
static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
{
	struct crypt_priv *cp = priv;
	const unsigned int blk = TF_BLOCK_SIZE;
	const unsigned int span16 = blk * TF_AVX2_PARALLEL_BLOCKS;
	const unsigned int span8 = blk * 8;
	const unsigned int span3 = blk * 3;

	cp->fpu_enabled = twofish_fpu_begin(cp->fpu_enabled, nbytes);

	for (; nbytes >= span16; srcdst += span16, nbytes -= span16)
		twofish_ecb_enc_16way(cp->ctx, srcdst, srcdst);

	for (; nbytes >= span8; srcdst += span8, nbytes -= span8)
		twofish_ecb_enc_8way(cp->ctx, srcdst, srcdst);

	for (; nbytes >= span3; srcdst += span3, nbytes -= span3)
		twofish_enc_blk_3way(cp->ctx, srcdst, srcdst);

	/* remaining whole blocks, one at a time */
	for (; nbytes >= blk; srcdst += blk, nbytes -= blk)
		twofish_enc_blk(cp->ctx, srcdst, srcdst);
}
|
||||
|
||||
/*
 * LRW decryption callback: decrypts srcdst in place.
 *
 * @priv:   struct crypt_priv carrying the twofish context and FPU flag
 * @srcdst: buffer that is both input and output
 * @nbytes: number of bytes in @srcdst
 *
 * The buffer is consumed greedily with the widest routine that still
 * fits (16, 8, 3, then 1 block); a trailing partial block is left
 * untouched.
 */
static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
{
	struct crypt_priv *cp = priv;
	const unsigned int blk = TF_BLOCK_SIZE;
	const unsigned int span16 = blk * TF_AVX2_PARALLEL_BLOCKS;
	const unsigned int span8 = blk * 8;
	const unsigned int span3 = blk * 3;

	cp->fpu_enabled = twofish_fpu_begin(cp->fpu_enabled, nbytes);

	for (; nbytes >= span16; srcdst += span16, nbytes -= span16)
		twofish_ecb_dec_16way(cp->ctx, srcdst, srcdst);

	for (; nbytes >= span8; srcdst += span8, nbytes -= span8)
		twofish_ecb_dec_8way(cp->ctx, srcdst, srcdst);

	for (; nbytes >= span3; srcdst += span3, nbytes -= span3)
		twofish_dec_blk_3way(cp->ctx, srcdst, srcdst);

	/* remaining whole blocks, one at a time */
	for (; nbytes >= blk; srcdst += blk, nbytes -= blk)
		twofish_dec_blk(cp->ctx, srcdst, srcdst);
}
|
||||
|
||||
static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
be128 buf[TF_AVX2_PARALLEL_BLOCKS];
|
||||
struct crypt_priv crypt_ctx = {
|
||||
.ctx = &ctx->twofish_ctx,
|
||||
.fpu_enabled = false,
|
||||
};
|
||||
struct lrw_crypt_req req = {
|
||||
.tbuf = buf,
|
||||
.tbuflen = sizeof(buf),
|
||||
|
||||
.table_ctx = &ctx->lrw_table,
|
||||
.crypt_ctx = &crypt_ctx,
|
||||
.crypt_fn = encrypt_callback,
|
||||
};
|
||||
int ret;
|
||||
|
||||
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
|
||||
ret = lrw_crypt(desc, dst, src, nbytes, &req);
|
||||
twofish_fpu_end(crypt_ctx.fpu_enabled);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
be128 buf[TF_AVX2_PARALLEL_BLOCKS];
|
||||
struct crypt_priv crypt_ctx = {
|
||||
.ctx = &ctx->twofish_ctx,
|
||||
.fpu_enabled = false,
|
||||
};
|
||||
struct lrw_crypt_req req = {
|
||||
.tbuf = buf,
|
||||
.tbuflen = sizeof(buf),
|
||||
|
||||
.table_ctx = &ctx->lrw_table,
|
||||
.crypt_ctx = &crypt_ctx,
|
||||
.crypt_fn = decrypt_callback,
|
||||
};
|
||||
int ret;
|
||||
|
||||
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
|
||||
ret = lrw_crypt(desc, dst, src, nbytes, &req);
|
||||
twofish_fpu_end(crypt_ctx.fpu_enabled);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
|
||||
return glue_xts_crypt_128bit(&twofish_enc_xts, desc, dst, src, nbytes,
|
||||
XTS_TWEAK_CAST(twofish_enc_blk),
|
||||
&ctx->tweak_ctx, &ctx->crypt_ctx);
|
||||
}
|
||||
|
||||
static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
|
||||
return glue_xts_crypt_128bit(&twofish_dec_xts, desc, dst, src, nbytes,
|
||||
XTS_TWEAK_CAST(twofish_enc_blk),
|
||||
&ctx->tweak_ctx, &ctx->crypt_ctx);
|
||||
}
|
||||
|
||||
static struct crypto_alg tf_algs[10] = { {
|
||||
.cra_name = "__ecb-twofish-avx2",
|
||||
.cra_driver_name = "__driver-ecb-twofish-avx2",
|
||||
.cra_priority = 0,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
|
||||
.cra_blocksize = TF_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct twofish_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_blkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u = {
|
||||
.blkcipher = {
|
||||
.min_keysize = TF_MIN_KEY_SIZE,
|
||||
.max_keysize = TF_MAX_KEY_SIZE,
|
||||
.setkey = twofish_setkey,
|
||||
.encrypt = ecb_encrypt,
|
||||
.decrypt = ecb_decrypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
.cra_name = "__cbc-twofish-avx2",
|
||||
.cra_driver_name = "__driver-cbc-twofish-avx2",
|
||||
.cra_priority = 0,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
|
||||
.cra_blocksize = TF_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct twofish_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_blkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u = {
|
||||
.blkcipher = {
|
||||
.min_keysize = TF_MIN_KEY_SIZE,
|
||||
.max_keysize = TF_MAX_KEY_SIZE,
|
||||
.setkey = twofish_setkey,
|
||||
.encrypt = cbc_encrypt,
|
||||
.decrypt = cbc_decrypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
.cra_name = "__ctr-twofish-avx2",
|
||||
.cra_driver_name = "__driver-ctr-twofish-avx2",
|
||||
.cra_priority = 0,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct twofish_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_blkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u = {
|
||||
.blkcipher = {
|
||||
.min_keysize = TF_MIN_KEY_SIZE,
|
||||
.max_keysize = TF_MAX_KEY_SIZE,
|
||||
.ivsize = TF_BLOCK_SIZE,
|
||||
.setkey = twofish_setkey,
|
||||
.encrypt = ctr_crypt,
|
||||
.decrypt = ctr_crypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
.cra_name = "__lrw-twofish-avx2",
|
||||
.cra_driver_name = "__driver-lrw-twofish-avx2",
|
||||
.cra_priority = 0,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
|
||||
.cra_blocksize = TF_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct twofish_lrw_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_blkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_exit = lrw_twofish_exit_tfm,
|
||||
.cra_u = {
|
||||
.blkcipher = {
|
||||
.min_keysize = TF_MIN_KEY_SIZE +
|
||||
TF_BLOCK_SIZE,
|
||||
.max_keysize = TF_MAX_KEY_SIZE +
|
||||
TF_BLOCK_SIZE,
|
||||
.ivsize = TF_BLOCK_SIZE,
|
||||
.setkey = lrw_twofish_setkey,
|
||||
.encrypt = lrw_encrypt,
|
||||
.decrypt = lrw_decrypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
.cra_name = "__xts-twofish-avx2",
|
||||
.cra_driver_name = "__driver-xts-twofish-avx2",
|
||||
.cra_priority = 0,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
|
||||
.cra_blocksize = TF_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct twofish_xts_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_blkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u = {
|
||||
.blkcipher = {
|
||||
.min_keysize = TF_MIN_KEY_SIZE * 2,
|
||||
.max_keysize = TF_MAX_KEY_SIZE * 2,
|
||||
.ivsize = TF_BLOCK_SIZE,
|
||||
.setkey = xts_twofish_setkey,
|
||||
.encrypt = xts_encrypt,
|
||||
.decrypt = xts_decrypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
.cra_name = "ecb(twofish)",
|
||||
.cra_driver_name = "ecb-twofish-avx2",
|
||||
.cra_priority = 500,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
|
||||
.cra_blocksize = TF_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct async_helper_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_ablkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_init = ablk_init,
|
||||
.cra_exit = ablk_exit,
|
||||
.cra_u = {
|
||||
.ablkcipher = {
|
||||
.min_keysize = TF_MIN_KEY_SIZE,
|
||||
.max_keysize = TF_MAX_KEY_SIZE,
|
||||
.setkey = ablk_set_key,
|
||||
.encrypt = ablk_encrypt,
|
||||
.decrypt = ablk_decrypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
.cra_name = "cbc(twofish)",
|
||||
.cra_driver_name = "cbc-twofish-avx2",
|
||||
.cra_priority = 500,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
|
||||
.cra_blocksize = TF_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct async_helper_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_ablkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_init = ablk_init,
|
||||
.cra_exit = ablk_exit,
|
||||
.cra_u = {
|
||||
.ablkcipher = {
|
||||
.min_keysize = TF_MIN_KEY_SIZE,
|
||||
.max_keysize = TF_MAX_KEY_SIZE,
|
||||
.ivsize = TF_BLOCK_SIZE,
|
||||
.setkey = ablk_set_key,
|
||||
.encrypt = __ablk_encrypt,
|
||||
.decrypt = ablk_decrypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
.cra_name = "ctr(twofish)",
|
||||
.cra_driver_name = "ctr-twofish-avx2",
|
||||
.cra_priority = 500,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct async_helper_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_ablkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_init = ablk_init,
|
||||
.cra_exit = ablk_exit,
|
||||
.cra_u = {
|
||||
.ablkcipher = {
|
||||
.min_keysize = TF_MIN_KEY_SIZE,
|
||||
.max_keysize = TF_MAX_KEY_SIZE,
|
||||
.ivsize = TF_BLOCK_SIZE,
|
||||
.setkey = ablk_set_key,
|
||||
.encrypt = ablk_encrypt,
|
||||
.decrypt = ablk_encrypt,
|
||||
.geniv = "chainiv",
|
||||
},
|
||||
},
|
||||
}, {
|
||||
.cra_name = "lrw(twofish)",
|
||||
.cra_driver_name = "lrw-twofish-avx2",
|
||||
.cra_priority = 500,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
|
||||
.cra_blocksize = TF_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct async_helper_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_ablkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_init = ablk_init,
|
||||
.cra_exit = ablk_exit,
|
||||
.cra_u = {
|
||||
.ablkcipher = {
|
||||
.min_keysize = TF_MIN_KEY_SIZE +
|
||||
TF_BLOCK_SIZE,
|
||||
.max_keysize = TF_MAX_KEY_SIZE +
|
||||
TF_BLOCK_SIZE,
|
||||
.ivsize = TF_BLOCK_SIZE,
|
||||
.setkey = ablk_set_key,
|
||||
.encrypt = ablk_encrypt,
|
||||
.decrypt = ablk_decrypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
.cra_name = "xts(twofish)",
|
||||
.cra_driver_name = "xts-twofish-avx2",
|
||||
.cra_priority = 500,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
|
||||
.cra_blocksize = TF_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct async_helper_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_ablkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_init = ablk_init,
|
||||
.cra_exit = ablk_exit,
|
||||
.cra_u = {
|
||||
.ablkcipher = {
|
||||
.min_keysize = TF_MIN_KEY_SIZE * 2,
|
||||
.max_keysize = TF_MAX_KEY_SIZE * 2,
|
||||
.ivsize = TF_BLOCK_SIZE,
|
||||
.setkey = ablk_set_key,
|
||||
.encrypt = ablk_encrypt,
|
||||
.decrypt = ablk_decrypt,
|
||||
},
|
||||
},
|
||||
} };
|
||||
|
||||
/*
 * Module entry point.  Refuses to load with -ENODEV unless the CPU
 * reports AVX2 and OSXSAVE and XCR0 has both the SSE and YMM state bits
 * set; otherwise registers every entry of tf_algs and returns the
 * registration result.
 */
static int __init init(void)
{
	const u64 needed = XSTATE_SSE | XSTATE_YMM;
	u64 xcr0;

	if (!(cpu_has_avx2 && cpu_has_osxsave)) {
		pr_info("AVX2 instructions are not detected.\n");
		return -ENODEV;
	}

	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
	if ((xcr0 & needed) != needed) {
		pr_info("AVX2 detected but unusable.\n");
		return -ENODEV;
	}

	return crypto_register_algs(tf_algs, ARRAY_SIZE(tf_algs));
}
|
||||
|
||||
/* Module exit: unregisters every algorithm that init() registered. */
static void __exit fini(void)
{
	crypto_unregister_algs(tf_algs, ARRAY_SIZE(tf_algs));
}
|
||||
|
||||
module_init(init);
|
||||
module_exit(fini);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("Twofish Cipher Algorithm, AVX2 optimized");
|
||||
MODULE_ALIAS("twofish");
|
||||
MODULE_ALIAS("twofish-asm");
|
@ -50,26 +50,18 @@
|
||||
/* 8-way parallel cipher functions */
|
||||
asmlinkage void twofish_ecb_enc_8way(struct twofish_ctx *ctx, u8 *dst,
|
||||
const u8 *src);
|
||||
EXPORT_SYMBOL_GPL(twofish_ecb_enc_8way);
|
||||
|
||||
asmlinkage void twofish_ecb_dec_8way(struct twofish_ctx *ctx, u8 *dst,
|
||||
const u8 *src);
|
||||
EXPORT_SYMBOL_GPL(twofish_ecb_dec_8way);
|
||||
|
||||
asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst,
|
||||
const u8 *src);
|
||||
EXPORT_SYMBOL_GPL(twofish_cbc_dec_8way);
|
||||
|
||||
asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst,
|
||||
const u8 *src, le128 *iv);
|
||||
EXPORT_SYMBOL_GPL(twofish_ctr_8way);
|
||||
|
||||
asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst,
|
||||
const u8 *src, le128 *iv);
|
||||
EXPORT_SYMBOL_GPL(twofish_xts_enc_8way);
|
||||
asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst,
|
||||
const u8 *src, le128 *iv);
|
||||
EXPORT_SYMBOL_GPL(twofish_xts_dec_8way);
|
||||
|
||||
static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
|
||||
const u8 *src)
|
||||
@ -77,19 +69,17 @@ static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
|
||||
__twofish_enc_blk_3way(ctx, dst, src, false);
|
||||
}
|
||||
|
||||
void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
|
||||
static void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
|
||||
{
|
||||
glue_xts_crypt_128bit_one(ctx, dst, src, iv,
|
||||
GLUE_FUNC_CAST(twofish_enc_blk));
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(twofish_xts_enc);
|
||||
|
||||
void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
|
||||
static void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
|
||||
{
|
||||
glue_xts_crypt_128bit_one(ctx, dst, src, iv,
|
||||
GLUE_FUNC_CAST(twofish_dec_blk));
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(twofish_xts_dec);
|
||||
|
||||
|
||||
static const struct common_glue_ctx twofish_enc = {
|
||||
|
@ -1,43 +0,0 @@
|
||||
#ifndef ASM_X86_BLOWFISH_H
|
||||
#define ASM_X86_BLOWFISH_H
|
||||
|
||||
#include <linux/crypto.h>
|
||||
#include <crypto/blowfish.h>
|
||||
|
||||
#define BF_PARALLEL_BLOCKS 4
|
||||
|
||||
/* regular block cipher functions */
|
||||
asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src,
|
||||
bool xor);
|
||||
asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src);
|
||||
|
||||
/* 4-way parallel cipher functions */
|
||||
asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
|
||||
const u8 *src, bool xor);
|
||||
asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst,
|
||||
const u8 *src);
|
||||
|
||||
/* Single-block encryption: calls __blowfish_enc_blk() with xor disabled. */
static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src)
{
	const bool xor = false;

	__blowfish_enc_blk(ctx, dst, src, xor);
}
|
||||
|
||||
/* Single-block encryption: calls __blowfish_enc_blk() with xor enabled. */
static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst,
					const u8 *src)
{
	const bool xor = true;

	__blowfish_enc_blk(ctx, dst, src, xor);
}
|
||||
|
||||
/* 4-way encryption: calls __blowfish_enc_blk_4way() with xor disabled. */
static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
					 const u8 *src)
{
	const bool xor = false;

	__blowfish_enc_blk_4way(ctx, dst, src, xor);
}
|
||||
|
||||
/* 4-way encryption: calls __blowfish_enc_blk_4way() with xor enabled. */
static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst,
					     const u8 *src)
{
	const bool xor = true;

	__blowfish_enc_blk_4way(ctx, dst, src, xor);
}
|
||||
|
||||
#endif
|
@ -28,20 +28,6 @@ asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
|
||||
asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst,
|
||||
const u8 *src);
|
||||
|
||||
/* 8-way parallel cipher functions */
|
||||
asmlinkage void twofish_ecb_enc_8way(struct twofish_ctx *ctx, u8 *dst,
|
||||
const u8 *src);
|
||||
asmlinkage void twofish_ecb_dec_8way(struct twofish_ctx *ctx, u8 *dst,
|
||||
const u8 *src);
|
||||
asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst,
|
||||
const u8 *src);
|
||||
asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst,
|
||||
const u8 *src, le128 *iv);
|
||||
asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst,
|
||||
const u8 *src, le128 *iv);
|
||||
asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst,
|
||||
const u8 *src, le128 *iv);
|
||||
|
||||
/* helpers from twofish_x86_64-3way module */
|
||||
extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src);
|
||||
extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src,
|
||||
@ -57,8 +43,4 @@ extern void lrw_twofish_exit_tfm(struct crypto_tfm *tfm);
|
||||
extern int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
|
||||
unsigned int keylen);
|
||||
|
||||
/* helpers from twofish-avx module */
|
||||
extern void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv);
|
||||
extern void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv);
|
||||
|
||||
#endif /* ASM_X86_TWOFISH_H */
|
||||
|
@ -376,6 +376,25 @@ config CRYPTO_CRC32_PCLMUL
|
||||
which will enable any routine to use the CRC-32-IEEE 802.3 checksum
|
||||
and gain better performance as compared with the table implementation.
|
||||
|
||||
config CRYPTO_CRCT10DIF
|
||||
tristate "CRCT10DIF algorithm"
|
||||
select CRYPTO_HASH
|
||||
help
|
||||
CRC T10 Data Integrity Field computation is being cast as
|
||||
a crypto transform.  This allows for faster CRC T10 DIF
|
||||
transforms to be used if they are available.
|
||||
|
||||
config CRYPTO_CRCT10DIF_PCLMUL
|
||||
tristate "CRCT10DIF PCLMULQDQ hardware acceleration"
|
||||
depends on X86 && 64BIT && CRC_T10DIF
|
||||
select CRYPTO_HASH
|
||||
help
|
||||
For x86_64 processors with SSE4.2 and PCLMULQDQ supported,
|
||||
CRC T10 DIF PCLMULQDQ computation can be hardware
|
||||
accelerated using the PCLMULQDQ instruction. This option will create
|
||||
'crct10dif-pclmul' module, which is faster when computing the
|
||||
crct10dif checksum as compared with the generic table implementation.
|
||||
|
||||
config CRYPTO_GHASH
|
||||
tristate "GHASH digest algorithm"
|
||||
select CRYPTO_GF128MUL
|
||||
@ -820,25 +839,6 @@ config CRYPTO_BLOWFISH_X86_64
|
||||
See also:
|
||||
<http://www.schneier.com/blowfish.html>
|
||||
|
||||
config CRYPTO_BLOWFISH_AVX2_X86_64
|
||||
tristate "Blowfish cipher algorithm (x86_64/AVX2)"
|
||||
depends on X86 && 64BIT
|
||||
depends on BROKEN
|
||||
select CRYPTO_ALGAPI
|
||||
select CRYPTO_CRYPTD
|
||||
select CRYPTO_ABLK_HELPER_X86
|
||||
select CRYPTO_BLOWFISH_COMMON
|
||||
select CRYPTO_BLOWFISH_X86_64
|
||||
help
|
||||
Blowfish cipher algorithm (x86_64/AVX2), by Bruce Schneier.
|
||||
|
||||
This is a variable key length cipher which can use keys from 32
|
||||
bits to 448 bits in length. It's fast, simple and specifically
|
||||
designed for use on "large microprocessors".
|
||||
|
||||
See also:
|
||||
<http://www.schneier.com/blowfish.html>
|
||||
|
||||
config CRYPTO_CAMELLIA
|
||||
tristate "Camellia cipher algorithms"
|
||||
depends on CRYPTO
|
||||
@ -1297,31 +1297,6 @@ config CRYPTO_TWOFISH_AVX_X86_64
|
||||
See also:
|
||||
<http://www.schneier.com/twofish.html>
|
||||
|
||||
config CRYPTO_TWOFISH_AVX2_X86_64
|
||||
tristate "Twofish cipher algorithm (x86_64/AVX2)"
|
||||
depends on X86 && 64BIT
|
||||
depends on BROKEN
|
||||
select CRYPTO_ALGAPI
|
||||
select CRYPTO_CRYPTD
|
||||
select CRYPTO_ABLK_HELPER_X86
|
||||
select CRYPTO_GLUE_HELPER_X86
|
||||
select CRYPTO_TWOFISH_COMMON
|
||||
select CRYPTO_TWOFISH_X86_64
|
||||
select CRYPTO_TWOFISH_X86_64_3WAY
|
||||
select CRYPTO_TWOFISH_AVX_X86_64
|
||||
select CRYPTO_LRW
|
||||
select CRYPTO_XTS
|
||||
help
|
||||
Twofish cipher algorithm (x86_64/AVX2).
|
||||
|
||||
Twofish was submitted as an AES (Advanced Encryption Standard)
|
||||
candidate cipher by researchers at CounterPane Systems. It is a
|
||||
16 round block cipher supporting key sizes of 128, 192, and 256
|
||||
bits.
|
||||
|
||||
See also:
|
||||
<http://www.schneier.com/twofish.html>
|
||||
|
||||
comment "Compression"
|
||||
|
||||
config CRYPTO_DEFLATE
|
||||
|
@ -83,6 +83,7 @@ obj-$(CONFIG_CRYPTO_ZLIB) += zlib.o
|
||||
obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o
|
||||
obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o
|
||||
obj-$(CONFIG_CRYPTO_CRC32) += crc32.o
|
||||
obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif.o
|
||||
obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o
|
||||
obj-$(CONFIG_CRYPTO_LZO) += lzo.o
|
||||
obj-$(CONFIG_CRYPTO_842) += 842.o
|
||||
|
178
crypto/crct10dif.c
Normal file
178
crypto/crct10dif.c
Normal file
@ -0,0 +1,178 @@
|
||||
/*
|
||||
* Cryptographic API.
|
||||
*
|
||||
* T10 Data Integrity Field CRC16 Crypto Transform
|
||||
*
|
||||
* Copyright (c) 2007 Oracle Corporation. All rights reserved.
|
||||
* Written by Martin K. Petersen <martin.petersen@oracle.com>
|
||||
* Copyright (C) 2013 Intel Corporation
|
||||
* Author: Tim Chen <tim.c.chen@linux.intel.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation; either version 2 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/crc-t10dif.h>
|
||||
#include <crypto/internal/hash.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/kernel.h>
|
||||
|
||||
/* Per-request hash state: the running CRC value. */
struct chksum_desc_ctx {
	/* set to 0 by chksum_init(), advanced by chksum_update() */
	__u16 crc;
};
|
||||
|
||||
/* Table generated using the following polynomial:
 * x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1
 * gt: 0x8bb7
 */
static const __u16 t10_dif_crc_table[256] = {
	0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B,
	0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6,
	0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6,
	0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B,
	0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1,
	0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C,
	0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C,
	0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781,
	0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8,
	0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255,
	0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925,
	0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698,
	0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472,
	0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF,
	0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF,
	0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02,
	0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA,
	0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067,
	0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17,
	0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA,
	0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640,
	0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD,
	0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D,
	0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30,
	0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759,
	0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4,
	0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394,
	0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29,
	0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3,
	0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E,
	0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E,
	0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3
};

/*
 * crc_t10dif_generic - table-driven T10 DIF CRC16 over a byte buffer
 * @crc:    starting CRC value (0 for a fresh computation, or the result
 *          of a previous call to continue over further data)
 * @buffer: bytes to process; not dereferenced when @len is 0
 * @len:    number of bytes in @buffer
 *
 * Processes the buffer one byte at a time, most-significant byte of the
 * CRC first, and returns the updated CRC.
 */
__u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer, size_t len)
{
	/*
	 * The index has the same type as len: a narrower counter would
	 * wrap before reaching a len above UINT_MAX and never terminate.
	 */
	size_t i;

	for (i = 0; i < len; i++)
		crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff];

	return crc;
}
|
||||
EXPORT_SYMBOL(crc_t10dif_generic);
|
||||
|
||||
/*
|
||||
 * Steps through buffer one byte at a time, calculates the MSB-first
|
||||
* crc using table.
|
||||
*/
|
||||
|
||||
static int chksum_init(struct shash_desc *desc)
|
||||
{
|
||||
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
|
||||
|
||||
ctx->crc = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int chksum_update(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int length)
|
||||
{
|
||||
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
|
||||
|
||||
ctx->crc = crc_t10dif_generic(ctx->crc, data, length);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * shash .final: writes the running CRC (native byte order, 2 bytes) to
 * @out.  Always returns 0.
 */
static int chksum_final(struct shash_desc *desc, u8 *out)
{
	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);

	/*
	 * out is a plain byte pointer with no alignment guarantee visible
	 * here, so copy the value rather than storing through a __u16 cast.
	 */
	memcpy(out, &ctx->crc, sizeof(ctx->crc));
	return 0;
}
|
||||
|
||||
/*
 * Shared tail of finup/digest: continues the CRC in *@crcp over @len
 * bytes of @data and writes the 2-byte result (native byte order) to
 * @out.  *@crcp itself is not modified.  Always returns 0.
 */
static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len,
			  u8 *out)
{
	__u16 crc = crc_t10dif_generic(*crcp, data, len);

	/*
	 * out is a plain byte pointer with no alignment guarantee visible
	 * here, so copy the value rather than storing through a __u16 cast.
	 */
	memcpy(out, &crc, sizeof(crc));
	return 0;
}
|
||||
|
||||
/*
 * shash .finup: continues the running CRC from the descriptor state over
 * the last @len bytes of @data and writes the result to @out.
 */
static int chksum_finup(struct shash_desc *desc, const u8 *data,
			unsigned int len, u8 *out)
{
	struct chksum_desc_ctx *state = shash_desc_ctx(desc);
	__u16 *running = &state->crc;

	return __chksum_finup(running, data, len, out);
}
|
||||
|
||||
/*
 * shash .digest: one-shot CRC of @length bytes of @data, written to @out.
 *
 * The computation starts from 0, the same seed chksum_init() uses.  The
 * descriptor state is deliberately not read: nothing on the one-shot
 * digest path in this file initialises it, so seeding from it would make
 * the result depend on stale memory.
 */
static int chksum_digest(struct shash_desc *desc, const u8 *data,
			 unsigned int length, u8 *out)
{
	__u16 crc = 0;

	return __chksum_finup(&crc, data, length, out);
}
|
||||
|
||||
static struct shash_alg alg = {
|
||||
.digestsize = CRC_T10DIF_DIGEST_SIZE,
|
||||
.init = chksum_init,
|
||||
.update = chksum_update,
|
||||
.final = chksum_final,
|
||||
.finup = chksum_finup,
|
||||
.digest = chksum_digest,
|
||||
.descsize = sizeof(struct chksum_desc_ctx),
|
||||
.base = {
|
||||
.cra_name = "crct10dif",
|
||||
.cra_driver_name = "crct10dif-generic",
|
||||
.cra_priority = 100,
|
||||
.cra_blocksize = CRC_T10DIF_BLOCK_SIZE,
|
||||
.cra_module = THIS_MODULE,
|
||||
}
|
||||
};
|
||||
|
||||
/* Module entry point: registers the shash and returns the registration result. */
static int __init crct10dif_mod_init(void)
{
	return crypto_register_shash(&alg);
}
|
||||
|
||||
/* Module exit: unregisters the shash registered by crct10dif_mod_init(). */
static void __exit crct10dif_mod_fini(void)
{
	crypto_unregister_shash(&alg);
}
|
||||
|
||||
module_init(crct10dif_mod_init);
|
||||
module_exit(crct10dif_mod_fini);
|
||||
|
||||
MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>");
|
||||
MODULE_DESCRIPTION("T10 DIF CRC calculation.");
|
||||
MODULE_LICENSE("GPL");
|
@ -251,6 +251,7 @@ static struct shash_alg sha512_algs[2] = { {
|
||||
.descsize = sizeof(struct sha512_state),
|
||||
.base = {
|
||||
.cra_name = "sha512",
|
||||
.cra_driver_name = "sha512-generic",
|
||||
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
|
||||
.cra_blocksize = SHA512_BLOCK_SIZE,
|
||||
.cra_module = THIS_MODULE,
|
||||
@ -263,6 +264,7 @@ static struct shash_alg sha512_algs[2] = { {
|
||||
.descsize = sizeof(struct sha512_state),
|
||||
.base = {
|
||||
.cra_name = "sha384",
|
||||
.cra_driver_name = "sha384-generic",
|
||||
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
|
||||
.cra_blocksize = SHA384_BLOCK_SIZE,
|
||||
.cra_module = THIS_MODULE,
|
||||
|
@ -1174,6 +1174,10 @@ static int do_test(int m)
|
||||
ret += tcrypt_test("ghash");
|
||||
break;
|
||||
|
||||
case 47:
|
||||
ret += tcrypt_test("crct10dif");
|
||||
break;
|
||||
|
||||
case 100:
|
||||
ret += tcrypt_test("hmac(md5)");
|
||||
break;
|
||||
@ -1498,6 +1502,10 @@ static int do_test(int m)
|
||||
test_hash_speed("crc32c", sec, generic_hash_speed_template);
|
||||
if (mode > 300 && mode < 400) break;
|
||||
|
||||
case 320:
|
||||
test_hash_speed("crct10dif", sec, generic_hash_speed_template);
|
||||
if (mode > 300 && mode < 400) break;
|
||||
|
||||
case 399:
|
||||
break;
|
||||
|
||||
|
176
crypto/testmgr.c
176
crypto/testmgr.c
@ -184,8 +184,9 @@ static int do_one_async_hash_op(struct ahash_request *req,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template,
|
||||
unsigned int tcount, bool use_digest)
|
||||
static int __test_hash(struct crypto_ahash *tfm, struct hash_testvec *template,
|
||||
unsigned int tcount, bool use_digest,
|
||||
const int align_offset)
|
||||
{
|
||||
const char *algo = crypto_tfm_alg_driver_name(crypto_ahash_tfm(tfm));
|
||||
unsigned int i, j, k, temp;
|
||||
@ -216,10 +217,15 @@ static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template,
|
||||
if (template[i].np)
|
||||
continue;
|
||||
|
||||
ret = -EINVAL;
|
||||
if (WARN_ON(align_offset + template[i].psize > PAGE_SIZE))
|
||||
goto out;
|
||||
|
||||
j++;
|
||||
memset(result, 0, 64);
|
||||
|
||||
hash_buff = xbuf[0];
|
||||
hash_buff += align_offset;
|
||||
|
||||
memcpy(hash_buff, template[i].plaintext, template[i].psize);
|
||||
sg_init_one(&sg[0], hash_buff, template[i].psize);
|
||||
@ -281,6 +287,10 @@ static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template,
|
||||
|
||||
j = 0;
|
||||
for (i = 0; i < tcount; i++) {
|
||||
/* alignment tests are only done with continuous buffers */
|
||||
if (align_offset != 0)
|
||||
break;
|
||||
|
||||
if (template[i].np) {
|
||||
j++;
|
||||
memset(result, 0, 64);
|
||||
@ -358,9 +368,36 @@ out_nobuf:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Run the hash test vectors through __test_hash() with a buffer offset of
 * 0, then 1, and finally alignmask + 1 (the last pass only when the
 * transform reports a non-zero alignmask).
 *
 * Returns the first non-zero result of __test_hash(), or 0 when every
 * pass succeeds.
 */
static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template,
		     unsigned int tcount, bool use_digest)
{
	unsigned int mask;
	int err;

	/* baseline pass, no extra offset */
	err = __test_hash(tfm, template, tcount, use_digest, 0);
	if (!err) {
		/* test unaligned buffers, check with one byte offset */
		err = __test_hash(tfm, template, tcount, use_digest, 1);
	}
	if (err)
		return err;

	mask = crypto_tfm_alg_alignmask(&tfm->base);
	if (!mask)
		return 0;

	/* Check if alignment mask for tfm is correctly set. */
	return __test_hash(tfm, template, tcount, use_digest, mask + 1);
}
|
||||
|
||||
static int __test_aead(struct crypto_aead *tfm, int enc,
|
||||
struct aead_testvec *template, unsigned int tcount,
|
||||
const bool diff_dst)
|
||||
const bool diff_dst, const int align_offset)
|
||||
{
|
||||
const char *algo = crypto_tfm_alg_driver_name(crypto_aead_tfm(tfm));
|
||||
unsigned int i, j, k, n, temp;
|
||||
@ -423,15 +460,16 @@ static int __test_aead(struct crypto_aead *tfm, int enc,
|
||||
if (!template[i].np) {
|
||||
j++;
|
||||
|
||||
/* some tepmplates have no input data but they will
|
||||
/* some templates have no input data but they will
|
||||
* touch input
|
||||
*/
|
||||
input = xbuf[0];
|
||||
input += align_offset;
|
||||
assoc = axbuf[0];
|
||||
|
||||
ret = -EINVAL;
|
||||
if (WARN_ON(template[i].ilen > PAGE_SIZE ||
|
||||
template[i].alen > PAGE_SIZE))
|
||||
if (WARN_ON(align_offset + template[i].ilen >
|
||||
PAGE_SIZE || template[i].alen > PAGE_SIZE))
|
||||
goto out;
|
||||
|
||||
memcpy(input, template[i].input, template[i].ilen);
|
||||
@ -470,6 +508,7 @@ static int __test_aead(struct crypto_aead *tfm, int enc,
|
||||
|
||||
if (diff_dst) {
|
||||
output = xoutbuf[0];
|
||||
output += align_offset;
|
||||
sg_init_one(&sgout[0], output,
|
||||
template[i].ilen +
|
||||
(enc ? authsize : 0));
|
||||
@ -530,6 +569,10 @@ static int __test_aead(struct crypto_aead *tfm, int enc,
|
||||
}
|
||||
|
||||
for (i = 0, j = 0; i < tcount; i++) {
|
||||
/* alignment tests are only done with continuous buffers */
|
||||
if (align_offset != 0)
|
||||
break;
|
||||
|
||||
if (template[i].np) {
|
||||
j++;
|
||||
|
||||
@ -732,15 +775,34 @@ out_noxbuf:
|
||||
static int test_aead(struct crypto_aead *tfm, int enc,
|
||||
struct aead_testvec *template, unsigned int tcount)
|
||||
{
|
||||
unsigned int alignmask;
|
||||
int ret;
|
||||
|
||||
/* test 'dst == src' case */
|
||||
ret = __test_aead(tfm, enc, template, tcount, false);
|
||||
ret = __test_aead(tfm, enc, template, tcount, false, 0);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* test 'dst != src' case */
|
||||
return __test_aead(tfm, enc, template, tcount, true);
|
||||
ret = __test_aead(tfm, enc, template, tcount, true, 0);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* test unaligned buffers, check with one byte offset */
|
||||
ret = __test_aead(tfm, enc, template, tcount, true, 1);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
alignmask = crypto_tfm_alg_alignmask(&tfm->base);
|
||||
if (alignmask) {
|
||||
/* Check if alignment mask for tfm is correctly set. */
|
||||
ret = __test_aead(tfm, enc, template, tcount, true,
|
||||
alignmask + 1);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int test_cipher(struct crypto_cipher *tfm, int enc,
|
||||
@ -820,7 +882,7 @@ out_nobuf:
|
||||
|
||||
static int __test_skcipher(struct crypto_ablkcipher *tfm, int enc,
|
||||
struct cipher_testvec *template, unsigned int tcount,
|
||||
const bool diff_dst)
|
||||
const bool diff_dst, const int align_offset)
|
||||
{
|
||||
const char *algo =
|
||||
crypto_tfm_alg_driver_name(crypto_ablkcipher_tfm(tfm));
|
||||
@ -876,10 +938,12 @@ static int __test_skcipher(struct crypto_ablkcipher *tfm, int enc,
|
||||
j++;
|
||||
|
||||
ret = -EINVAL;
|
||||
if (WARN_ON(template[i].ilen > PAGE_SIZE))
|
||||
if (WARN_ON(align_offset + template[i].ilen >
|
||||
PAGE_SIZE))
|
||||
goto out;
|
||||
|
||||
data = xbuf[0];
|
||||
data += align_offset;
|
||||
memcpy(data, template[i].input, template[i].ilen);
|
||||
|
||||
crypto_ablkcipher_clear_flags(tfm, ~0);
|
||||
@ -900,6 +964,7 @@ static int __test_skcipher(struct crypto_ablkcipher *tfm, int enc,
|
||||
sg_init_one(&sg[0], data, template[i].ilen);
|
||||
if (diff_dst) {
|
||||
data = xoutbuf[0];
|
||||
data += align_offset;
|
||||
sg_init_one(&sgout[0], data, template[i].ilen);
|
||||
}
|
||||
|
||||
@ -941,6 +1006,9 @@ static int __test_skcipher(struct crypto_ablkcipher *tfm, int enc,
|
||||
|
||||
j = 0;
|
||||
for (i = 0; i < tcount; i++) {
|
||||
/* alignment tests are only done with continuous buffers */
|
||||
if (align_offset != 0)
|
||||
break;
|
||||
|
||||
if (template[i].iv)
|
||||
memcpy(iv, template[i].iv, MAX_IVLEN);
|
||||
@ -1075,15 +1143,34 @@ out_nobuf:
|
||||
static int test_skcipher(struct crypto_ablkcipher *tfm, int enc,
|
||||
struct cipher_testvec *template, unsigned int tcount)
|
||||
{
|
||||
unsigned int alignmask;
|
||||
int ret;
|
||||
|
||||
/* test 'dst == src' case */
|
||||
ret = __test_skcipher(tfm, enc, template, tcount, false);
|
||||
ret = __test_skcipher(tfm, enc, template, tcount, false, 0);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* test 'dst != src' case */
|
||||
return __test_skcipher(tfm, enc, template, tcount, true);
|
||||
ret = __test_skcipher(tfm, enc, template, tcount, true, 0);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* test unaligned buffers, check with one byte offset */
|
||||
ret = __test_skcipher(tfm, enc, template, tcount, true, 1);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
alignmask = crypto_tfm_alg_alignmask(&tfm->base);
|
||||
if (alignmask) {
|
||||
/* Check if alignment mask for tfm is correctly set. */
|
||||
ret = __test_skcipher(tfm, enc, template, tcount, true,
|
||||
alignmask + 1);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int test_comp(struct crypto_comp *tfm, struct comp_testvec *ctemplate,
|
||||
@ -1653,16 +1740,10 @@ static const struct alg_test_desc alg_test_descs[] = {
|
||||
}, {
|
||||
.alg = "__cbc-twofish-avx",
|
||||
.test = alg_test_null,
|
||||
}, {
|
||||
.alg = "__cbc-twofish-avx2",
|
||||
.test = alg_test_null,
|
||||
}, {
|
||||
.alg = "__driver-cbc-aes-aesni",
|
||||
.test = alg_test_null,
|
||||
.fips_allowed = 1,
|
||||
}, {
|
||||
.alg = "__driver-cbc-blowfish-avx2",
|
||||
.test = alg_test_null,
|
||||
}, {
|
||||
.alg = "__driver-cbc-camellia-aesni",
|
||||
.test = alg_test_null,
|
||||
@ -1687,16 +1768,10 @@ static const struct alg_test_desc alg_test_descs[] = {
|
||||
}, {
|
||||
.alg = "__driver-cbc-twofish-avx",
|
||||
.test = alg_test_null,
|
||||
}, {
|
||||
.alg = "__driver-cbc-twofish-avx2",
|
||||
.test = alg_test_null,
|
||||
}, {
|
||||
.alg = "__driver-ecb-aes-aesni",
|
||||
.test = alg_test_null,
|
||||
.fips_allowed = 1,
|
||||
}, {
|
||||
.alg = "__driver-ecb-blowfish-avx2",
|
||||
.test = alg_test_null,
|
||||
}, {
|
||||
.alg = "__driver-ecb-camellia-aesni",
|
||||
.test = alg_test_null,
|
||||
@ -1721,9 +1796,6 @@ static const struct alg_test_desc alg_test_descs[] = {
|
||||
}, {
|
||||
.alg = "__driver-ecb-twofish-avx",
|
||||
.test = alg_test_null,
|
||||
}, {
|
||||
.alg = "__driver-ecb-twofish-avx2",
|
||||
.test = alg_test_null,
|
||||
}, {
|
||||
.alg = "__ghash-pclmulqdqni",
|
||||
.test = alg_test_null,
|
||||
@ -1973,13 +2045,20 @@ static const struct alg_test_desc alg_test_descs[] = {
|
||||
.count = CRC32C_TEST_VECTORS
|
||||
}
|
||||
}
|
||||
}, {
|
||||
.alg = "crct10dif",
|
||||
.test = alg_test_hash,
|
||||
.fips_allowed = 1,
|
||||
.suite = {
|
||||
.hash = {
|
||||
.vecs = crct10dif_tv_template,
|
||||
.count = CRCT10DIF_TEST_VECTORS
|
||||
}
|
||||
}
|
||||
}, {
|
||||
.alg = "cryptd(__driver-cbc-aes-aesni)",
|
||||
.test = alg_test_null,
|
||||
.fips_allowed = 1,
|
||||
}, {
|
||||
.alg = "cryptd(__driver-cbc-blowfish-avx2)",
|
||||
.test = alg_test_null,
|
||||
}, {
|
||||
.alg = "cryptd(__driver-cbc-camellia-aesni)",
|
||||
.test = alg_test_null,
|
||||
@ -1993,9 +2072,6 @@ static const struct alg_test_desc alg_test_descs[] = {
|
||||
.alg = "cryptd(__driver-ecb-aes-aesni)",
|
||||
.test = alg_test_null,
|
||||
.fips_allowed = 1,
|
||||
}, {
|
||||
.alg = "cryptd(__driver-ecb-blowfish-avx2)",
|
||||
.test = alg_test_null,
|
||||
}, {
|
||||
.alg = "cryptd(__driver-ecb-camellia-aesni)",
|
||||
.test = alg_test_null,
|
||||
@ -2020,9 +2096,6 @@ static const struct alg_test_desc alg_test_descs[] = {
|
||||
}, {
|
||||
.alg = "cryptd(__driver-ecb-twofish-avx)",
|
||||
.test = alg_test_null,
|
||||
}, {
|
||||
.alg = "cryptd(__driver-ecb-twofish-avx2)",
|
||||
.test = alg_test_null,
|
||||
}, {
|
||||
.alg = "cryptd(__driver-gcm-aes-aesni)",
|
||||
.test = alg_test_null,
|
||||
@ -3068,6 +3141,35 @@ static const struct alg_test_desc alg_test_descs[] = {
|
||||
}
|
||||
};
|
||||
|
||||
static bool alg_test_descs_checked;
|
||||
|
||||
static void alg_test_descs_check_order(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* only check once */
|
||||
if (alg_test_descs_checked)
|
||||
return;
|
||||
|
||||
alg_test_descs_checked = true;
|
||||
|
||||
for (i = 1; i < ARRAY_SIZE(alg_test_descs); i++) {
|
||||
int diff = strcmp(alg_test_descs[i - 1].alg,
|
||||
alg_test_descs[i].alg);
|
||||
|
||||
if (WARN_ON(diff > 0)) {
|
||||
pr_warn("testmgr: alg_test_descs entries in wrong order: '%s' before '%s'\n",
|
||||
alg_test_descs[i - 1].alg,
|
||||
alg_test_descs[i].alg);
|
||||
}
|
||||
|
||||
if (WARN_ON(diff == 0)) {
|
||||
pr_warn("testmgr: duplicate alg_test_descs entry: '%s'\n",
|
||||
alg_test_descs[i].alg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int alg_find_test(const char *alg)
|
||||
{
|
||||
int start = 0;
|
||||
@ -3099,6 +3201,8 @@ int alg_test(const char *driver, const char *alg, u32 type, u32 mask)
|
||||
int j;
|
||||
int rc;
|
||||
|
||||
alg_test_descs_check_order();
|
||||
|
||||
if ((type & CRYPTO_ALG_TYPE_MASK) == CRYPTO_ALG_TYPE_CIPHER) {
|
||||
char nalg[CRYPTO_MAX_ALG_NAME];
|
||||
|
||||
|
@ -450,6 +450,39 @@ static struct hash_testvec rmd320_tv_template[] = {
|
||||
}
|
||||
};
|
||||
|
||||
#define CRCT10DIF_TEST_VECTORS 3
|
||||
static struct hash_testvec crct10dif_tv_template[] = {
|
||||
{
|
||||
.plaintext = "abc",
|
||||
.psize = 3,
|
||||
#ifdef __LITTLE_ENDIAN
|
||||
.digest = "\x3b\x44",
|
||||
#else
|
||||
.digest = "\x44\x3b",
|
||||
#endif
|
||||
}, {
|
||||
.plaintext = "1234567890123456789012345678901234567890"
|
||||
"123456789012345678901234567890123456789",
|
||||
.psize = 79,
|
||||
#ifdef __LITTLE_ENDIAN
|
||||
.digest = "\x70\x4b",
|
||||
#else
|
||||
.digest = "\x4b\x70",
|
||||
#endif
|
||||
}, {
|
||||
.plaintext =
|
||||
"abcddddddddddddddddddddddddddddddddddddddddddddddddddddd",
|
||||
.psize = 56,
|
||||
#ifdef __LITTLE_ENDIAN
|
||||
.digest = "\xe3\x9c",
|
||||
#else
|
||||
.digest = "\x9c\xe3",
|
||||
#endif
|
||||
.np = 2,
|
||||
.tap = { 28, 28 }
|
||||
}
|
||||
};
|
||||
|
||||
/*
|
||||
 * SHA1 test vectors from FIPS PUB 180-1
|
||||
* Long vector from CAVS 5.0
|
||||
|
@ -108,8 +108,6 @@ static int atmel_trng_remove(struct platform_device *pdev)
|
||||
clk_disable(trng->clk);
|
||||
clk_put(trng->clk);
|
||||
|
||||
platform_set_drvdata(pdev, NULL);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -137,7 +137,6 @@ static int bcm63xx_rng_probe(struct platform_device *pdev)
|
||||
out_clk_disable:
|
||||
clk_disable(clk);
|
||||
out_free_rng:
|
||||
platform_set_drvdata(pdev, NULL);
|
||||
kfree(rng);
|
||||
out_free_priv:
|
||||
kfree(priv);
|
||||
@ -154,7 +153,6 @@ static int bcm63xx_rng_remove(struct platform_device *pdev)
|
||||
clk_disable(priv->clk);
|
||||
kfree(priv);
|
||||
kfree(rng);
|
||||
platform_set_drvdata(pdev, NULL);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -700,7 +700,7 @@ static int n2rng_probe(struct platform_device *op)
|
||||
if (err)
|
||||
goto out_free_units;
|
||||
|
||||
dev_set_drvdata(&op->dev, np);
|
||||
platform_set_drvdata(op, np);
|
||||
|
||||
schedule_delayed_work(&np->work, 0);
|
||||
|
||||
@ -721,7 +721,7 @@ out:
|
||||
|
||||
static int n2rng_remove(struct platform_device *op)
|
||||
{
|
||||
struct n2rng *np = dev_get_drvdata(&op->dev);
|
||||
struct n2rng *np = platform_get_drvdata(op);
|
||||
|
||||
np->flags |= N2RNG_FLAG_SHUTDOWN;
|
||||
|
||||
@ -736,8 +736,6 @@ static int n2rng_remove(struct platform_device *op)
|
||||
|
||||
kfree(np);
|
||||
|
||||
dev_set_drvdata(&op->dev, NULL);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -51,7 +51,7 @@ static int nmk_rng_probe(struct amba_device *dev, const struct amba_id *id)
|
||||
return ret;
|
||||
}
|
||||
|
||||
clk_enable(rng_clk);
|
||||
clk_prepare_enable(rng_clk);
|
||||
|
||||
ret = amba_request_regions(dev, dev->dev.init_name);
|
||||
if (ret)
|
||||
|
@ -96,7 +96,7 @@ static int octeon_rng_probe(struct platform_device *pdev)
|
||||
|
||||
rng->ops = ops;
|
||||
|
||||
dev_set_drvdata(&pdev->dev, &rng->ops);
|
||||
platform_set_drvdata(pdev, &rng->ops);
|
||||
ret = hwrng_register(&rng->ops);
|
||||
if (ret)
|
||||
return -ENOENT;
|
||||
@ -108,7 +108,7 @@ static int octeon_rng_probe(struct platform_device *pdev)
|
||||
|
||||
static int __exit octeon_rng_remove(struct platform_device *pdev)
|
||||
{
|
||||
struct hwrng *rng = dev_get_drvdata(&pdev->dev);
|
||||
struct hwrng *rng = platform_get_drvdata(pdev);
|
||||
|
||||
hwrng_unregister(rng);
|
||||
|
||||
|
@ -116,7 +116,7 @@ static int omap_rng_probe(struct platform_device *pdev)
|
||||
};
|
||||
|
||||
omap_rng_ops.priv = (unsigned long)priv;
|
||||
dev_set_drvdata(&pdev->dev, priv);
|
||||
platform_set_drvdata(pdev, priv);
|
||||
|
||||
priv->mem_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
|
||||
priv->base = devm_ioremap_resource(&pdev->dev, priv->mem_res);
|
||||
@ -124,7 +124,7 @@ static int omap_rng_probe(struct platform_device *pdev)
|
||||
ret = PTR_ERR(priv->base);
|
||||
goto err_ioremap;
|
||||
}
|
||||
dev_set_drvdata(&pdev->dev, priv);
|
||||
platform_set_drvdata(pdev, priv);
|
||||
|
||||
pm_runtime_enable(&pdev->dev);
|
||||
pm_runtime_get_sync(&pdev->dev);
|
||||
@ -151,7 +151,7 @@ err_ioremap:
|
||||
|
||||
static int __exit omap_rng_remove(struct platform_device *pdev)
|
||||
{
|
||||
struct omap_rng_private_data *priv = dev_get_drvdata(&pdev->dev);
|
||||
struct omap_rng_private_data *priv = platform_get_drvdata(pdev);
|
||||
|
||||
hwrng_unregister(&omap_rng_ops);
|
||||
|
||||
|
@ -192,7 +192,6 @@ out_release_io:
|
||||
out_timer:
|
||||
del_timer_sync(&priv->timer);
|
||||
out_free:
|
||||
platform_set_drvdata(pdev, NULL);
|
||||
kfree(priv);
|
||||
return err;
|
||||
}
|
||||
@ -209,7 +208,6 @@ static int timeriomem_rng_remove(struct platform_device *pdev)
|
||||
del_timer_sync(&priv->timer);
|
||||
iounmap(priv->io_base);
|
||||
release_mem_region(res->start, resource_size(res));
|
||||
platform_set_drvdata(pdev, NULL);
|
||||
kfree(priv);
|
||||
|
||||
return 0;
|
||||
|
@ -154,7 +154,6 @@ static int __exit tx4939_rng_remove(struct platform_device *dev)
|
||||
struct tx4939_rng *rngdev = platform_get_drvdata(dev);
|
||||
|
||||
hwrng_unregister(&rngdev->rng);
|
||||
platform_set_drvdata(dev, NULL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -278,7 +278,7 @@ config CRYPTO_DEV_PICOXCELL
|
||||
|
||||
config CRYPTO_DEV_SAHARA
|
||||
tristate "Support for SAHARA crypto accelerator"
|
||||
depends on ARCH_MXC && EXPERIMENTAL && OF
|
||||
depends on ARCH_MXC && OF
|
||||
select CRYPTO_BLKCIPHER
|
||||
select CRYPTO_AES
|
||||
select CRYPTO_ECB
|
||||
@ -286,6 +286,16 @@ config CRYPTO_DEV_SAHARA
|
||||
This option enables support for the SAHARA HW crypto accelerator
|
||||
found in some Freescale i.MX chips.
|
||||
|
||||
config CRYPTO_DEV_DCP
|
||||
tristate "Support for the DCP engine"
|
||||
depends on ARCH_MXS && OF
|
||||
select CRYPTO_BLKCIPHER
|
||||
select CRYPTO_AES
|
||||
select CRYPTO_CBC
|
||||
help
|
||||
This option enables support for the hardware crypto-acceleration
|
||||
capabilities of the DCP co-processor
|
||||
|
||||
config CRYPTO_DEV_S5P
|
||||
tristate "Support for Samsung S5PV210 crypto accelerator"
|
||||
depends on ARCH_S5PV210
|
||||
|
@ -13,6 +13,7 @@ obj-$(CONFIG_CRYPTO_DEV_OMAP_SHAM) += omap-sham.o
|
||||
obj-$(CONFIG_CRYPTO_DEV_OMAP_AES) += omap-aes.o
|
||||
obj-$(CONFIG_CRYPTO_DEV_PICOXCELL) += picoxcell_crypto.o
|
||||
obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o
|
||||
obj-$(CONFIG_CRYPTO_DEV_DCP) += dcp.o
|
||||
obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o
|
||||
obj-$(CONFIG_CRYPTO_DEV_TEGRA_AES) += tegra-aes.o
|
||||
obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/
|
||||
|
@ -202,6 +202,7 @@ static int caam_probe(struct platform_device *pdev)
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
struct caam_perfmon *perfmon;
|
||||
#endif
|
||||
u64 cha_vid;
|
||||
|
||||
ctrlpriv = kzalloc(sizeof(struct caam_drv_private), GFP_KERNEL);
|
||||
if (!ctrlpriv)
|
||||
@ -293,11 +294,14 @@ static int caam_probe(struct platform_device *pdev)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
cha_vid = rd_reg64(&topregs->ctrl.perfmon.cha_id);
|
||||
|
||||
/*
|
||||
* RNG4 based SECs (v5+) need special initialization prior
|
||||
* to executing any descriptors
|
||||
* If SEC has RNG version >= 4 and RNG state handle has not been
|
||||
* already instantiated, do RNG instantiation
|
||||
*/
|
||||
if (of_device_is_compatible(nprop, "fsl,sec-v5.0")) {
|
||||
if ((cha_vid & CHA_ID_RNG_MASK) >> CHA_ID_RNG_SHIFT >= 4 &&
|
||||
!(rd_reg32(&topregs->ctrl.r4tst[0].rdsta) & RDSTA_IF0)) {
|
||||
kick_trng(pdev);
|
||||
ret = instantiate_rng(ctrlpriv->jrdev[0]);
|
||||
if (ret) {
|
||||
|
@ -231,7 +231,12 @@ struct sec4_sg_entry {
|
||||
#define LDST_SRCDST_WORD_PKHA_B_SZ (0x11 << LDST_SRCDST_SHIFT)
|
||||
#define LDST_SRCDST_WORD_PKHA_N_SZ (0x12 << LDST_SRCDST_SHIFT)
|
||||
#define LDST_SRCDST_WORD_PKHA_E_SZ (0x13 << LDST_SRCDST_SHIFT)
|
||||
#define LDST_SRCDST_WORD_CLASS_CTX (0x20 << LDST_SRCDST_SHIFT)
|
||||
#define LDST_SRCDST_WORD_DESCBUF (0x40 << LDST_SRCDST_SHIFT)
|
||||
#define LDST_SRCDST_WORD_DESCBUF_JOB (0x41 << LDST_SRCDST_SHIFT)
|
||||
#define LDST_SRCDST_WORD_DESCBUF_SHARED (0x42 << LDST_SRCDST_SHIFT)
|
||||
#define LDST_SRCDST_WORD_DESCBUF_JOB_WE (0x45 << LDST_SRCDST_SHIFT)
|
||||
#define LDST_SRCDST_WORD_DESCBUF_SHARED_WE (0x46 << LDST_SRCDST_SHIFT)
|
||||
#define LDST_SRCDST_WORD_INFO_FIFO (0x7a << LDST_SRCDST_SHIFT)
|
||||
|
||||
/* Offset in source/destination */
|
||||
@ -366,6 +371,7 @@ struct sec4_sg_entry {
|
||||
#define FIFOLD_TYPE_LAST2FLUSH1 (0x05 << FIFOLD_TYPE_SHIFT)
|
||||
#define FIFOLD_TYPE_LASTBOTH (0x06 << FIFOLD_TYPE_SHIFT)
|
||||
#define FIFOLD_TYPE_LASTBOTHFL (0x07 << FIFOLD_TYPE_SHIFT)
|
||||
#define FIFOLD_TYPE_NOINFOFIFO (0x0F << FIFOLD_TYPE_SHIFT)
|
||||
|
||||
#define FIFOLDST_LEN_MASK 0xffff
|
||||
#define FIFOLDST_EXT_LEN_MASK 0xffffffff
|
||||
@ -1294,10 +1300,10 @@ struct sec4_sg_entry {
|
||||
#define SQOUT_SGF 0x01000000
|
||||
|
||||
/* Appends to a previous pointer */
|
||||
#define SQOUT_PRE 0x00800000
|
||||
#define SQOUT_PRE SQIN_PRE
|
||||
|
||||
/* Restore sequence with pointer/length */
|
||||
#define SQOUT_RTO 0x00200000
|
||||
#define SQOUT_RTO SQIN_RTO
|
||||
|
||||
/* Use extended length following pointer */
|
||||
#define SQOUT_EXT 0x00400000
|
||||
@ -1359,6 +1365,7 @@ struct sec4_sg_entry {
|
||||
#define MOVE_DEST_MATH3 (0x07 << MOVE_DEST_SHIFT)
|
||||
#define MOVE_DEST_CLASS1INFIFO (0x08 << MOVE_DEST_SHIFT)
|
||||
#define MOVE_DEST_CLASS2INFIFO (0x09 << MOVE_DEST_SHIFT)
|
||||
#define MOVE_DEST_INFIFO_NOINFO (0x0a << MOVE_DEST_SHIFT)
|
||||
#define MOVE_DEST_PK_A (0x0c << MOVE_DEST_SHIFT)
|
||||
#define MOVE_DEST_CLASS1KEY (0x0d << MOVE_DEST_SHIFT)
|
||||
#define MOVE_DEST_CLASS2KEY (0x0e << MOVE_DEST_SHIFT)
|
||||
@ -1411,6 +1418,7 @@ struct sec4_sg_entry {
|
||||
#define MATH_SRC0_REG2 (0x02 << MATH_SRC0_SHIFT)
|
||||
#define MATH_SRC0_REG3 (0x03 << MATH_SRC0_SHIFT)
|
||||
#define MATH_SRC0_IMM (0x04 << MATH_SRC0_SHIFT)
|
||||
#define MATH_SRC0_DPOVRD (0x07 << MATH_SRC0_SHIFT)
|
||||
#define MATH_SRC0_SEQINLEN (0x08 << MATH_SRC0_SHIFT)
|
||||
#define MATH_SRC0_SEQOUTLEN (0x09 << MATH_SRC0_SHIFT)
|
||||
#define MATH_SRC0_VARSEQINLEN (0x0a << MATH_SRC0_SHIFT)
|
||||
@ -1425,6 +1433,7 @@ struct sec4_sg_entry {
|
||||
#define MATH_SRC1_REG2 (0x02 << MATH_SRC1_SHIFT)
|
||||
#define MATH_SRC1_REG3 (0x03 << MATH_SRC1_SHIFT)
|
||||
#define MATH_SRC1_IMM (0x04 << MATH_SRC1_SHIFT)
|
||||
/* DPOVRD as MATH source 1: placed in the SRC1 field, like every other MATH_SRC1_* value */
#define MATH_SRC1_DPOVRD	(0x07 << MATH_SRC1_SHIFT)
|
||||
#define MATH_SRC1_INFIFO (0x0a << MATH_SRC1_SHIFT)
|
||||
#define MATH_SRC1_OUTFIFO (0x0b << MATH_SRC1_SHIFT)
|
||||
#define MATH_SRC1_ONE (0x0c << MATH_SRC1_SHIFT)
|
||||
@ -1600,4 +1609,13 @@ struct sec4_sg_entry {
|
||||
#define NFIFOENTRY_PLEN_SHIFT 0
|
||||
#define NFIFOENTRY_PLEN_MASK (0xFF << NFIFOENTRY_PLEN_SHIFT)
|
||||
|
||||
/* Append Load Immediate Command */
|
||||
#define FD_CMD_APPEND_LOAD_IMMEDIATE 0x80000000
|
||||
|
||||
/* Set SEQ LIODN equal to the Non-SEQ LIODN for the job */
|
||||
#define FD_CMD_SET_SEQ_LIODN_EQUAL_NONSEQ_LIODN 0x40000000
|
||||
|
||||
/* Frame Descriptor Command for Replacement Job Descriptor */
|
||||
#define FD_CMD_REPLACE_JOB_DESC 0x20000000
|
||||
|
||||
#endif /* DESC_H */
|
||||
|
@ -110,6 +110,26 @@ static inline void append_cmd(u32 *desc, u32 command)
|
||||
(*desc)++;
|
||||
}
|
||||
|
||||
#define append_u32 append_cmd
|
||||
|
||||
/*
 * Append a 64-bit value at desc_end(desc) as two consecutive words,
 * upper 32 bits first, then add 2 to the first descriptor word (*desc).
 */
static inline void append_u64(u32 *desc, u64 data)
{
	u32 *tail = desc_end(desc);

	tail[0] = upper_32_bits(data);
	tail[1] = lower_32_bits(data);

	*desc += 2;
}
|
||||
|
||||
/* Write command without affecting header, and return pointer to next word */
|
||||
static inline u32 *write_cmd(u32 *desc, u32 command)
|
||||
{
|
||||
*desc = command;
|
||||
|
||||
return desc + 1;
|
||||
}
|
||||
|
||||
static inline void append_cmd_ptr(u32 *desc, dma_addr_t ptr, int len,
|
||||
u32 command)
|
||||
{
|
||||
@ -122,7 +142,8 @@ static inline void append_cmd_ptr_extlen(u32 *desc, dma_addr_t ptr,
|
||||
unsigned int len, u32 command)
|
||||
{
|
||||
append_cmd(desc, command);
|
||||
append_ptr(desc, ptr);
|
||||
if (!(command & (SQIN_RTO | SQIN_PRE)))
|
||||
append_ptr(desc, ptr);
|
||||
append_cmd(desc, len);
|
||||
}
|
||||
|
||||
@ -176,17 +197,36 @@ static inline void append_##cmd(u32 *desc, dma_addr_t ptr, unsigned int len, \
|
||||
}
|
||||
APPEND_CMD_PTR(key, KEY)
|
||||
APPEND_CMD_PTR(load, LOAD)
|
||||
APPEND_CMD_PTR(store, STORE)
|
||||
APPEND_CMD_PTR(fifo_load, FIFO_LOAD)
|
||||
APPEND_CMD_PTR(fifo_store, FIFO_STORE)
|
||||
|
||||
/*
 * Append a STORE command built from CMD_STORE, options and len.  The
 * pointer word is appended afterwards unless the source field of options
 * (options & LDST_SRCDST_MASK) selects one of the descriptor-buffer
 * sources listed below.
 */
static inline void append_store(u32 *desc, dma_addr_t ptr, unsigned int len,
				u32 options)
{
	append_cmd(desc, CMD_STORE | options | len);

	switch (options & LDST_SRCDST_MASK) {
	case LDST_SRCDST_WORD_DESCBUF_SHARED:
	case LDST_SRCDST_WORD_DESCBUF_JOB:
	case LDST_SRCDST_WORD_DESCBUF_JOB_WE:
	case LDST_SRCDST_WORD_DESCBUF_SHARED_WE:
		/* These options do not require pointer */
		break;
	default:
		append_ptr(desc, ptr);
		break;
	}
}
|
||||
|
||||
#define APPEND_SEQ_PTR_INTLEN(cmd, op) \
|
||||
static inline void append_seq_##cmd##_ptr_intlen(u32 *desc, dma_addr_t ptr, \
|
||||
unsigned int len, \
|
||||
u32 options) \
|
||||
{ \
|
||||
PRINT_POS; \
|
||||
append_cmd_ptr(desc, ptr, len, CMD_SEQ_##op##_PTR | options); \
|
||||
if (options & (SQIN_RTO | SQIN_PRE)) \
|
||||
append_cmd(desc, CMD_SEQ_##op##_PTR | len | options); \
|
||||
else \
|
||||
append_cmd_ptr(desc, ptr, len, CMD_SEQ_##op##_PTR | options); \
|
||||
}
|
||||
APPEND_SEQ_PTR_INTLEN(in, IN)
|
||||
APPEND_SEQ_PTR_INTLEN(out, OUT)
|
||||
@ -259,7 +299,7 @@ APPEND_CMD_RAW_IMM(load, LOAD, u32);
|
||||
*/
|
||||
#define APPEND_MATH(op, desc, dest, src_0, src_1, len) \
|
||||
append_cmd(desc, CMD_MATH | MATH_FUN_##op | MATH_DEST_##dest | \
|
||||
MATH_SRC0_##src_0 | MATH_SRC1_##src_1 | (u32) (len & MATH_LEN_MASK));
|
||||
MATH_SRC0_##src_0 | MATH_SRC1_##src_1 | (u32)len);
|
||||
|
||||
#define append_math_add(desc, dest, src0, src1, len) \
|
||||
APPEND_MATH(ADD, desc, dest, src0, src1, len)
|
||||
@ -279,6 +319,8 @@ append_cmd(desc, CMD_MATH | MATH_FUN_##op | MATH_DEST_##dest | \
|
||||
APPEND_MATH(LSHIFT, desc, dest, src0, src1, len)
|
||||
#define append_math_rshift(desc, dest, src0, src1, len) \
|
||||
APPEND_MATH(RSHIFT, desc, dest, src0, src1, len)
|
||||
#define append_math_ldshift(desc, dest, src0, src1, len) \
|
||||
APPEND_MATH(SHLD, desc, dest, src0, src1, len)
|
||||
|
||||
/* Exactly one source is IMM. Data is passed in as u32 value */
|
||||
#define APPEND_MATH_IMM_u32(op, desc, dest, src_0, src_1, data) \
|
||||
@ -305,3 +347,34 @@ do { \
|
||||
APPEND_MATH_IMM_u32(LSHIFT, desc, dest, src0, src1, data)
|
||||
#define append_math_rshift_imm_u32(desc, dest, src0, src1, data) \
|
||||
APPEND_MATH_IMM_u32(RSHIFT, desc, dest, src0, src1, data)
|
||||
|
||||
/*
 * Exactly one source is IMM. Data is passed in as u64 value.
 *
 * The immediate is captured once in a local u64, so a compound argument
 * (e.g. "a | b") is shifted as a whole and an argument with side effects
 * is evaluated a single time.  When the upper 32 bits are zero the MATH
 * command is flagged with MATH_IFB and only one 32-bit word is appended;
 * otherwise both words are appended via append_u64().
 */
#define APPEND_MATH_IMM_u64(op, desc, dest, src_0, src_1, data) \
do { \
	u64 imm64_ = (data); \
	u32 upper = (u32)(imm64_ >> 32); \
	APPEND_MATH(op, desc, dest, src_0, src_1, CAAM_CMD_SZ * 2 | \
		    (upper ? 0 : MATH_IFB)); \
	if (upper) \
		append_u64(desc, imm64_); \
	else \
		append_u32(desc, imm64_); \
} while (0)
|
||||
|
||||
#define append_math_add_imm_u64(desc, dest, src0, src1, data) \
|
||||
APPEND_MATH_IMM_u64(ADD, desc, dest, src0, src1, data)
|
||||
#define append_math_sub_imm_u64(desc, dest, src0, src1, data) \
|
||||
APPEND_MATH_IMM_u64(SUB, desc, dest, src0, src1, data)
|
||||
#define append_math_add_c_imm_u64(desc, dest, src0, src1, data) \
|
||||
APPEND_MATH_IMM_u64(ADDC, desc, dest, src0, src1, data)
|
||||
#define append_math_sub_b_imm_u64(desc, dest, src0, src1, data) \
|
||||
APPEND_MATH_IMM_u64(SUBB, desc, dest, src0, src1, data)
|
||||
#define append_math_and_imm_u64(desc, dest, src0, src1, data) \
|
||||
APPEND_MATH_IMM_u64(AND, desc, dest, src0, src1, data)
|
||||
#define append_math_or_imm_u64(desc, dest, src0, src1, data) \
|
||||
APPEND_MATH_IMM_u64(OR, desc, dest, src0, src1, data)
|
||||
#define append_math_xor_imm_u64(desc, dest, src0, src1, data) \
|
||||
APPEND_MATH_IMM_u64(XOR, desc, dest, src0, src1, data)
|
||||
#define append_math_lshift_imm_u64(desc, dest, src0, src1, data) \
|
||||
APPEND_MATH_IMM_u64(LSHIFT, desc, dest, src0, src1, data)
|
||||
#define append_math_rshift_imm_u64(desc, dest, src0, src1, data) \
|
||||
APPEND_MATH_IMM_u64(RSHIFT, desc, dest, src0, src1, data)
|
||||
|
@ -44,6 +44,7 @@
|
||||
#define PDBOPTS_ESP_IPHDRSRC 0x08 /* IP header comes from PDB (encap) */
|
||||
#define PDBOPTS_ESP_INCIPHDR 0x04 /* Prepend IP header to output frame */
|
||||
#define PDBOPTS_ESP_IPVSN 0x02 /* process IPv6 header */
|
||||
#define PDBOPTS_ESP_AOFL 0x04 /* adjust out frame len (decap, SEC>=5.3)*/
|
||||
#define PDBOPTS_ESP_TUNNEL 0x01 /* tunnel mode next-header byte */
|
||||
#define PDBOPTS_ESP_IPV6 0x02 /* ip header version is V6 */
|
||||
#define PDBOPTS_ESP_DIFFSERV 0x40 /* copy TOS/TC from inner iphdr */
|
||||
|
@ -117,6 +117,43 @@ struct jr_outentry {
|
||||
#define CHA_NUM_DECONUM_SHIFT 56
|
||||
#define CHA_NUM_DECONUM_MASK (0xfull << CHA_NUM_DECONUM_SHIFT)
|
||||
|
||||
/* CHA Version IDs */
|
||||
#define CHA_ID_AES_SHIFT 0
|
||||
#define CHA_ID_AES_MASK (0xfull << CHA_ID_AES_SHIFT)
|
||||
|
||||
#define CHA_ID_DES_SHIFT 4
|
||||
#define CHA_ID_DES_MASK (0xfull << CHA_ID_DES_SHIFT)
|
||||
|
||||
#define CHA_ID_ARC4_SHIFT 8
|
||||
#define CHA_ID_ARC4_MASK (0xfull << CHA_ID_ARC4_SHIFT)
|
||||
|
||||
#define CHA_ID_MD_SHIFT 12
|
||||
#define CHA_ID_MD_MASK (0xfull << CHA_ID_MD_SHIFT)
|
||||
|
||||
#define CHA_ID_RNG_SHIFT 16
|
||||
#define CHA_ID_RNG_MASK (0xfull << CHA_ID_RNG_SHIFT)
|
||||
|
||||
#define CHA_ID_SNW8_SHIFT 20
|
||||
#define CHA_ID_SNW8_MASK (0xfull << CHA_ID_SNW8_SHIFT)
|
||||
|
||||
#define CHA_ID_KAS_SHIFT 24
|
||||
#define CHA_ID_KAS_MASK (0xfull << CHA_ID_KAS_SHIFT)
|
||||
|
||||
#define CHA_ID_PK_SHIFT 28
|
||||
#define CHA_ID_PK_MASK (0xfull << CHA_ID_PK_SHIFT)
|
||||
|
||||
#define CHA_ID_CRC_SHIFT 32
|
||||
#define CHA_ID_CRC_MASK (0xfull << CHA_ID_CRC_SHIFT)
|
||||
|
||||
#define CHA_ID_SNW9_SHIFT 36
|
||||
#define CHA_ID_SNW9_MASK (0xfull << CHA_ID_SNW9_SHIFT)
|
||||
|
||||
#define CHA_ID_DECO_SHIFT 56
|
||||
#define CHA_ID_DECO_MASK (0xfull << CHA_ID_DECO_SHIFT)
|
||||
|
||||
#define CHA_ID_JR_SHIFT 60
|
||||
#define CHA_ID_JR_MASK (0xfull << CHA_ID_JR_SHIFT)
|
||||
|
||||
struct sec_vid {
|
||||
u16 ip_id;
|
||||
u8 maj_rev;
|
||||
@ -228,7 +265,10 @@ struct rng4tst {
|
||||
u32 rtfrqmax; /* PRGM=1: freq. count max. limit register */
|
||||
u32 rtfrqcnt; /* PRGM=0: freq. count register */
|
||||
};
|
||||
u32 rsvd1[56];
|
||||
u32 rsvd1[40];
|
||||
#define RDSTA_IF0 0x00000001
|
||||
u32 rdsta;
|
||||
u32 rsvd2[15];
|
||||
};
|
||||
|
||||
/*
|
||||
|
912
drivers/crypto/dcp.c
Normal file
912
drivers/crypto/dcp.c
Normal file
@ -0,0 +1,912 @@
|
||||
/*
|
||||
* Cryptographic API.
|
||||
*
|
||||
* Support for DCP cryptographic accelerator.
|
||||
*
|
||||
* Copyright (c) 2013
|
||||
* Author: Tobias Rauter <tobias.rauter@gmail.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as published
|
||||
* by the Free Software Foundation.
|
||||
*
|
||||
* Based on tegra-aes.c, dcp.c (from freescale SDK) and sahara.c
|
||||
*/
|
||||
#include <linux/module.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/platform_device.h>
|
||||
#include <linux/dma-mapping.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/completion.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/miscdevice.h>
|
||||
|
||||
#include <crypto/scatterwalk.h>
|
||||
#include <crypto/aes.h>
|
||||
|
||||
|
||||
/* IOCTL for DCP OTP Key AES - taken from Freescale's SDK*/
|
||||
#define DBS_IOCTL_BASE 'd'
|
||||
#define DBS_ENC _IOW(DBS_IOCTL_BASE, 0x00, uint8_t[16])
|
||||
#define DBS_DEC _IOW(DBS_IOCTL_BASE, 0x01, uint8_t[16])
|
||||
|
||||
/* DCP channel used for AES */
|
||||
#define USED_CHANNEL 1
|
||||
/* Ring Buffers' maximum size */
|
||||
#define DCP_MAX_PKG 20
|
||||
|
||||
/* Control Register */
|
||||
#define DCP_REG_CTRL 0x000
|
||||
/* Bit 31 of DCP_REG_CTRL; unsigned constant so the shift does not reach the sign bit of an int */
#define DCP_CTRL_SFRST			(1U<<31)
|
||||
#define DCP_CTRL_CLKGATE (1<<30)
|
||||
#define DCP_CTRL_CRYPTO_PRESENT (1<<29)
|
||||
#define DCP_CTRL_SHA_PRESENT (1<<28)
|
||||
#define DCP_CTRL_GATHER_RES_WRITE (1<<23)
|
||||
#define DCP_CTRL_ENABLE_CONTEXT_CACHE (1<<22)
|
||||
#define DCP_CTRL_ENABLE_CONTEXT_SWITCH (1<<21)
|
||||
#define DCP_CTRL_CH_IRQ_E_0 0x01
|
||||
#define DCP_CTRL_CH_IRQ_E_1 0x02
|
||||
#define DCP_CTRL_CH_IRQ_E_2 0x04
|
||||
#define DCP_CTRL_CH_IRQ_E_3 0x08
|
||||
|
||||
/* Status register */
|
||||
#define DCP_REG_STAT 0x010
|
||||
#define DCP_STAT_OTP_KEY_READY (1<<28)
|
||||
/*
 * Field extractors for a DCP_REG_STAT value.  The argument is fully
 * parenthesized so that compound expressions (e.g. "a | b") are shifted
 * and masked as a whole.
 */
#define DCP_STAT_CUR_CHANNEL(stat)	(((stat) >> 24) & 0x0F)
#define DCP_STAT_READY_CHANNEL(stat)	(((stat) >> 16) & 0x0F)
#define DCP_STAT_IRQ(stat)		((stat) & 0x0F)
|
||||
#define DCP_STAT_CHAN_0 (0x01)
|
||||
#define DCP_STAT_CHAN_1 (0x02)
|
||||
#define DCP_STAT_CHAN_2 (0x04)
|
||||
#define DCP_STAT_CHAN_3 (0x08)
|
||||
|
||||
/* Channel Control Register */
|
||||
#define DCP_REG_CHAN_CTRL 0x020
|
||||
#define DCP_CHAN_CTRL_CH0_IRQ_MERGED (1<<16)
|
||||
#define DCP_CHAN_CTRL_HIGH_PRIO_0 (0x0100)
|
||||
#define DCP_CHAN_CTRL_HIGH_PRIO_1 (0x0200)
|
||||
#define DCP_CHAN_CTRL_HIGH_PRIO_2 (0x0400)
|
||||
#define DCP_CHAN_CTRL_HIGH_PRIO_3 (0x0800)
|
||||
#define DCP_CHAN_CTRL_ENABLE_0 (0x01)
|
||||
#define DCP_CHAN_CTRL_ENABLE_1 (0x02)
|
||||
#define DCP_CHAN_CTRL_ENABLE_2 (0x04)
|
||||
#define DCP_CHAN_CTRL_ENABLE_3 (0x08)
|
||||
|
||||
/*
|
||||
* Channel Registers:
|
||||
* The DCP has 4 channels. Each of this channels
|
||||
* has 4 registers (command pointer, semaphore, status and options).
|
||||
* The address of register REG of channel CHAN is obtained by
|
||||
* dcp_chan_reg(REG, CHAN)
|
||||
*/
|
||||
#define DCP_REG_CHAN_PTR 0x00000100
|
||||
#define DCP_REG_CHAN_SEMA 0x00000110
|
||||
#define DCP_REG_CHAN_STAT 0x00000120
|
||||
#define DCP_REG_CHAN_OPT 0x00000130
|
||||
|
||||
#define DCP_CHAN_STAT_NEXT_CHAIN_IS_0 0x010000
|
||||
#define DCP_CHAN_STAT_NO_CHAIN 0x020000
|
||||
#define DCP_CHAN_STAT_CONTEXT_ERROR 0x030000
|
||||
#define DCP_CHAN_STAT_PAYLOAD_ERROR 0x040000
|
||||
#define DCP_CHAN_STAT_INVALID_MODE 0x050000
|
||||
#define DCP_CHAN_STAT_PAGEFAULT 0x40
|
||||
#define DCP_CHAN_STAT_DST 0x20
|
||||
#define DCP_CHAN_STAT_SRC 0x10
|
||||
#define DCP_CHAN_STAT_PACKET 0x08
|
||||
#define DCP_CHAN_STAT_SETUP 0x04
|
||||
#define DCP_CHAN_STAT_MISMATCH 0x02
|
||||
|
||||
/* hw packet control*/
|
||||
|
||||
#define DCP_PKT_PAYLOAD_KEY (1<<11)
|
||||
#define DCP_PKT_OTP_KEY (1<<10)
|
||||
#define DCP_PKT_CIPHER_INIT (1<<9)
|
||||
#define DCP_PKG_CIPHER_ENCRYPT (1<<8)
|
||||
#define DCP_PKT_CIPHER_ENABLE (1<<5)
|
||||
#define DCP_PKT_DECR_SEM (1<<1)
|
||||
#define DCP_PKT_CHAIN (1<<2)
|
||||
#define DCP_PKT_IRQ 1
|
||||
|
||||
#define DCP_PKT_MODE_CBC (1<<4)
|
||||
#define DCP_PKT_KEYSELECT_OTP (0xFF<<8)
|
||||
|
||||
/* cipher flags */
|
||||
#define DCP_ENC 0x0001
|
||||
#define DCP_DEC 0x0002
|
||||
#define DCP_ECB 0x0004
|
||||
#define DCP_CBC 0x0008
|
||||
#define DCP_CBC_INIT 0x0010
|
||||
#define DCP_NEW_KEY 0x0040
|
||||
#define DCP_OTP_KEY 0x0080
|
||||
#define DCP_AES 0x1000
|
||||
|
||||
/* DCP Flags */
|
||||
#define DCP_FLAG_BUSY 0x01
|
||||
#define DCP_FLAG_PRODUCING 0x02
|
||||
|
||||
/* clock defines */
|
||||
#define CLOCK_ON 1
|
||||
#define CLOCK_OFF 0
|
||||
|
||||
/*
 * Per-request context (its size is what dcp_cra_init() stores as reqsize).
 */
struct dcp_dev_req_ctx {
|
||||
/* DCP_* cipher flags chosen by the encrypt/decrypt entry point */
int mode;
|
||||
};
|
||||
|
||||
/*
 * Per-transform operation context; also allocated ad hoc by the
 * bootstream ioctl for a single OTP-key operation.
 */
struct dcp_op {
|
||||
/* DCP_* cipher flags (direction, mode, key source, pending key/IV load) */
unsigned int flags;
|
||||
/* AES-128 key as supplied to setkey */
u8 key[AES_KEYSIZE_128];
|
||||
/* length passed to setkey; anything but AES_KEYSIZE_128 uses the fallback */
int keylen;
|
||||
|
||||
/* request currently being processed for this context */
struct ablkcipher_request *req;
|
||||
/* software cipher allocated in dcp_cra_init() */
struct crypto_ablkcipher *fallback;
|
||||
|
||||
/* accumulated channel status / error code handed to the completion */
uint32_t stat;
|
||||
/* template for the first control word of each hardware packet */
uint32_t pkt1;
|
||||
/* template for the second control word of each hardware packet */
uint32_t pkt2;
|
||||
/* scatterlist walk state for the request in flight */
struct ablkcipher_walk walk;
|
||||
};
|
||||
|
||||
/*
 * Driver state for the DCP device; a single instance is published
 * through global_dev by dcp_probe().
 */
struct dcp_dev {
|
||||
struct device *dev;
|
||||
/* ioremapped register window */
void __iomem *dcp_regs_base;
|
||||
|
||||
/* IRQ numbers taken from platform IRQ resources 0 and 1 respectively */
int dcp_vmi_irq;
|
||||
int dcp_irq;
|
||||
|
||||
/* protects 'queue' */
spinlock_t queue_lock;
|
||||
struct crypto_queue queue;
|
||||
|
||||
/* running counters; taken modulo DCP_MAX_PKG to index hw_pkg[] */
uint32_t pkt_produced;
|
||||
uint32_t pkt_consumed;
|
||||
|
||||
/* CPU pointers into one coherent descriptor array set up in dcp_probe() */
struct dcp_hw_packet *hw_pkg[DCP_MAX_PKG];
|
||||
/* DMA address of hw_pkg[0] */
dma_addr_t hw_phys_pkg;
|
||||
|
||||
/* [KEY][IV] Both with 16 Bytes */
|
||||
u8 *payload_base;
|
||||
dma_addr_t payload_base_dma;
|
||||
|
||||
|
||||
/* completion handling (dcp_done_task) */
struct tasklet_struct done_task;
|
||||
/* request dequeueing (dcp_queue_task) */
struct tasklet_struct queue_task;
|
||||
/* armed for 500 ms whenever a packet is submitted (dcp_watchdog) */
struct timer_list watchdog;
|
||||
|
||||
/* DCP_FLAG_* values, used as bit numbers with the atomic bitops */
unsigned long flags;
|
||||
|
||||
/* context of the operation currently owning the engine */
struct dcp_op *ctx;
|
||||
|
||||
/* misc device registered under the name "dcpboot" */
struct miscdevice dcp_bootstream_misc;
|
||||
};
|
||||
|
||||
/*
 * One work descriptor in the coherent descriptor ring.  The channel
 * command pointer is loaded with the ring's DMA address, so the field
 * order is presumably dictated by the hardware - do not reorder.
 */
struct dcp_hw_packet {
|
||||
/* DMA address of the following descriptor in the ring */
uint32_t next;
|
||||
/* control word 0: DCP_PKT_* and DCP_PKG_CIPHER_ENCRYPT bits */
uint32_t pkt1;
|
||||
/* control word 1: cipher mode (DCP_PKT_MODE_CBC) */
uint32_t pkt2;
|
||||
/* DMA address of the input data */
uint32_t src;
|
||||
/* DMA address of the output data */
uint32_t dst;
|
||||
/* number of bytes to process */
uint32_t size;
|
||||
/* DMA address of the key/IV payload buffer */
uint32_t payload;
|
||||
/* zeroed on submission; the driver treats the value 1 as "completed" */
uint32_t stat;
|
||||
};
|
||||
|
||||
static struct dcp_dev *global_dev;
|
||||
|
||||
/*
 * Turn a channel-0 register offset into the offset of the same register
 * for channel @chan; channel register banks are 0x40 bytes apart.
 */
static inline u32 dcp_chan_reg(u32 reg, int chan)
{
	const int bank_stride = 0x40;

	return reg + chan * bank_stride;
}
|
||||
|
||||
/* Store @data into the device register located at byte offset @reg. */
static inline void dcp_write(struct dcp_dev *dev, u32 data, u32 reg)
{
	void __iomem *target = dev->dcp_regs_base + reg;

	writel(data, target);
}
|
||||
|
||||
/*
 * Write @data to the +0x04 alias of register @reg (presumably the
 * hardware's "set bits" view of that register).
 */
static inline void dcp_set(struct dcp_dev *dev, u32 data, u32 reg)
{
	u32 alias = reg | 0x04;

	writel(data, dev->dcp_regs_base + alias);
}
|
||||
|
||||
/*
 * Write @data to the +0x08 alias of register @reg (presumably the
 * hardware's "clear bits" view of that register).
 */
static inline void dcp_clear(struct dcp_dev *dev, u32 data, u32 reg)
{
	u32 alias = reg | 0x08;

	writel(data, dev->dcp_regs_base + alias);
}
|
||||
|
||||
/*
 * Write @data to the +0x0C alias of register @reg (presumably the
 * hardware's "toggle bits" view of that register).
 */
static inline void dcp_toggle(struct dcp_dev *dev, u32 data, u32 reg)
{
	u32 alias = reg | 0x0C;

	writel(data, dev->dcp_regs_base + alias);
}
|
||||
|
||||
/* Fetch the current value of the device register at byte offset @reg. */
static inline unsigned int dcp_read(struct dcp_dev *dev, u32 reg)
{
	void __iomem *source = dev->dcp_regs_base + reg;

	return readl(source);
}
|
||||
|
||||
static void dcp_dma_unmap(struct dcp_dev *dev, struct dcp_hw_packet *pkt)
|
||||
{
|
||||
dma_unmap_page(dev->dev, pkt->src, pkt->size, DMA_TO_DEVICE);
|
||||
dma_unmap_page(dev->dev, pkt->dst, pkt->size, DMA_FROM_DEVICE);
|
||||
dev_dbg(dev->dev, "unmap packet %x", (unsigned int) pkt);
|
||||
}
|
||||
|
||||
static int dcp_dma_map(struct dcp_dev *dev,
|
||||
struct ablkcipher_walk *walk, struct dcp_hw_packet *pkt)
|
||||
{
|
||||
dev_dbg(dev->dev, "map packet %x", (unsigned int) pkt);
|
||||
/* align to length = 16 */
|
||||
pkt->size = walk->nbytes - (walk->nbytes % 16);
|
||||
|
||||
pkt->src = dma_map_page(dev->dev, walk->src.page, walk->src.offset,
|
||||
pkt->size, DMA_TO_DEVICE);
|
||||
|
||||
if (pkt->src == 0) {
|
||||
dev_err(dev->dev, "Unable to map src");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
pkt->dst = dma_map_page(dev->dev, walk->dst.page, walk->dst.offset,
|
||||
pkt->size, DMA_FROM_DEVICE);
|
||||
|
||||
if (pkt->dst == 0) {
|
||||
dev_err(dev->dev, "Unable to map dst");
|
||||
dma_unmap_page(dev->dev, pkt->src, pkt->size, DMA_TO_DEVICE);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void dcp_op_one(struct dcp_dev *dev, struct dcp_hw_packet *pkt,
|
||||
uint8_t last)
|
||||
{
|
||||
struct dcp_op *ctx = dev->ctx;
|
||||
pkt->pkt1 = ctx->pkt1;
|
||||
pkt->pkt2 = ctx->pkt2;
|
||||
|
||||
pkt->payload = (u32) dev->payload_base_dma;
|
||||
pkt->stat = 0;
|
||||
|
||||
if (ctx->flags & DCP_CBC_INIT) {
|
||||
pkt->pkt1 |= DCP_PKT_CIPHER_INIT;
|
||||
ctx->flags &= ~DCP_CBC_INIT;
|
||||
}
|
||||
|
||||
mod_timer(&dev->watchdog, jiffies + msecs_to_jiffies(500));
|
||||
pkt->pkt1 |= DCP_PKT_IRQ;
|
||||
if (!last)
|
||||
pkt->pkt1 |= DCP_PKT_CHAIN;
|
||||
|
||||
dev->pkt_produced++;
|
||||
|
||||
dcp_write(dev, 1,
|
||||
dcp_chan_reg(DCP_REG_CHAN_SEMA, USED_CHANNEL));
|
||||
}
|
||||
|
||||
static void dcp_op_proceed(struct dcp_dev *dev)
|
||||
{
|
||||
struct dcp_op *ctx = dev->ctx;
|
||||
struct dcp_hw_packet *pkt;
|
||||
|
||||
while (ctx->walk.nbytes) {
|
||||
int err = 0;
|
||||
|
||||
pkt = dev->hw_pkg[dev->pkt_produced % DCP_MAX_PKG];
|
||||
err = dcp_dma_map(dev, &ctx->walk, pkt);
|
||||
if (err) {
|
||||
dev->ctx->stat |= err;
|
||||
/* start timer to wait for already set up calls */
|
||||
mod_timer(&dev->watchdog,
|
||||
jiffies + msecs_to_jiffies(500));
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
err = ctx->walk.nbytes - pkt->size;
|
||||
ablkcipher_walk_done(dev->ctx->req, &dev->ctx->walk, err);
|
||||
|
||||
dcp_op_one(dev, pkt, ctx->walk.nbytes == 0);
|
||||
/* we have to wait if no space is left in buffer */
|
||||
if (dev->pkt_produced - dev->pkt_consumed == DCP_MAX_PKG)
|
||||
break;
|
||||
}
|
||||
clear_bit(DCP_FLAG_PRODUCING, &dev->flags);
|
||||
}
|
||||
|
||||
/*
 * Start the operation described by dev->ctx.
 *
 * Builds the control-word templates, resets the ring counters and the
 * channel status, points the channel at the descriptor ring and then
 * either walks the request's scatterlists (@use_walk != 0) or submits
 * the single pre-filled descriptor hw_pkg[0] (@use_walk == 0).
 */
static void dcp_op_start(struct dcp_dev *dev, uint8_t use_walk)
{
	struct dcp_op *ctx = dev->ctx;

	/*
	 * The key area of payload_base is a single buffer shared by every
	 * context that uses the device (and it is overwritten with data by
	 * the OTP ioctl path).  A per-context "new key" flag therefore
	 * cannot tell whether the buffer still holds this context's key,
	 * so always reload it for payload-key operations.
	 */
	if (!(ctx->flags & DCP_OTP_KEY))
		memcpy(dev->payload_base, ctx->key, ctx->keylen);
	ctx->flags &= ~DCP_NEW_KEY;

	ctx->pkt1 = 0;
	ctx->pkt1 |= DCP_PKT_CIPHER_ENABLE;
	ctx->pkt1 |= DCP_PKT_DECR_SEM;

	if (ctx->flags & DCP_OTP_KEY)
		ctx->pkt1 |= DCP_PKT_OTP_KEY;
	else
		ctx->pkt1 |= DCP_PKT_PAYLOAD_KEY;

	if (ctx->flags & DCP_ENC)
		ctx->pkt1 |= DCP_PKG_CIPHER_ENCRYPT;

	ctx->pkt2 = 0;
	if (ctx->flags & DCP_CBC)
		ctx->pkt2 |= DCP_PKT_MODE_CBC;

	dev->pkt_produced = 0;
	dev->pkt_consumed = 0;

	ctx->stat = 0;
	dcp_clear(dev, -1, dcp_chan_reg(DCP_REG_CHAN_STAT, USED_CHANNEL));
	dcp_write(dev, (u32) dev->hw_phys_pkg,
		dcp_chan_reg(DCP_REG_CHAN_PTR, USED_CHANNEL));

	set_bit(DCP_FLAG_PRODUCING, &dev->flags);

	if (use_walk) {
		ablkcipher_walk_init(&ctx->walk, ctx->req->dst,
				ctx->req->src, ctx->req->nbytes);
		ablkcipher_walk_phys(ctx->req, &ctx->walk);
		dcp_op_proceed(dev);
	} else {
		dcp_op_one(dev, dev->hw_pkg[0], 1);
		clear_bit(DCP_FLAG_PRODUCING, &dev->flags);
	}
}
|
||||
|
||||
static void dcp_done_task(unsigned long data)
|
||||
{
|
||||
struct dcp_dev *dev = (struct dcp_dev *)data;
|
||||
struct dcp_hw_packet *last_packet;
|
||||
int fin;
|
||||
fin = 0;
|
||||
|
||||
for (last_packet = dev->hw_pkg[(dev->pkt_consumed) % DCP_MAX_PKG];
|
||||
last_packet->stat == 1;
|
||||
last_packet =
|
||||
dev->hw_pkg[++(dev->pkt_consumed) % DCP_MAX_PKG]) {
|
||||
|
||||
dcp_dma_unmap(dev, last_packet);
|
||||
last_packet->stat = 0;
|
||||
fin++;
|
||||
}
|
||||
/* the last call of this function already consumed this IRQ's packet */
|
||||
if (fin == 0)
|
||||
return;
|
||||
|
||||
dev_dbg(dev->dev,
|
||||
"Packet(s) done with status %x; finished: %d, produced:%d, complete consumed: %d",
|
||||
dev->ctx->stat, fin, dev->pkt_produced, dev->pkt_consumed);
|
||||
|
||||
last_packet = dev->hw_pkg[(dev->pkt_consumed - 1) % DCP_MAX_PKG];
|
||||
if (!dev->ctx->stat && last_packet->pkt1 & DCP_PKT_CHAIN) {
|
||||
if (!test_and_set_bit(DCP_FLAG_PRODUCING, &dev->flags))
|
||||
dcp_op_proceed(dev);
|
||||
return;
|
||||
}
|
||||
|
||||
while (unlikely(dev->pkt_consumed < dev->pkt_produced)) {
|
||||
dcp_dma_unmap(dev,
|
||||
dev->hw_pkg[dev->pkt_consumed++ % DCP_MAX_PKG]);
|
||||
}
|
||||
|
||||
if (dev->ctx->flags & DCP_OTP_KEY) {
|
||||
/* we used the miscdevice, no walk to finish */
|
||||
clear_bit(DCP_FLAG_BUSY, &dev->flags);
|
||||
return;
|
||||
}
|
||||
|
||||
ablkcipher_walk_complete(&dev->ctx->walk);
|
||||
dev->ctx->req->base.complete(&dev->ctx->req->base,
|
||||
dev->ctx->stat);
|
||||
dev->ctx->req = NULL;
|
||||
/* in case there are other requests in the queue */
|
||||
tasklet_schedule(&dev->queue_task);
|
||||
}
|
||||
|
||||
static void dcp_watchdog(unsigned long data)
|
||||
{
|
||||
struct dcp_dev *dev = (struct dcp_dev *)data;
|
||||
dev->ctx->stat |= dcp_read(dev,
|
||||
dcp_chan_reg(DCP_REG_CHAN_STAT, USED_CHANNEL));
|
||||
|
||||
dev_err(dev->dev, "Timeout, Channel status: %x", dev->ctx->stat);
|
||||
|
||||
if (!dev->ctx->stat)
|
||||
dev->ctx->stat = -ETIMEDOUT;
|
||||
|
||||
dcp_done_task(data);
|
||||
}
|
||||
|
||||
|
||||
static irqreturn_t dcp_common_irq(int irq, void *context)
|
||||
{
|
||||
u32 msk;
|
||||
struct dcp_dev *dev = (struct dcp_dev *) context;
|
||||
|
||||
del_timer(&dev->watchdog);
|
||||
|
||||
msk = DCP_STAT_IRQ(dcp_read(dev, DCP_REG_STAT));
|
||||
dcp_clear(dev, msk, DCP_REG_STAT);
|
||||
if (msk == 0)
|
||||
return IRQ_NONE;
|
||||
|
||||
dev->ctx->stat |= dcp_read(dev,
|
||||
dcp_chan_reg(DCP_REG_CHAN_STAT, USED_CHANNEL));
|
||||
|
||||
if (msk & DCP_STAT_CHAN_1)
|
||||
tasklet_schedule(&dev->done_task);
|
||||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
/* Handler registered for the first IRQ resource; defers to the common code. */
static irqreturn_t dcp_vmi_irq(int irq, void *context)
{
	irqreturn_t outcome = dcp_common_irq(irq, context);

	return outcome;
}
|
||||
|
||||
/* Handler registered for the second IRQ resource; defers to the common code. */
static irqreturn_t dcp_irq(int irq, void *context)
{
	irqreturn_t outcome = dcp_common_irq(irq, context);

	return outcome;
}
|
||||
|
||||
static void dcp_crypt(struct dcp_dev *dev, struct dcp_op *ctx)
|
||||
{
|
||||
dev->ctx = ctx;
|
||||
|
||||
if ((ctx->flags & DCP_CBC) && ctx->req->info) {
|
||||
ctx->flags |= DCP_CBC_INIT;
|
||||
memcpy(dev->payload_base + AES_KEYSIZE_128,
|
||||
ctx->req->info, AES_KEYSIZE_128);
|
||||
}
|
||||
|
||||
dcp_op_start(dev, 1);
|
||||
}
|
||||
|
||||
static void dcp_queue_task(unsigned long data)
|
||||
{
|
||||
struct dcp_dev *dev = (struct dcp_dev *) data;
|
||||
struct crypto_async_request *async_req, *backlog;
|
||||
struct crypto_ablkcipher *tfm;
|
||||
struct dcp_op *ctx;
|
||||
struct dcp_dev_req_ctx *rctx;
|
||||
struct ablkcipher_request *req;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&dev->queue_lock, flags);
|
||||
|
||||
backlog = crypto_get_backlog(&dev->queue);
|
||||
async_req = crypto_dequeue_request(&dev->queue);
|
||||
|
||||
spin_unlock_irqrestore(&dev->queue_lock, flags);
|
||||
|
||||
if (!async_req)
|
||||
goto ret_nothing_done;
|
||||
|
||||
if (backlog)
|
||||
backlog->complete(backlog, -EINPROGRESS);
|
||||
|
||||
req = ablkcipher_request_cast(async_req);
|
||||
tfm = crypto_ablkcipher_reqtfm(req);
|
||||
rctx = ablkcipher_request_ctx(req);
|
||||
ctx = crypto_ablkcipher_ctx(tfm);
|
||||
|
||||
if (!req->src || !req->dst)
|
||||
goto ret_nothing_done;
|
||||
|
||||
ctx->flags |= rctx->mode;
|
||||
ctx->req = req;
|
||||
|
||||
dcp_crypt(dev, ctx);
|
||||
|
||||
return;
|
||||
|
||||
ret_nothing_done:
|
||||
clear_bit(DCP_FLAG_BUSY, &dev->flags);
|
||||
}
|
||||
|
||||
|
||||
static int dcp_cra_init(struct crypto_tfm *tfm)
|
||||
{
|
||||
const char *name = tfm->__crt_alg->cra_name;
|
||||
struct dcp_op *ctx = crypto_tfm_ctx(tfm);
|
||||
|
||||
tfm->crt_ablkcipher.reqsize = sizeof(struct dcp_dev_req_ctx);
|
||||
|
||||
ctx->fallback = crypto_alloc_ablkcipher(name, 0,
|
||||
CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK);
|
||||
|
||||
if (IS_ERR(ctx->fallback)) {
|
||||
dev_err(global_dev->dev, "Error allocating fallback algo %s\n",
|
||||
name);
|
||||
return PTR_ERR(ctx->fallback);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void dcp_cra_exit(struct crypto_tfm *tfm)
|
||||
{
|
||||
struct dcp_op *ctx = crypto_tfm_ctx(tfm);
|
||||
|
||||
if (ctx->fallback)
|
||||
crypto_free_ablkcipher(ctx->fallback);
|
||||
|
||||
ctx->fallback = NULL;
|
||||
}
|
||||
|
||||
/* async interface */
|
||||
static int dcp_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
|
||||
unsigned int len)
|
||||
{
|
||||
struct dcp_op *ctx = crypto_ablkcipher_ctx(tfm);
|
||||
unsigned int ret = 0;
|
||||
ctx->keylen = len;
|
||||
ctx->flags = 0;
|
||||
if (len == AES_KEYSIZE_128) {
|
||||
if (memcmp(ctx->key, key, AES_KEYSIZE_128)) {
|
||||
memcpy(ctx->key, key, len);
|
||||
ctx->flags |= DCP_NEW_KEY;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
ctx->fallback->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK;
|
||||
ctx->fallback->base.crt_flags |=
|
||||
(tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK);
|
||||
|
||||
ret = crypto_ablkcipher_setkey(ctx->fallback, key, len);
|
||||
if (ret) {
|
||||
struct crypto_tfm *tfm_aux = crypto_ablkcipher_tfm(tfm);
|
||||
|
||||
tfm_aux->crt_flags &= ~CRYPTO_TFM_RES_MASK;
|
||||
tfm_aux->crt_flags |=
|
||||
(ctx->fallback->base.crt_flags & CRYPTO_TFM_RES_MASK);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int dcp_aes_cbc_crypt(struct ablkcipher_request *req, int mode)
|
||||
{
|
||||
struct dcp_dev_req_ctx *rctx = ablkcipher_request_ctx(req);
|
||||
struct dcp_dev *dev = global_dev;
|
||||
unsigned long flags;
|
||||
int err = 0;
|
||||
|
||||
if (!IS_ALIGNED(req->nbytes, AES_BLOCK_SIZE))
|
||||
return -EINVAL;
|
||||
|
||||
rctx->mode = mode;
|
||||
|
||||
spin_lock_irqsave(&dev->queue_lock, flags);
|
||||
err = ablkcipher_enqueue_request(&dev->queue, req);
|
||||
spin_unlock_irqrestore(&dev->queue_lock, flags);
|
||||
|
||||
flags = test_and_set_bit(DCP_FLAG_BUSY, &dev->flags);
|
||||
|
||||
if (!(flags & DCP_FLAG_BUSY))
|
||||
tasklet_schedule(&dev->queue_task);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int dcp_aes_cbc_encrypt(struct ablkcipher_request *req)
|
||||
{
|
||||
struct crypto_tfm *tfm =
|
||||
crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req));
|
||||
struct dcp_op *ctx = crypto_ablkcipher_ctx(
|
||||
crypto_ablkcipher_reqtfm(req));
|
||||
|
||||
if (unlikely(ctx->keylen != AES_KEYSIZE_128)) {
|
||||
int err = 0;
|
||||
ablkcipher_request_set_tfm(req, ctx->fallback);
|
||||
err = crypto_ablkcipher_encrypt(req);
|
||||
ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm));
|
||||
return err;
|
||||
}
|
||||
|
||||
return dcp_aes_cbc_crypt(req, DCP_AES | DCP_ENC | DCP_CBC);
|
||||
}
|
||||
|
||||
static int dcp_aes_cbc_decrypt(struct ablkcipher_request *req)
|
||||
{
|
||||
struct crypto_tfm *tfm =
|
||||
crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req));
|
||||
struct dcp_op *ctx = crypto_ablkcipher_ctx(
|
||||
crypto_ablkcipher_reqtfm(req));
|
||||
|
||||
if (unlikely(ctx->keylen != AES_KEYSIZE_128)) {
|
||||
int err = 0;
|
||||
ablkcipher_request_set_tfm(req, ctx->fallback);
|
||||
err = crypto_ablkcipher_decrypt(req);
|
||||
ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm));
|
||||
return err;
|
||||
}
|
||||
return dcp_aes_cbc_crypt(req, DCP_AES | DCP_DEC | DCP_CBC);
|
||||
}
|
||||
|
||||
static struct crypto_alg algs[] = {
|
||||
{
|
||||
.cra_name = "cbc(aes)",
|
||||
.cra_driver_name = "dcp-cbc-aes",
|
||||
.cra_alignmask = 3,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC |
|
||||
CRYPTO_ALG_NEED_FALLBACK,
|
||||
.cra_blocksize = AES_KEYSIZE_128,
|
||||
.cra_type = &crypto_ablkcipher_type,
|
||||
.cra_priority = 300,
|
||||
.cra_u.ablkcipher = {
|
||||
.min_keysize = AES_KEYSIZE_128,
|
||||
.max_keysize = AES_KEYSIZE_128,
|
||||
.setkey = dcp_aes_setkey,
|
||||
.encrypt = dcp_aes_cbc_encrypt,
|
||||
.decrypt = dcp_aes_cbc_decrypt,
|
||||
.ivsize = AES_KEYSIZE_128,
|
||||
}
|
||||
|
||||
},
|
||||
};
|
||||
|
||||
/* DCP bootstream verification interface: uses OTP key for crypto */
|
||||
static int dcp_bootstream_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
file->private_data = container_of((file->private_data),
|
||||
struct dcp_dev, dcp_bootstream_misc);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static long dcp_bootstream_ioctl(struct file *file,
|
||||
unsigned int cmd, unsigned long arg)
|
||||
{
|
||||
struct dcp_dev *dev = (struct dcp_dev *) file->private_data;
|
||||
void __user *argp = (void __user *)arg;
|
||||
int ret;
|
||||
|
||||
if (dev == NULL)
|
||||
return -EBADF;
|
||||
|
||||
if (cmd != DBS_ENC && cmd != DBS_DEC)
|
||||
return -EINVAL;
|
||||
|
||||
if (copy_from_user(dev->payload_base, argp, 16))
|
||||
return -EFAULT;
|
||||
|
||||
if (test_and_set_bit(DCP_FLAG_BUSY, &dev->flags))
|
||||
return -EAGAIN;
|
||||
|
||||
dev->ctx = kzalloc(sizeof(struct dcp_op), GFP_KERNEL);
|
||||
if (!dev->ctx) {
|
||||
dev_err(dev->dev,
|
||||
"cannot allocate context for OTP crypto");
|
||||
clear_bit(DCP_FLAG_BUSY, &dev->flags);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
dev->ctx->flags = DCP_AES | DCP_ECB | DCP_OTP_KEY | DCP_CBC_INIT;
|
||||
dev->ctx->flags |= (cmd == DBS_ENC) ? DCP_ENC : DCP_DEC;
|
||||
dev->hw_pkg[0]->src = dev->payload_base_dma;
|
||||
dev->hw_pkg[0]->dst = dev->payload_base_dma;
|
||||
dev->hw_pkg[0]->size = 16;
|
||||
|
||||
dcp_op_start(dev, 0);
|
||||
|
||||
while (test_bit(DCP_FLAG_BUSY, &dev->flags))
|
||||
cpu_relax();
|
||||
|
||||
ret = dev->ctx->stat;
|
||||
if (!ret && copy_to_user(argp, dev->payload_base, 16))
|
||||
ret = -EFAULT;
|
||||
|
||||
kfree(dev->ctx);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const struct file_operations dcp_bootstream_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.unlocked_ioctl = dcp_bootstream_ioctl,
|
||||
.open = dcp_bootstream_open,
|
||||
};
|
||||
|
||||
static int dcp_probe(struct platform_device *pdev)
|
||||
{
|
||||
struct dcp_dev *dev = NULL;
|
||||
struct resource *r;
|
||||
int i, ret, j;
|
||||
|
||||
dev = devm_kzalloc(&pdev->dev, sizeof(*dev), GFP_KERNEL);
|
||||
if (!dev)
|
||||
return -ENOMEM;
|
||||
|
||||
global_dev = dev;
|
||||
dev->dev = &pdev->dev;
|
||||
|
||||
platform_set_drvdata(pdev, dev);
|
||||
|
||||
r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
|
||||
if (!r) {
|
||||
dev_err(&pdev->dev, "failed to get IORESOURCE_MEM\n");
|
||||
return -ENXIO;
|
||||
}
|
||||
dev->dcp_regs_base = devm_ioremap(&pdev->dev, r->start,
|
||||
resource_size(r));
|
||||
|
||||
dcp_set(dev, DCP_CTRL_SFRST, DCP_REG_CTRL);
|
||||
udelay(10);
|
||||
dcp_clear(dev, DCP_CTRL_SFRST | DCP_CTRL_CLKGATE, DCP_REG_CTRL);
|
||||
|
||||
dcp_write(dev, DCP_CTRL_GATHER_RES_WRITE |
|
||||
DCP_CTRL_ENABLE_CONTEXT_CACHE | DCP_CTRL_CH_IRQ_E_1,
|
||||
DCP_REG_CTRL);
|
||||
|
||||
dcp_write(dev, DCP_CHAN_CTRL_ENABLE_1, DCP_REG_CHAN_CTRL);
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
dcp_clear(dev, -1, dcp_chan_reg(DCP_REG_CHAN_STAT, i));
|
||||
|
||||
dcp_clear(dev, -1, DCP_REG_STAT);
|
||||
|
||||
|
||||
r = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
|
||||
if (!r) {
|
||||
dev_err(&pdev->dev, "can't get IRQ resource (0)\n");
|
||||
return -EIO;
|
||||
}
|
||||
dev->dcp_vmi_irq = r->start;
|
||||
ret = request_irq(dev->dcp_vmi_irq, dcp_vmi_irq, 0, "dcp", dev);
|
||||
if (ret != 0) {
|
||||
dev_err(&pdev->dev, "can't request_irq (0)\n");
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
r = platform_get_resource(pdev, IORESOURCE_IRQ, 1);
|
||||
if (!r) {
|
||||
dev_err(&pdev->dev, "can't get IRQ resource (1)\n");
|
||||
ret = -EIO;
|
||||
goto err_free_irq0;
|
||||
}
|
||||
dev->dcp_irq = r->start;
|
||||
ret = request_irq(dev->dcp_irq, dcp_irq, 0, "dcp", dev);
|
||||
if (ret != 0) {
|
||||
dev_err(&pdev->dev, "can't request_irq (1)\n");
|
||||
ret = -EIO;
|
||||
goto err_free_irq0;
|
||||
}
|
||||
|
||||
dev->hw_pkg[0] = dma_alloc_coherent(&pdev->dev,
|
||||
DCP_MAX_PKG * sizeof(struct dcp_hw_packet),
|
||||
&dev->hw_phys_pkg,
|
||||
GFP_KERNEL);
|
||||
if (!dev->hw_pkg[0]) {
|
||||
dev_err(&pdev->dev, "Could not allocate hw descriptors\n");
|
||||
ret = -ENOMEM;
|
||||
goto err_free_irq1;
|
||||
}
|
||||
|
||||
for (i = 1; i < DCP_MAX_PKG; i++) {
|
||||
dev->hw_pkg[i - 1]->next = dev->hw_phys_pkg
|
||||
+ i * sizeof(struct dcp_hw_packet);
|
||||
dev->hw_pkg[i] = dev->hw_pkg[i - 1] + 1;
|
||||
}
|
||||
dev->hw_pkg[i - 1]->next = dev->hw_phys_pkg;
|
||||
|
||||
|
||||
dev->payload_base = dma_alloc_coherent(&pdev->dev, 2 * AES_KEYSIZE_128,
|
||||
&dev->payload_base_dma, GFP_KERNEL);
|
||||
if (!dev->payload_base) {
|
||||
dev_err(&pdev->dev, "Could not allocate memory for key\n");
|
||||
ret = -ENOMEM;
|
||||
goto err_free_hw_packet;
|
||||
}
|
||||
tasklet_init(&dev->queue_task, dcp_queue_task,
|
||||
(unsigned long) dev);
|
||||
tasklet_init(&dev->done_task, dcp_done_task,
|
||||
(unsigned long) dev);
|
||||
spin_lock_init(&dev->queue_lock);
|
||||
|
||||
crypto_init_queue(&dev->queue, 10);
|
||||
|
||||
init_timer(&dev->watchdog);
|
||||
dev->watchdog.function = &dcp_watchdog;
|
||||
dev->watchdog.data = (unsigned long)dev;
|
||||
|
||||
dev->dcp_bootstream_misc.minor = MISC_DYNAMIC_MINOR,
|
||||
dev->dcp_bootstream_misc.name = "dcpboot",
|
||||
dev->dcp_bootstream_misc.fops = &dcp_bootstream_fops,
|
||||
ret = misc_register(&dev->dcp_bootstream_misc);
|
||||
if (ret != 0) {
|
||||
dev_err(dev->dev, "Unable to register misc device\n");
|
||||
goto err_free_key_iv;
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(algs); i++) {
|
||||
algs[i].cra_priority = 300;
|
||||
algs[i].cra_ctxsize = sizeof(struct dcp_op);
|
||||
algs[i].cra_module = THIS_MODULE;
|
||||
algs[i].cra_init = dcp_cra_init;
|
||||
algs[i].cra_exit = dcp_cra_exit;
|
||||
if (crypto_register_alg(&algs[i])) {
|
||||
dev_err(&pdev->dev, "register algorithm failed\n");
|
||||
ret = -ENOMEM;
|
||||
goto err_unregister;
|
||||
}
|
||||
}
|
||||
dev_notice(&pdev->dev, "DCP crypto enabled.!\n");
|
||||
|
||||
return 0;
|
||||
|
||||
err_unregister:
|
||||
for (j = 0; j < i; j++)
|
||||
crypto_unregister_alg(&algs[j]);
|
||||
err_free_key_iv:
|
||||
dma_free_coherent(&pdev->dev, 2 * AES_KEYSIZE_128, dev->payload_base,
|
||||
dev->payload_base_dma);
|
||||
err_free_hw_packet:
|
||||
dma_free_coherent(&pdev->dev, DCP_MAX_PKG *
|
||||
sizeof(struct dcp_hw_packet), dev->hw_pkg[0],
|
||||
dev->hw_phys_pkg);
|
||||
err_free_irq1:
|
||||
free_irq(dev->dcp_irq, dev);
|
||||
err_free_irq0:
|
||||
free_irq(dev->dcp_vmi_irq, dev);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int dcp_remove(struct platform_device *pdev)
|
||||
{
|
||||
struct dcp_dev *dev;
|
||||
int j;
|
||||
dev = platform_get_drvdata(pdev);
|
||||
|
||||
dma_free_coherent(&pdev->dev,
|
||||
DCP_MAX_PKG * sizeof(struct dcp_hw_packet),
|
||||
dev->hw_pkg[0], dev->hw_phys_pkg);
|
||||
|
||||
dma_free_coherent(&pdev->dev, 2 * AES_KEYSIZE_128, dev->payload_base,
|
||||
dev->payload_base_dma);
|
||||
|
||||
free_irq(dev->dcp_irq, dev);
|
||||
free_irq(dev->dcp_vmi_irq, dev);
|
||||
|
||||
tasklet_kill(&dev->done_task);
|
||||
tasklet_kill(&dev->queue_task);
|
||||
|
||||
for (j = 0; j < ARRAY_SIZE(algs); j++)
|
||||
crypto_unregister_alg(&algs[j]);
|
||||
|
||||
misc_deregister(&dev->dcp_bootstream_misc);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Device-tree match table; never modified at run time, hence const. */
static const struct of_device_id fs_dcp_of_match[] = {
	{ .compatible = "fsl-dcp"},
	{ /* sentinel */ },
};
|
||||
|
||||
static struct platform_driver fs_dcp_driver = {
|
||||
.probe = dcp_probe,
|
||||
.remove = dcp_remove,
|
||||
.driver = {
|
||||
.name = "fsl-dcp",
|
||||
.owner = THIS_MODULE,
|
||||
.of_match_table = fs_dcp_of_match
|
||||
}
|
||||
};
|
||||
|
||||
module_platform_driver(fs_dcp_driver);
|
||||
|
||||
|
||||
MODULE_AUTHOR("Tobias Rauter <tobias.rauter@gmail.com>");
|
||||
MODULE_DESCRIPTION("Freescale DCP Crypto Driver");
|
||||
MODULE_LICENSE("GPL");
|
@ -2676,7 +2676,7 @@ err_out_stop_device:
|
||||
hifn_reset_dma(dev, 1);
|
||||
hifn_stop_device(dev);
|
||||
err_out_free_irq:
|
||||
free_irq(dev->irq, dev->name);
|
||||
free_irq(dev->irq, dev);
|
||||
tasklet_kill(&dev->tasklet);
|
||||
err_out_free_desc:
|
||||
pci_free_consistent(pdev, sizeof(struct hifn_dma),
|
||||
@ -2711,7 +2711,7 @@ static void hifn_remove(struct pci_dev *pdev)
|
||||
hifn_reset_dma(dev, 1);
|
||||
hifn_stop_device(dev);
|
||||
|
||||
free_irq(dev->irq, dev->name);
|
||||
free_irq(dev->irq, dev);
|
||||
tasklet_kill(&dev->tasklet);
|
||||
|
||||
hifn_flush(dev);
|
||||
|
@ -1146,7 +1146,6 @@ err_unmap_reg:
|
||||
err:
|
||||
kfree(cp);
|
||||
cpg = NULL;
|
||||
platform_set_drvdata(pdev, NULL);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -203,13 +203,6 @@ static void omap_aes_write_n(struct omap_aes_dev *dd, u32 offset,
|
||||
|
||||
static int omap_aes_hw_init(struct omap_aes_dev *dd)
|
||||
{
|
||||
/*
|
||||
* clocks are enabled when request starts and disabled when finished.
|
||||
* It may be long delays between requests.
|
||||
* Device might go to off mode to save power.
|
||||
*/
|
||||
pm_runtime_get_sync(dd->dev);
|
||||
|
||||
if (!(dd->flags & FLAGS_INIT)) {
|
||||
dd->flags |= FLAGS_INIT;
|
||||
dd->err = 0;
|
||||
@ -636,7 +629,6 @@ static void omap_aes_finish_req(struct omap_aes_dev *dd, int err)
|
||||
|
||||
pr_debug("err: %d\n", err);
|
||||
|
||||
pm_runtime_put(dd->dev);
|
||||
dd->flags &= ~FLAGS_BUSY;
|
||||
|
||||
req->base.complete(&req->base, err);
|
||||
@ -837,8 +829,16 @@ static int omap_aes_ctr_decrypt(struct ablkcipher_request *req)
|
||||
|
||||
static int omap_aes_cra_init(struct crypto_tfm *tfm)
|
||||
{
|
||||
pr_debug("enter\n");
|
||||
struct omap_aes_dev *dd = NULL;
|
||||
|
||||
/* Find AES device, currently picks the first device */
|
||||
spin_lock_bh(&list_lock);
|
||||
list_for_each_entry(dd, &dev_list, list) {
|
||||
break;
|
||||
}
|
||||
spin_unlock_bh(&list_lock);
|
||||
|
||||
pm_runtime_get_sync(dd->dev);
|
||||
tfm->crt_ablkcipher.reqsize = sizeof(struct omap_aes_reqctx);
|
||||
|
||||
return 0;
|
||||
@ -846,7 +846,16 @@ static int omap_aes_cra_init(struct crypto_tfm *tfm)
|
||||
|
||||
static void omap_aes_cra_exit(struct crypto_tfm *tfm)
|
||||
{
|
||||
pr_debug("enter\n");
|
||||
struct omap_aes_dev *dd = NULL;
|
||||
|
||||
/* Find AES device, currently picks the first device */
|
||||
spin_lock_bh(&list_lock);
|
||||
list_for_each_entry(dd, &dev_list, list) {
|
||||
break;
|
||||
}
|
||||
spin_unlock_bh(&list_lock);
|
||||
|
||||
pm_runtime_put_sync(dd->dev);
|
||||
}
|
||||
|
||||
/* ********************** ALGS ************************************ */
|
||||
@ -1125,10 +1134,9 @@ static int omap_aes_probe(struct platform_device *pdev)
|
||||
if (err)
|
||||
goto err_res;
|
||||
|
||||
dd->io_base = devm_request_and_ioremap(dev, &res);
|
||||
if (!dd->io_base) {
|
||||
dev_err(dev, "can't ioremap\n");
|
||||
err = -ENOMEM;
|
||||
dd->io_base = devm_ioremap_resource(dev, &res);
|
||||
if (IS_ERR(dd->io_base)) {
|
||||
err = PTR_ERR(dd->io_base);
|
||||
goto err_res;
|
||||
}
|
||||
dd->phys_base = res.start;
|
||||
|
@ -1686,10 +1686,9 @@ static int omap_sham_probe(struct platform_device *pdev)
|
||||
if (err)
|
||||
goto res_err;
|
||||
|
||||
dd->io_base = devm_request_and_ioremap(dev, &res);
|
||||
if (!dd->io_base) {
|
||||
dev_err(dev, "can't ioremap\n");
|
||||
err = -ENOMEM;
|
||||
dd->io_base = devm_ioremap_resource(dev, &res);
|
||||
if (IS_ERR(dd->io_base)) {
|
||||
err = PTR_ERR(dd->io_base);
|
||||
goto res_err;
|
||||
}
|
||||
dd->phys_base = res.start;
|
||||
|
@ -1298,7 +1298,7 @@ static ssize_t spacc_stat_irq_thresh_store(struct device *dev,
|
||||
struct spacc_engine *engine = spacc_dev_to_engine(dev);
|
||||
unsigned long thresh;
|
||||
|
||||
if (strict_strtoul(buf, 0, &thresh))
|
||||
if (kstrtoul(buf, 0, &thresh))
|
||||
return -EINVAL;
|
||||
|
||||
thresh = clamp(thresh, 1UL, engine->fifo_sz - 1);
|
||||
|
@ -647,7 +647,6 @@ static int s5p_aes_probe(struct platform_device *pdev)
|
||||
clk_disable(pdata->clk);
|
||||
|
||||
s5p_dev = NULL;
|
||||
platform_set_drvdata(pdev, NULL);
|
||||
|
||||
return err;
|
||||
}
|
||||
@ -668,7 +667,6 @@ static int s5p_aes_remove(struct platform_device *pdev)
|
||||
clk_disable(pdata->clk);
|
||||
|
||||
s5p_dev = NULL;
|
||||
platform_set_drvdata(pdev, NULL);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -1629,7 +1629,7 @@ static int ux500_cryp_remove(struct platform_device *pdev)
|
||||
|
||||
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
|
||||
if (res)
|
||||
release_mem_region(res->start, res->end - res->start + 1);
|
||||
release_mem_region(res->start, resource_size(res));
|
||||
|
||||
kfree(device_data);
|
||||
|
||||
|
@ -3,6 +3,10 @@
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
#define CRC_T10DIF_DIGEST_SIZE 2
|
||||
#define CRC_T10DIF_BLOCK_SIZE 1
|
||||
|
||||
__u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer, size_t len);
|
||||
__u16 crc_t10dif(unsigned char const *, size_t);
|
||||
|
||||
#endif
|
||||
|
@ -66,6 +66,8 @@ config CRC16
|
||||
|
||||
config CRC_T10DIF
|
||||
tristate "CRC calculation for the T10 Data Integrity Field"
|
||||
select CRYPTO
|
||||
select CRYPTO_CRCT10DIF
|
||||
help
|
||||
This option is only needed if a module that's not in the
|
||||
kernel tree needs to calculate CRC checks for use with the
|
||||
|
@ -11,57 +11,44 @@
|
||||
#include <linux/types.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/crc-t10dif.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/init.h>
|
||||
#include <crypto/hash.h>
|
||||
|
||||
/* Table generated using the following polynomium:
|
||||
* x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1
|
||||
* gt: 0x8bb7
|
||||
*/
|
||||
static const __u16 t10_dif_crc_table[256] = {
|
||||
0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B,
|
||||
0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6,
|
||||
0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6,
|
||||
0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B,
|
||||
0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1,
|
||||
0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C,
|
||||
0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C,
|
||||
0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781,
|
||||
0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8,
|
||||
0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255,
|
||||
0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925,
|
||||
0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698,
|
||||
0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472,
|
||||
0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF,
|
||||
0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF,
|
||||
0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02,
|
||||
0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA,
|
||||
0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067,
|
||||
0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17,
|
||||
0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA,
|
||||
0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640,
|
||||
0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD,
|
||||
0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D,
|
||||
0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30,
|
||||
0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759,
|
||||
0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4,
|
||||
0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394,
|
||||
0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29,
|
||||
0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3,
|
||||
0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E,
|
||||
0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E,
|
||||
0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3
|
||||
};
|
||||
static struct crypto_shash *crct10dif_tfm;
|
||||
|
||||
__u16 crc_t10dif(const unsigned char *buffer, size_t len)
|
||||
{
|
||||
__u16 crc = 0;
|
||||
unsigned int i;
|
||||
struct {
|
||||
struct shash_desc shash;
|
||||
char ctx[2];
|
||||
} desc;
|
||||
int err;
|
||||
|
||||
for (i = 0 ; i < len ; i++)
|
||||
crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff];
|
||||
desc.shash.tfm = crct10dif_tfm;
|
||||
desc.shash.flags = 0;
|
||||
*(__u16 *)desc.ctx = 0;
|
||||
|
||||
return crc;
|
||||
err = crypto_shash_update(&desc.shash, buffer, len);
|
||||
BUG_ON(err);
|
||||
|
||||
return *(__u16 *)desc.ctx;
|
||||
}
|
||||
EXPORT_SYMBOL(crc_t10dif);
|
||||
|
||||
static int __init crc_t10dif_mod_init(void)
|
||||
{
|
||||
crct10dif_tfm = crypto_alloc_shash("crct10dif", 0, 0);
|
||||
return PTR_RET(crct10dif_tfm);
|
||||
}
|
||||
|
||||
static void __exit crc_t10dif_mod_fini(void)
|
||||
{
|
||||
crypto_free_shash(crct10dif_tfm);
|
||||
}
|
||||
|
||||
module_init(crc_t10dif_mod_init);
|
||||
module_exit(crc_t10dif_mod_fini);
|
||||
|
||||
MODULE_DESCRIPTION("T10 DIF CRC calculation");
|
||||
MODULE_LICENSE("GPL");
|
||||
|
Loading…
Reference in New Issue
Block a user