Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto updates from Herbert Xu:

 - Fixed algorithm construction hang when self-test fails.
 - Added SHA variants to talitos AEAD list.
 - New driver for Exynos random number generator.
 - Performance enhancements for arc4.
 - Added hwrng support to caam.
 - Added ahash support to caam.
 - Fixed bad kfree in aesni-intel.
 - Allow aesni-intel in FIPS mode.
 - Added atmel driver with support for AES/3DES/SHA.
 - Bug fixes for mv_cesa.
 - CRC hardware driver for BF60x family processors.

* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (66 commits)
  crypto: twofish-avx - remove useless instruction
  crypto: testmgr - add aead cbc aes hmac sha1,256,512 test vectors
  crypto: talitos - add sha224, sha384 and sha512 to existing AEAD algorithms
  crypto: talitos - export the talitos_submit function
  crypto: talitos - move talitos structures to header file
  crypto: atmel - add new tests to tcrypt
  crypto: atmel - add Atmel SHA1/SHA256 driver
  crypto: atmel - add Atmel DES/TDES driver
  crypto: atmel - add Atmel AES driver
  ARM: AT91SAM9G45: add crypto peripherals
  crypto: testmgr - allow aesni-intel and ghash_clmulni-intel in fips mode
  hwrng: exynos - Add support for Exynos random number generator
  crypto: aesni-intel - fix wrong kfree pointer
  crypto: caam - ERA retrieval and printing for SEC device
  crypto: caam - Using alloc_coherent for caam job rings
  crypto: algapi - Fix hang on crypto allocation
  crypto: arc4 - now arc needs blockcipher support
  crypto: caam - one tasklet per job ring
  crypto: caam - consolidate memory barriers from job ring en/dequeue
  crypto: caam - only query h/w in job ring dequeue path
  ...
This commit is contained in:
Linus Torvalds 2012-07-26 13:00:59 -07:00
commit 44a6b84421
68 changed files with 14293 additions and 1846 deletions

View File

@ -183,6 +183,13 @@ static struct clk adc_op_clk = {
.rate_hz = 13200000,
};
/*
 * AES/TDES/SHA clock - Only for sam9m11/sam9g56.
 *
 * One peripheral-type clock whose PMC mask is the single bit numbered
 * AT91SAM9G45_ID_AESTDESSHA. The clock lookup table in this file maps the
 * "atmel_sha", "atmel_tdes" and "atmel_aes" devices onto this same clock.
 */
static struct clk aestdessha_clk = {
.name = "aestdessha_clk",
.pmc_mask = 1 << AT91SAM9G45_ID_AESTDESSHA,
.type = CLK_TYPE_PERIPHERAL,
};
static struct clk *periph_clocks[] __initdata = {
&pioA_clk,
&pioB_clk,
@ -212,6 +219,7 @@ static struct clk *periph_clocks[] __initdata = {
&udphs_clk,
&mmc1_clk,
&adc_op_clk,
&aestdessha_clk,
// irq0
};
@ -232,6 +240,9 @@ static struct clk_lookup periph_clocks_lookups[] = {
CLKDEV_CON_DEV_ID("pclk", "ssc.0", &ssc0_clk),
CLKDEV_CON_DEV_ID("pclk", "ssc.1", &ssc1_clk),
CLKDEV_CON_DEV_ID(NULL, "atmel-trng", &trng_clk),
CLKDEV_CON_DEV_ID(NULL, "atmel_sha", &aestdessha_clk),
CLKDEV_CON_DEV_ID(NULL, "atmel_tdes", &aestdessha_clk),
CLKDEV_CON_DEV_ID(NULL, "atmel_aes", &aestdessha_clk),
/* more usart lookup table for DT entries */
CLKDEV_CON_DEV_ID("usart", "ffffee00.serial", &mck),
CLKDEV_CON_DEV_ID("usart", "fff8c000.serial", &usart0_clk),
@ -388,7 +399,7 @@ static unsigned int at91sam9g45_default_irq_priority[NR_AIC_IRQS] __initdata = {
3, /* Ethernet */
0, /* Image Sensor Interface */
2, /* USB Device High speed port */
0,
0, /* AESTDESSHA Crypto HW Accelerators */
0, /* Multimedia Card Interface 1 */
0,
0, /* Advanced Interrupt Controller (IRQ0) */

View File

@ -18,6 +18,7 @@
#include <linux/platform_device.h>
#include <linux/i2c-gpio.h>
#include <linux/atmel-mci.h>
#include <linux/platform_data/atmel-aes.h>
#include <linux/platform_data/at91_adc.h>
@ -1830,6 +1831,130 @@ void __init at91_register_uart(unsigned id, unsigned portnr, unsigned pins) {}
void __init at91_add_device_serial(void) {}
#endif
/* --------------------------------------------------------------------
* SHA1/SHA256
* -------------------------------------------------------------------- */
#if defined(CONFIG_CRYPTO_DEV_ATMEL_SHA) || defined(CONFIG_CRYPTO_DEV_ATMEL_SHA_MODULE)
/*
 * Resources of the SHA engine: a 16K register window and its interrupt.
 * Both entries carry an explicit index, matching the TDES and AES tables.
 */
static struct resource sha_resources[] = {
	[0] = {
		.start	= AT91SAM9G45_BASE_SHA,
		.end	= AT91SAM9G45_BASE_SHA + SZ_16K - 1,
		.flags	= IORESOURCE_MEM,
	},
	[1] = {
		/* Interrupt id shared with the TDES and AES resources. */
		.start	= AT91SAM9G45_ID_AESTDESSHA,
		.end	= AT91SAM9G45_ID_AESTDESSHA,
		.flags	= IORESOURCE_IRQ,
	},
};

/* Platform device named "atmel_sha"; the clock lookup table uses that name. */
static struct platform_device at91sam9g45_sha_device = {
	.name		= "atmel_sha",
	.id		= -1,
	.resource	= sha_resources,
	.num_resources	= ARRAY_SIZE(sha_resources),
};

/* Register the SHA platform device (the registration result is not checked). */
static void __init at91_add_device_sha(void)
{
	platform_device_register(&at91sam9g45_sha_device);
}
#else
/* SHA driver not configured: nothing to register. */
static void __init at91_add_device_sha(void) {}
#endif
/* --------------------------------------------------------------------
* DES/TDES
* -------------------------------------------------------------------- */
#if defined(CONFIG_CRYPTO_DEV_ATMEL_TDES) || defined(CONFIG_CRYPTO_DEV_ATMEL_TDES_MODULE)
/* Resources of the DES/TDES engine: a 16K register window and its interrupt. */
static struct resource tdes_resources[] = {
[0] = {
.start = AT91SAM9G45_BASE_TDES,
.end = AT91SAM9G45_BASE_TDES + SZ_16K - 1,
.flags = IORESOURCE_MEM,
},
[1] = {
/* Interrupt id shared with the SHA and AES resources. */
.start = AT91SAM9G45_ID_AESTDESSHA,
.end = AT91SAM9G45_ID_AESTDESSHA,
.flags = IORESOURCE_IRQ,
},
};
/* Platform device named "atmel_tdes"; the clock lookup table uses that name. */
static struct platform_device at91sam9g45_tdes_device = {
.name = "atmel_tdes",
.id = -1,
.resource = tdes_resources,
.num_resources = ARRAY_SIZE(tdes_resources),
};
/* Register the TDES platform device (the registration result is not checked). */
static void __init at91_add_device_tdes(void)
{
platform_device_register(&at91sam9g45_tdes_device);
}
#else
/* TDES driver not configured: nothing to register. */
static void __init at91_add_device_tdes(void) {}
#endif
/* --------------------------------------------------------------------
* AES
* -------------------------------------------------------------------- */
#if defined(CONFIG_CRYPTO_DEV_ATMEL_AES) || defined(CONFIG_CRYPTO_DEV_ATMEL_AES_MODULE)
/* Platform data of the AES device; dma_slave is filled in by at91_add_device_aes(). */
static struct aes_platform_data aes_data;
/* 32-bit DMA mask; the device's dma_mask pointer refers to this variable. */
static u64 aes_dmamask = DMA_BIT_MASK(32);
/* Resources of the AES engine: a 16K register window and its interrupt. */
static struct resource aes_resources[] = {
[0] = {
.start = AT91SAM9G45_BASE_AES,
.end = AT91SAM9G45_BASE_AES + SZ_16K - 1,
.flags = IORESOURCE_MEM,
},
[1] = {
/* Interrupt id shared with the SHA and TDES resources. */
.start = AT91SAM9G45_ID_AESTDESSHA,
.end = AT91SAM9G45_ID_AESTDESSHA,
.flags = IORESOURCE_IRQ,
},
};
/*
 * Platform device named "atmel_aes"; the clock lookup table uses that name.
 * Unlike the SHA and TDES devices it also carries DMA masks and platform data.
 */
static struct platform_device at91sam9g45_aes_device = {
.name = "atmel_aes",
.id = -1,
.dev = {
.dma_mask = &aes_dmamask,
.coherent_dma_mask = DMA_BIT_MASK(32),
.platform_data = &aes_data,
},
.resource = aes_resources,
.num_resources = ARRAY_SIZE(aes_resources),
};
/*
 * Register the AES platform device together with its DMA slave description.
 *
 * Allocates one zeroed struct aes_dma_data, points both its TX and RX
 * at_dma_slave entries at the at_hdmac controller, stores it in
 * aes_data.dma_slave and then registers at91sam9g45_aes_device.
 *
 * If the allocation fails the device is not registered: without the DMA
 * description there is nothing valid to hand to the driver, and writing
 * through a NULL pointer here would crash during early init.
 */
static void __init at91_add_device_aes(void)
{
	struct at_dma_slave *atslave;
	struct aes_dma_data *alt_atslave;

	alt_atslave = kzalloc(sizeof(*alt_atslave), GFP_KERNEL);
	if (!alt_atslave)
		return;

	/*
	 * DMA TX slave channel configuration.
	 * NOTE(review): the TX entry uses the AES_RX request id (and the RX
	 * entry below uses AES_TX) - presumably the ids are named from the
	 * peripheral's point of view; confirm against the datasheet.
	 */
	atslave = &alt_atslave->txdata;
	atslave->dma_dev = &at_hdmac_device.dev;
	atslave->cfg = ATC_FIFOCFG_ENOUGHSPACE | ATC_SRC_H2SEL_HW |
		       ATC_SRC_PER(AT_DMA_ID_AES_RX);

	/* DMA RX slave channel configuration */
	atslave = &alt_atslave->rxdata;
	atslave->dma_dev = &at_hdmac_device.dev;
	atslave->cfg = ATC_FIFOCFG_ENOUGHSPACE | ATC_DST_H2SEL_HW |
		       ATC_DST_PER(AT_DMA_ID_AES_TX);

	aes_data.dma_slave = alt_atslave;
	platform_device_register(&at91sam9g45_aes_device);
}
#else
static void __init at91_add_device_aes(void) {}
#endif
/* -------------------------------------------------------------------- */
/*
@ -1847,6 +1972,9 @@ static int __init at91_add_standard_devices(void)
at91_add_device_trng();
at91_add_device_watchdog();
at91_add_device_tc();
at91_add_device_sha();
at91_add_device_tdes();
at91_add_device_aes();
return 0;
}

View File

@ -136,6 +136,8 @@
#define AT_DMA_ID_SSC1_RX 8
#define AT_DMA_ID_AC97_TX 9
#define AT_DMA_ID_AC97_RX 10
#define AT_DMA_ID_AES_TX 11
#define AT_DMA_ID_AES_RX 12
#define AT_DMA_ID_MCI1 13
#endif

View File

@ -149,7 +149,6 @@ core-$(CONFIG_KVM) += arch/powerpc/kvm/
core-$(CONFIG_PERF_EVENTS) += arch/powerpc/perf/
drivers-$(CONFIG_OPROFILE) += arch/powerpc/oprofile/
drivers-$(CONFIG_CRYPTO_DEV_NX) += drivers/crypto/nx/
# Default to zImage, override when needed
all: zImage

View File

@ -1,18 +0,0 @@
/*
* Cryptographic API.
*
* Function for checking keys for the DES and Triple DES Encryption
* algorithms.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
*/
#ifndef __CRYPTO_DES_H__
#define __CRYPTO_DES_H__
/*
 * Key check for the DES family, defined outside this header.
 * NOTE(review): the prototype names no parameters - presumably a key buffer,
 * its length and a flags word that is written back; confirm against the
 * definition.
 */
extern int crypto_des_check_key(const u8*, unsigned int, u32*);
#endif /*__CRYPTO_DES_H__*/

View File

@ -2,6 +2,9 @@
# Arch-specific CryptoAPI modules.
#
obj-$(CONFIG_CRYPTO_ABLK_HELPER_X86) += ablk_helper.o
obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
@ -12,8 +15,10 @@ obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
obj-$(CONFIG_CRYPTO_TWOFISH_AVX_X86_64) += twofish-avx-x86_64.o
obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o
obj-$(CONFIG_CRYPTO_SERPENT_AVX_X86_64) += serpent-avx-x86_64.o
obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
@ -30,16 +35,11 @@ camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
twofish-avx-x86_64-y := twofish-avx-x86_64-asm_64.o twofish_avx_glue.o
salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
serpent-avx-x86_64-y := serpent-avx-x86_64-asm_64.o serpent_avx_glue.o
aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
# enable AVX support only when $(AS) can actually assemble the instructions
ifeq ($(call as-instr,vpxor %xmm0$(comma)%xmm1$(comma)%xmm2,yes,no),yes)
AFLAGS_sha1_ssse3_asm.o += -DSHA1_ENABLE_AVX_SUPPORT
CFLAGS_sha1_ssse3_glue.o += -DSHA1_ENABLE_AVX_SUPPORT
endif
sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o

View File

@ -0,0 +1,149 @@
/*
* Shared async block cipher helpers
*
* Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
*
* Based on aesni-intel_glue.c by:
* Copyright (C) 2008, Intel Corp.
* Author: Huang Ying <ying.huang@intel.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA
*
*/
#include <linux/kernel.h>
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/module.h>
#include <crypto/algapi.h>
#include <crypto/cryptd.h>
#include <asm/i387.h>
#include <asm/crypto/ablk_helper.h>
/*
 * ablk_set_key - forward a key to the cryptd transform behind @tfm.
 * @tfm:     async wrapper transform whose context holds the cryptd handle
 * @key:     key bytes
 * @key_len: number of key bytes
 *
 * The wrapper's request flags are mirrored onto the inner transform before
 * the key is set, and the inner transform's result flags are mirrored back
 * afterwards. Returns whatever crypto_ablkcipher_setkey() returned.
 */
int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
		 unsigned int key_len)
{
	struct async_helper_ctx *helper = crypto_ablkcipher_ctx(tfm);
	struct crypto_ablkcipher *inner = &helper->cryptd_tfm->base;
	int ret;

	/* Replace the inner request flags with the wrapper's. */
	crypto_ablkcipher_clear_flags(inner, CRYPTO_TFM_REQ_MASK);
	crypto_ablkcipher_set_flags(inner,
				    crypto_ablkcipher_get_flags(tfm) &
				    CRYPTO_TFM_REQ_MASK);

	ret = crypto_ablkcipher_setkey(inner, key, key_len);

	/* Report the inner result flags through the wrapper. */
	crypto_ablkcipher_set_flags(tfm,
				    crypto_ablkcipher_get_flags(inner) &
				    CRYPTO_TFM_RES_MASK);

	return ret;
}
EXPORT_SYMBOL_GPL(ablk_set_key);
int __ablk_encrypt(struct ablkcipher_request *req)
{
struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
struct blkcipher_desc desc;
desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
desc.info = req->info;
desc.flags = 0;
return crypto_blkcipher_crt(desc.tfm)->encrypt(
&desc, req->dst, req->src, req->nbytes);
}
EXPORT_SYMBOL_GPL(__ablk_encrypt);
/*
 * ablk_encrypt - encrypt @req, directly if possible, else through cryptd.
 *
 * When irq_fpu_usable() is true the request is handled right away by
 * __ablk_encrypt(). Otherwise the request is copied into its own private
 * context area, retargeted at the cryptd transform and submitted there.
 */
int ablk_encrypt(struct ablkcipher_request *req)
{
	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
	struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
	struct ablkcipher_request *deferred;

	if (irq_fpu_usable())
		return __ablk_encrypt(req);

	/* Clone the request into its context area and hand it to cryptd. */
	deferred = ablkcipher_request_ctx(req);
	memcpy(deferred, req, sizeof(*req));
	ablkcipher_request_set_tfm(deferred, &ctx->cryptd_tfm->base);

	return crypto_ablkcipher_encrypt(deferred);
}
EXPORT_SYMBOL_GPL(ablk_encrypt);
/*
 * ablk_decrypt - decrypt @req, directly if possible, else through cryptd.
 *
 * When irq_fpu_usable() is true the child blkcipher's decrypt operation is
 * called on the request's scatterlists with a freshly built descriptor.
 * Otherwise the request is copied into its own private context area,
 * retargeted at the cryptd transform and submitted there.
 */
int ablk_decrypt(struct ablkcipher_request *req)
{
	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
	struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
	struct ablkcipher_request *deferred;
	struct blkcipher_desc desc;

	if (irq_fpu_usable()) {
		/* Direct path: run the child cipher in the current context. */
		desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
		desc.info = req->info;
		desc.flags = 0;

		return crypto_blkcipher_crt(desc.tfm)->decrypt(&desc, req->dst,
							       req->src,
							       req->nbytes);
	}

	/* Clone the request into its context area and hand it to cryptd. */
	deferred = ablkcipher_request_ctx(req);
	memcpy(deferred, req, sizeof(*req));
	ablkcipher_request_set_tfm(deferred, &ctx->cryptd_tfm->base);

	return crypto_ablkcipher_decrypt(deferred);
}
EXPORT_SYMBOL_GPL(ablk_decrypt);
/*
 * ablk_exit - transform teardown: free the cryptd transform stored in the
 * context of @tfm.
 */
void ablk_exit(struct crypto_tfm *tfm)
{
	struct async_helper_ctx *helper = crypto_tfm_ctx(tfm);
	struct cryptd_ablkcipher *inner = helper->cryptd_tfm;

	cryptd_free_ablkcipher(inner);
}
EXPORT_SYMBOL_GPL(ablk_exit);
/*
 * ablk_init_common - attach a cryptd transform for @drv_name to @tfm.
 * @tfm:      transform being initialised
 * @drv_name: driver name passed to cryptd_alloc_ablkcipher()
 *
 * On success the cryptd handle is stored in the transform context and the
 * request size is set to one struct ablkcipher_request plus the cryptd
 * transform's own request size, which leaves room for the request copy made
 * by ablk_encrypt()/ablk_decrypt(). Returns 0, or the error encoded in the
 * pointer returned by cryptd_alloc_ablkcipher().
 */
int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name)
{
	struct async_helper_ctx *helper = crypto_tfm_ctx(tfm);
	struct cryptd_ablkcipher *inner = cryptd_alloc_ablkcipher(drv_name, 0, 0);

	if (IS_ERR(inner))
		return PTR_ERR(inner);

	helper->cryptd_tfm = inner;
	tfm->crt_ablkcipher.reqsize = crypto_ablkcipher_reqsize(&inner->base) +
				      sizeof(struct ablkcipher_request);

	return 0;
}
EXPORT_SYMBOL_GPL(ablk_init_common);
/*
 * ablk_init - initialise @tfm on top of its "__driver-" prefixed sibling.
 *
 * Builds the inner driver name by prefixing the transform's own driver name
 * with "__driver-" and passes it to ablk_init_common().
 *
 * Returns the result of ablk_init_common(), or -ENAMETOOLONG when the
 * prefixed name does not fit in CRYPTO_MAX_ALG_NAME bytes. A silently
 * truncated name would otherwise be looked up, which can only fail or
 * resolve to the wrong algorithm.
 */
int ablk_init(struct crypto_tfm *tfm)
{
	char drv_name[CRYPTO_MAX_ALG_NAME];
	int len;

	len = snprintf(drv_name, sizeof(drv_name), "__driver-%s",
		       crypto_tfm_alg_driver_name(tfm));
	/* snprintf reports the untruncated length; reject names that were cut. */
	if (len < 0 || (size_t)len >= sizeof(drv_name))
		return -ENAMETOOLONG;

	return ablk_init_common(tfm, drv_name);
}
EXPORT_SYMBOL_GPL(ablk_init);
MODULE_LICENSE("GPL");

View File

@ -5,7 +5,7 @@
#include <linux/module.h>
#include <crypto/aes.h>
#include <asm/aes.h>
#include <asm/crypto/aes.h>
asmlinkage void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
asmlinkage void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);

View File

@ -30,7 +30,8 @@
#include <crypto/ctr.h>
#include <asm/cpu_device_id.h>
#include <asm/i387.h>
#include <asm/aes.h>
#include <asm/crypto/aes.h>
#include <asm/crypto/ablk_helper.h>
#include <crypto/scatterwalk.h>
#include <crypto/internal/aead.h>
#include <linux/workqueue.h>
@ -52,10 +53,6 @@
#define HAS_XTS
#endif
/*
 * Context of the async AES wrappers in this file: holds the cryptd
 * transform that the ablk_* helpers delegate to.
 */
struct async_aes_ctx {
struct cryptd_ablkcipher *cryptd_tfm;
};
/* This data is stored at the end of the crypto_tfm struct.
* It's a type of per "session" data storage location.
* This needs to be 16 byte aligned.
@ -377,87 +374,6 @@ static int ctr_crypt(struct blkcipher_desc *desc,
}
#endif
/*
 * Forward a key to the cryptd transform stored in the context of @tfm.
 * Request flags are copied from @tfm to the child before the key is set and
 * result flags are copied back afterwards. Returns the child's setkey result.
 */
static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
unsigned int key_len)
{
struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base;
int err;
/* Replace the child's request flags with the wrapper's. */
crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm)
& CRYPTO_TFM_REQ_MASK);
err = crypto_ablkcipher_setkey(child, key, key_len);
/* Report the child's result flags through the wrapper. */
crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child)
& CRYPTO_TFM_RES_MASK);
return err;
}
/*
 * Encrypt @req. If irq_fpu_usable() is false the request is copied into its
 * private context area and submitted to the cryptd transform; otherwise the
 * child blkcipher's encrypt operation is called directly.
 */
static int ablk_encrypt(struct ablkcipher_request *req)
{
struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
if (!irq_fpu_usable()) {
/* Deferred path: clone the request and retarget it at cryptd. */
struct ablkcipher_request *cryptd_req =
ablkcipher_request_ctx(req);
memcpy(cryptd_req, req, sizeof(*req));
ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
return crypto_ablkcipher_encrypt(cryptd_req);
} else {
/* Direct path: descriptor built from the child tfm and the request IV. */
struct blkcipher_desc desc;
desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
desc.info = req->info;
desc.flags = 0;
return crypto_blkcipher_crt(desc.tfm)->encrypt(
&desc, req->dst, req->src, req->nbytes);
}
}
/*
 * Decrypt @req. If irq_fpu_usable() is false the request is copied into its
 * private context area and submitted to the cryptd transform; otherwise the
 * child blkcipher's decrypt operation is called directly.
 */
static int ablk_decrypt(struct ablkcipher_request *req)
{
struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
if (!irq_fpu_usable()) {
/* Deferred path: clone the request and retarget it at cryptd. */
struct ablkcipher_request *cryptd_req =
ablkcipher_request_ctx(req);
memcpy(cryptd_req, req, sizeof(*req));
ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
return crypto_ablkcipher_decrypt(cryptd_req);
} else {
/* Direct path: descriptor built from the child tfm and the request IV. */
struct blkcipher_desc desc;
desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
desc.info = req->info;
desc.flags = 0;
return crypto_blkcipher_crt(desc.tfm)->decrypt(
&desc, req->dst, req->src, req->nbytes);
}
}
/* Transform teardown: free the cryptd transform stored in the context. */
static void ablk_exit(struct crypto_tfm *tfm)
{
struct async_aes_ctx *ctx = crypto_tfm_ctx(tfm);
cryptd_free_ablkcipher(ctx->cryptd_tfm);
}
/*
 * Allocate a cryptd transform for @drv_name and store it in the context of
 * @tfm. Returns 0 on success or the error encoded in the returned pointer.
 */
static int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name)
{
struct async_aes_ctx *ctx = crypto_tfm_ctx(tfm);
struct cryptd_ablkcipher *cryptd_tfm;
cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0);
if (IS_ERR(cryptd_tfm))
return PTR_ERR(cryptd_tfm);
ctx->cryptd_tfm = cryptd_tfm;
/*
 * Reserve room for one request copy (made by ablk_encrypt/ablk_decrypt)
 * followed by the cryptd transform's own per-request area.
 */
tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) +
crypto_ablkcipher_reqsize(&cryptd_tfm->base);
return 0;
}
static int ablk_ecb_init(struct crypto_tfm *tfm)
{
return ablk_init_common(tfm, "__driver-ecb-aes-aesni");
@ -613,7 +529,7 @@ static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
struct aesni_rfc4106_gcm_ctx *child_ctx =
aesni_rfc4106_gcm_ctx_get(cryptd_child);
u8 *new_key_mem = NULL;
u8 *new_key_align, *new_key_mem = NULL;
if (key_len < 4) {
crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
@ -637,9 +553,9 @@ static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
if (!new_key_mem)
return -ENOMEM;
new_key_mem = PTR_ALIGN(new_key_mem, AESNI_ALIGN);
memcpy(new_key_mem, key, key_len);
key = new_key_mem;
new_key_align = PTR_ALIGN(new_key_mem, AESNI_ALIGN);
memcpy(new_key_align, key, key_len);
key = new_key_align;
}
if (!irq_fpu_usable())
@ -968,7 +884,7 @@ static struct crypto_alg aesni_algs[] = { {
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_aes_ctx),
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
@ -989,7 +905,7 @@ static struct crypto_alg aesni_algs[] = { {
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_aes_ctx),
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
@ -1033,7 +949,7 @@ static struct crypto_alg aesni_algs[] = { {
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct async_aes_ctx),
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
@ -1098,7 +1014,7 @@ static struct crypto_alg aesni_algs[] = { {
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct async_aes_ctx),
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
@ -1126,7 +1042,7 @@ static struct crypto_alg aesni_algs[] = { {
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_aes_ctx),
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
@ -1150,7 +1066,7 @@ static struct crypto_alg aesni_algs[] = { {
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_aes_ctx),
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
@ -1174,7 +1090,7 @@ static struct crypto_alg aesni_algs[] = { {
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_aes_ctx),
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,

View File

@ -5,10 +5,6 @@
*
* Camellia parts based on code by:
* Copyright (C) 2006 NTT (Nippon Telegraph and Telephone Corporation)
* CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
* Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
* CTR part based on code (crypto/ctr.c) by:
* (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@ -34,9 +30,9 @@
#include <linux/module.h>
#include <linux/types.h>
#include <crypto/algapi.h>
#include <crypto/b128ops.h>
#include <crypto/lrw.h>
#include <crypto/xts.h>
#include <asm/crypto/glue_helper.h>
#define CAMELLIA_MIN_KEY_SIZE 16
#define CAMELLIA_MAX_KEY_SIZE 32
@ -1312,307 +1308,128 @@ static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key,
&tfm->crt_flags);
}
static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
void (*fn)(struct camellia_ctx *, u8 *, const u8 *),
void (*fn_2way)(struct camellia_ctx *, u8 *, const u8 *))
static void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src)
{
struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
unsigned int bsize = CAMELLIA_BLOCK_SIZE;
unsigned int nbytes;
int err;
u128 iv = *src;
err = blkcipher_walk_virt(desc, walk);
camellia_dec_blk_2way(ctx, (u8 *)dst, (u8 *)src);
while ((nbytes = walk->nbytes)) {
u8 *wsrc = walk->src.virt.addr;
u8 *wdst = walk->dst.virt.addr;
u128_xor(&dst[1], &dst[1], &iv);
}
/* Process two block batch */
if (nbytes >= bsize * 2) {
do {
fn_2way(ctx, wdst, wsrc);
static void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
{
be128 ctrblk;
wsrc += bsize * 2;
wdst += bsize * 2;
nbytes -= bsize * 2;
} while (nbytes >= bsize * 2);
if (dst != src)
*dst = *src;
if (nbytes < bsize)
goto done;
}
u128_to_be128(&ctrblk, iv);
u128_inc(iv);
/* Handle leftovers */
do {
fn(ctx, wdst, wsrc);
camellia_enc_blk_xor(ctx, (u8 *)dst, (u8 *)&ctrblk);
}
wsrc += bsize;
wdst += bsize;
nbytes -= bsize;
} while (nbytes >= bsize);
static void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src,
u128 *iv)
{
be128 ctrblks[2];
done:
err = blkcipher_walk_done(desc, walk, nbytes);
if (dst != src) {
dst[0] = src[0];
dst[1] = src[1];
}
return err;
u128_to_be128(&ctrblks[0], iv);
u128_inc(iv);
u128_to_be128(&ctrblks[1], iv);
u128_inc(iv);
camellia_enc_blk_xor_2way(ctx, (u8 *)dst, (u8 *)ctrblks);
}
static const struct common_glue_ctx camellia_enc = {
.num_funcs = 2,
.fpu_blocks_limit = -1,
.funcs = { {
.num_blocks = 2,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk_2way) }
}, {
.num_blocks = 1,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk) }
} }
};
static const struct common_glue_ctx camellia_ctr = {
.num_funcs = 2,
.fpu_blocks_limit = -1,
.funcs = { {
.num_blocks = 2,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr_2way) }
}, {
.num_blocks = 1,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr) }
} }
};
static const struct common_glue_ctx camellia_dec = {
.num_funcs = 2,
.fpu_blocks_limit = -1,
.funcs = { {
.num_blocks = 2,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk_2way) }
}, {
.num_blocks = 1,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk) }
} }
};
static const struct common_glue_ctx camellia_dec_cbc = {
.num_funcs = 2,
.fpu_blocks_limit = -1,
.funcs = { {
.num_blocks = 2,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_decrypt_cbc_2way) }
}, {
.num_blocks = 1,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_dec_blk) }
} }
};
static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
return ecb_crypt(desc, &walk, camellia_enc_blk, camellia_enc_blk_2way);
return glue_ecb_crypt_128bit(&camellia_enc, desc, dst, src, nbytes);
}
static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
return ecb_crypt(desc, &walk, camellia_dec_blk, camellia_dec_blk_2way);
}
static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
unsigned int bsize = CAMELLIA_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u128 *src = (u128 *)walk->src.virt.addr;
u128 *dst = (u128 *)walk->dst.virt.addr;
u128 *iv = (u128 *)walk->iv;
do {
u128_xor(dst, src, iv);
camellia_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
iv = dst;
src += 1;
dst += 1;
nbytes -= bsize;
} while (nbytes >= bsize);
u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv);
return nbytes;
return glue_ecb_crypt_128bit(&camellia_dec, desc, dst, src, nbytes);
}
static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct blkcipher_walk walk;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
while ((nbytes = walk.nbytes)) {
nbytes = __cbc_encrypt(desc, &walk);
err = blkcipher_walk_done(desc, &walk, nbytes);
}
return err;
}
static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
unsigned int bsize = CAMELLIA_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u128 *src = (u128 *)walk->src.virt.addr;
u128 *dst = (u128 *)walk->dst.virt.addr;
u128 ivs[2 - 1];
u128 last_iv;
/* Start of the last block. */
src += nbytes / bsize - 1;
dst += nbytes / bsize - 1;
last_iv = *src;
/* Process two block batch */
if (nbytes >= bsize * 2) {
do {
nbytes -= bsize * (2 - 1);
src -= 2 - 1;
dst -= 2 - 1;
ivs[0] = src[0];
camellia_dec_blk_2way(ctx, (u8 *)dst, (u8 *)src);
u128_xor(dst + 1, dst + 1, ivs + 0);
nbytes -= bsize;
if (nbytes < bsize)
goto done;
u128_xor(dst, dst, src - 1);
src -= 1;
dst -= 1;
} while (nbytes >= bsize * 2);
if (nbytes < bsize)
goto done;
}
/* Handle leftovers */
for (;;) {
camellia_dec_blk(ctx, (u8 *)dst, (u8 *)src);
nbytes -= bsize;
if (nbytes < bsize)
break;
u128_xor(dst, dst, src - 1);
src -= 1;
dst -= 1;
}
done:
u128_xor(dst, dst, (u128 *)walk->iv);
*(u128 *)walk->iv = last_iv;
return nbytes;
return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(camellia_enc_blk), desc,
dst, src, nbytes);
}
static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct blkcipher_walk walk;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
while ((nbytes = walk.nbytes)) {
nbytes = __cbc_decrypt(desc, &walk);
err = blkcipher_walk_done(desc, &walk, nbytes);
}
return err;
}
static inline void u128_to_be128(be128 *dst, const u128 *src)
{
dst->a = cpu_to_be64(src->a);
dst->b = cpu_to_be64(src->b);
}
static inline void be128_to_u128(u128 *dst, const be128 *src)
{
dst->a = be64_to_cpu(src->a);
dst->b = be64_to_cpu(src->b);
}
static inline void u128_inc(u128 *i)
{
i->b++;
if (!i->b)
i->a++;
}
static void ctr_crypt_final(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
u8 keystream[CAMELLIA_BLOCK_SIZE];
u8 *src = walk->src.virt.addr;
u8 *dst = walk->dst.virt.addr;
unsigned int nbytes = walk->nbytes;
u128 ctrblk;
memcpy(keystream, src, nbytes);
camellia_enc_blk_xor(ctx, keystream, walk->iv);
memcpy(dst, keystream, nbytes);
be128_to_u128(&ctrblk, (be128 *)walk->iv);
u128_inc(&ctrblk);
u128_to_be128((be128 *)walk->iv, &ctrblk);
}
static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
unsigned int bsize = CAMELLIA_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u128 *src = (u128 *)walk->src.virt.addr;
u128 *dst = (u128 *)walk->dst.virt.addr;
u128 ctrblk;
be128 ctrblocks[2];
be128_to_u128(&ctrblk, (be128 *)walk->iv);
/* Process two block batch */
if (nbytes >= bsize * 2) {
do {
if (dst != src) {
dst[0] = src[0];
dst[1] = src[1];
}
/* create ctrblks for parallel encrypt */
u128_to_be128(&ctrblocks[0], &ctrblk);
u128_inc(&ctrblk);
u128_to_be128(&ctrblocks[1], &ctrblk);
u128_inc(&ctrblk);
camellia_enc_blk_xor_2way(ctx, (u8 *)dst,
(u8 *)ctrblocks);
src += 2;
dst += 2;
nbytes -= bsize * 2;
} while (nbytes >= bsize * 2);
if (nbytes < bsize)
goto done;
}
/* Handle leftovers */
do {
if (dst != src)
*dst = *src;
u128_to_be128(&ctrblocks[0], &ctrblk);
u128_inc(&ctrblk);
camellia_enc_blk_xor(ctx, (u8 *)dst, (u8 *)ctrblocks);
src += 1;
dst += 1;
nbytes -= bsize;
} while (nbytes >= bsize);
done:
u128_to_be128((be128 *)walk->iv, &ctrblk);
return nbytes;
return glue_cbc_decrypt_128bit(&camellia_dec_cbc, desc, dst, src,
nbytes);
}
static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct blkcipher_walk walk;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt_block(desc, &walk, CAMELLIA_BLOCK_SIZE);
while ((nbytes = walk.nbytes) >= CAMELLIA_BLOCK_SIZE) {
nbytes = __ctr_crypt(desc, &walk);
err = blkcipher_walk_done(desc, &walk, nbytes);
}
if (walk.nbytes) {
ctr_crypt_final(desc, &walk);
err = blkcipher_walk_done(desc, &walk, 0);
}
return err;
return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes);
}
static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)

View File

@ -0,0 +1,307 @@
/*
* Shared glue code for 128bit block ciphers
*
* Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
*
* CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
* Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
* CTR part based on code (crypto/ctr.c) by:
* (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA
*
*/
#include <linux/module.h>
#include <crypto/b128ops.h>
#include <crypto/lrw.h>
#include <crypto/xts.h>
#include <asm/crypto/glue_helper.h>
#include <crypto/scatterwalk.h>
/*
 * Run an ECB operation over a prepared blkcipher walk using the function
 * table in @gctx.
 *
 * @gctx: table of ECB functions, each with the number of 16-byte blocks it
 *        handles per call, plus the fpu_blocks_limit passed to
 *        glue_fpu_begin()
 * @desc: blkcipher descriptor; the cipher context is taken from desc->tfm
 * @walk: walk already initialised by the caller
 *
 * Returns the last value produced by blkcipher_walk_virt() or
 * blkcipher_walk_done().
 */
static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
void *ctx = crypto_blkcipher_ctx(desc->tfm);
const unsigned int bsize = 128 / 8;
unsigned int nbytes, i, func_bytes;
bool fpu_enabled = false;
int err;
err = blkcipher_walk_virt(desc, walk);
/* One iteration per chunk the walk maps; ends when no bytes remain. */
while ((nbytes = walk->nbytes)) {
u8 *wsrc = walk->src.virt.addr;
u8 *wdst = walk->dst.virt.addr;
/* The returned state is carried across chunks and handed to glue_fpu_end(). */
fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
desc, fpu_enabled, nbytes);
/*
 * Try the table entries in order; each one consumes as many full
 * batches of its size as still fit in this chunk.
 */
for (i = 0; i < gctx->num_funcs; i++) {
func_bytes = bsize * gctx->funcs[i].num_blocks;
/* Process multi-block batch */
if (nbytes >= func_bytes) {
do {
gctx->funcs[i].fn_u.ecb(ctx, wdst,
wsrc);
wsrc += func_bytes;
wdst += func_bytes;
nbytes -= func_bytes;
} while (nbytes >= func_bytes);
/* Less than one block left: no later entry can use it. */
if (nbytes < bsize)
goto done;
}
}
done:
/* Report the unprocessed remainder back to the walk. */
err = blkcipher_walk_done(desc, walk, nbytes);
}
glue_fpu_end(fpu_enabled);
return err;
}
int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
return __glue_ecb_crypt_128bit(gctx, desc, &walk);
}
EXPORT_SYMBOL_GPL(glue_ecb_crypt_128bit);
/*
 * CBC-encrypt the whole 128-bit blocks of the current chunk of @walk.
 *
 * @fn:   single-block encryption function
 * @desc: block cipher descriptor; supplies the cipher context
 * @walk: walk positioned on a mapped chunk; walk->iv holds the chaining value
 *
 * Each block is XORed with the previous ciphertext block (the IV for the
 * first one) and then encrypted in place in the destination buffer. On exit
 * walk->iv is replaced by the last ciphertext block so that the next chunk,
 * or a later request, chains correctly.
 *
 * The loop is a do/while, so one block is always processed.
 * NOTE(review): this assumes walk->nbytes >= 16 on entry - confirm that the
 * walk never hands out a shorter chunk here.
 *
 * Returns the number of bytes of the chunk that were not processed.
 */
static unsigned int __glue_cbc_encrypt_128bit(const common_glue_func_t fn,
					      struct blkcipher_desc *desc,
					      struct blkcipher_walk *walk)
{
	void *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = 128 / 8;
	unsigned int nbytes = walk->nbytes;
	u128 *src = (u128 *)walk->src.virt.addr;
	u128 *dst = (u128 *)walk->dst.virt.addr;
	u128 *iv = (u128 *)walk->iv;

	do {
		u128_xor(dst, src, iv);
		fn(ctx, (u8 *)dst, (u8 *)dst);
		/* The block just produced chains into the next one. */
		iv = dst;

		src += 1;
		dst += 1;
		nbytes -= bsize;
	} while (nbytes >= bsize);

	/*
	 * Store the new chaining value by plain copy. XORing it into the old
	 * IV would leave (old IV ^ last ciphertext block) behind and corrupt
	 * every following chunk.
	 */
	*(u128 *)walk->iv = *iv;

	return nbytes;
}
/*
 * glue_cbc_encrypt_128bit - CBC encryption entry point for 128-bit ciphers.
 * @fn:     single-block encryption function
 * @desc:   block cipher descriptor of the request
 * @dst:    destination scatterlist
 * @src:    source scatterlist
 * @nbytes: number of bytes to encrypt
 *
 * Walks the scatterlists chunk by chunk and encrypts each chunk with
 * __glue_cbc_encrypt_128bit(). Returns the last status reported by the walk.
 */
int glue_cbc_encrypt_128bit(const common_glue_func_t fn,
			    struct blkcipher_desc *desc,
			    struct scatterlist *dst,
			    struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;
	unsigned int leftover;
	int status;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	status = blkcipher_walk_virt(desc, &walk);

	while (walk.nbytes != 0) {
		/* Bytes of this chunk the block loop could not consume. */
		leftover = __glue_cbc_encrypt_128bit(fn, desc, &walk);
		status = blkcipher_walk_done(desc, &walk, leftover);
	}

	return status;
}
EXPORT_SYMBOL_GPL(glue_cbc_encrypt_128bit);
/*
 * CBC-decrypt the whole 128-bit blocks of the current chunk of @walk,
 * working from the last block towards the first.
 *
 * The last ciphertext block is saved up front (last_iv) because it becomes
 * the chaining value for the next chunk and may be overwritten when src and
 * dst alias. For every batch the table function is called on the batch's
 * first block; the first block of each batch is then XORed with the
 * ciphertext block preceding it (src - 1), or with walk->iv once the start of
 * the chunk has been reached (label done).
 *
 * NOTE(review): multi-block .cbc functions are presumably expected to chain
 * blocks 1..n-1 of a batch themselves (serpent_decrypt_cbc_xway does) -
 * confirm for other ciphers.
 *
 * Returns the number of bytes of the chunk that were not processed.
 */
static unsigned int
__glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
void *ctx = crypto_blkcipher_ctx(desc->tfm);
const unsigned int bsize = 128 / 8;
unsigned int nbytes = walk->nbytes;
u128 *src = (u128 *)walk->src.virt.addr;
u128 *dst = (u128 *)walk->dst.virt.addr;
u128 last_iv;
unsigned int num_blocks, func_bytes;
unsigned int i;
/* Start of the last block. */
src += nbytes / bsize - 1;
dst += nbytes / bsize - 1;
/* Keep the final ciphertext block before anything is written to dst. */
last_iv = *src;
for (i = 0; i < gctx->num_funcs; i++) {
num_blocks = gctx->funcs[i].num_blocks;
func_bytes = bsize * num_blocks;
/* Process multi-block batch */
if (nbytes >= func_bytes) {
do {
/* Step back to the first block of this batch. */
nbytes -= func_bytes - bsize;
src -= num_blocks - 1;
dst -= num_blocks - 1;
gctx->funcs[i].fn_u.cbc(ctx, dst, src);
nbytes -= bsize;
if (nbytes < bsize)
goto done;
/* Chain the batch's first block with the preceding ciphertext. */
u128_xor(dst, dst, src - 1);
src -= 1;
dst -= 1;
} while (nbytes >= func_bytes);
if (nbytes < bsize)
goto done;
}
}
done:
/* The first block of the chunk chains with the incoming IV. */
u128_xor(dst, dst, (u128 *)walk->iv);
*(u128 *)walk->iv = last_iv;
return nbytes;
}
/*
 * glue_cbc_decrypt_128bit - CBC decryption entry point for 128-bit ciphers.
 * @gctx:   function table selecting the batch implementations to use
 * @desc:   block cipher descriptor of the request
 * @dst:    destination scatterlist
 * @src:    source scatterlist
 * @nbytes: number of bytes to decrypt
 *
 * Walks the scatterlists chunk by chunk, calls glue_fpu_begin() for each
 * chunk, and decrypts it with __glue_cbc_decrypt_128bit(). The FPU state is
 * released once after the last chunk. Returns the last status reported by
 * the walk.
 */
int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
			    struct blkcipher_desc *desc,
			    struct scatterlist *dst,
			    struct scatterlist *src, unsigned int nbytes)
{
	const unsigned int block_bytes = 128 / 8;
	struct blkcipher_walk walk;
	bool fpu_on = false;
	unsigned int leftover;
	int status;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	status = blkcipher_walk_virt(desc, &walk);

	while (walk.nbytes != 0) {
		fpu_on = glue_fpu_begin(block_bytes, gctx->fpu_blocks_limit,
					desc, fpu_on, walk.nbytes);
		leftover = __glue_cbc_decrypt_128bit(gctx, desc, &walk);
		status = blkcipher_walk_done(desc, &walk, leftover);
	}

	glue_fpu_end(fpu_on);

	return status;
}
EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit);
/*
 * Process the final, partial (shorter than 16 bytes) block of a CTR request.
 *
 * @fn_ctr: single-block CTR function
 * @desc:   block cipher descriptor; supplies the cipher context
 * @walk:   walk positioned on the trailing walk->nbytes bytes
 *
 * The remaining input bytes are copied into a block-sized temporary, run
 * through @fn_ctr in place, and only the first walk->nbytes bytes of the
 * result are copied to the destination. The counter is converted from and
 * back to the big-endian form kept in walk->iv around the call.
 *
 * This helper has internal linkage and is only used from
 * glue_ctr_crypt_128bit(), so it is deliberately not exported: exporting a
 * static symbol is contradictory, as no other object could link against it.
 */
static void glue_ctr_crypt_final_128bit(const common_glue_ctr_func_t fn_ctr,
					struct blkcipher_desc *desc,
					struct blkcipher_walk *walk)
{
	void *ctx = crypto_blkcipher_ctx(desc->tfm);
	u8 *src = (u8 *)walk->src.virt.addr;
	u8 *dst = (u8 *)walk->dst.virt.addr;
	unsigned int nbytes = walk->nbytes;
	u128 ctrblk;
	u128 tmp;

	be128_to_u128(&ctrblk, (be128 *)walk->iv);

	/* Only nbytes of tmp are meaningful; the rest is never copied out. */
	memcpy(&tmp, src, nbytes);
	fn_ctr(ctx, &tmp, &tmp, &ctrblk);
	memcpy(dst, &tmp, nbytes);

	u128_to_be128((be128 *)walk->iv, &ctrblk);
}
/*
 * CTR-process the whole 128-bit blocks of the current chunk of @walk.
 *
 * The counter is read from walk->iv (stored big-endian) into ctrblk, passed by
 * pointer to each table function, and written back to walk->iv on exit.
 * NOTE(review): the .ctr functions are presumably responsible for advancing
 * the counter (the serpent ones call u128_inc) - confirm for other ciphers.
 * Table entries are tried in index order; each is applied while at least its
 * batch size in bytes remains.
 *
 * Returns the number of bytes of the chunk that were not processed.
 */
static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
const unsigned int bsize = 128 / 8;
void *ctx = crypto_blkcipher_ctx(desc->tfm);
unsigned int nbytes = walk->nbytes;
u128 *src = (u128 *)walk->src.virt.addr;
u128 *dst = (u128 *)walk->dst.virt.addr;
u128 ctrblk;
unsigned int num_blocks, func_bytes;
unsigned int i;
be128_to_u128(&ctrblk, (be128 *)walk->iv);
/* Process multi-block batch */
for (i = 0; i < gctx->num_funcs; i++) {
num_blocks = gctx->funcs[i].num_blocks;
func_bytes = bsize * num_blocks;
if (nbytes >= func_bytes) {
do {
gctx->funcs[i].fn_u.ctr(ctx, dst, src, &ctrblk);
src += num_blocks;
dst += num_blocks;
nbytes -= func_bytes;
} while (nbytes >= func_bytes);
/* Less than one whole block left: stop trying smaller batches. */
if (nbytes < bsize)
goto done;
}
}
done:
/* Persist the advanced counter for the next chunk / partial block. */
u128_to_be128((be128 *)walk->iv, &ctrblk);
return nbytes;
}
/*
 * glue_ctr_crypt_128bit - CTR mode entry point for 128-bit ciphers.
 * @gctx:   function table selecting the batch implementations to use
 * @desc:   block cipher descriptor of the request
 * @dst:    destination scatterlist
 * @src:    source scatterlist
 * @nbytes: number of bytes to process
 *
 * Whole blocks are handled chunk by chunk through __glue_ctr_crypt_128bit().
 * Once the FPU state has been released, a trailing partial block (if any) is
 * handled by glue_ctr_crypt_final_128bit() using the last entry of the
 * function table. Returns the last status reported by the walk.
 */
int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
			  struct blkcipher_desc *desc, struct scatterlist *dst,
			  struct scatterlist *src, unsigned int nbytes)
{
	const unsigned int block_bytes = 128 / 8;
	struct blkcipher_walk walk;
	bool fpu_on = false;
	unsigned int leftover;
	int status;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	status = blkcipher_walk_virt_block(desc, &walk, block_bytes);

	/* Full blocks first. */
	while (walk.nbytes >= block_bytes) {
		fpu_on = glue_fpu_begin(block_bytes, gctx->fpu_blocks_limit,
					desc, fpu_on, walk.nbytes);
		leftover = __glue_ctr_crypt_128bit(gctx, desc, &walk);
		status = blkcipher_walk_done(desc, &walk, leftover);
	}

	glue_fpu_end(fpu_on);

	/* Then the tail that is shorter than one block, if there is one. */
	if (walk.nbytes != 0) {
		const unsigned int last = gctx->num_funcs - 1;

		glue_ctr_crypt_final_128bit(gctx->funcs[last].fn_u.ctr,
					    desc, &walk);
		status = blkcipher_walk_done(desc, &walk, 0);
	}

	return status;
}
EXPORT_SYMBOL_GPL(glue_ctr_crypt_128bit);
MODULE_LICENSE("GPL");

View File

@ -0,0 +1,704 @@
/*
* Serpent Cipher 8-way parallel algorithm (x86_64/AVX)
*
* Copyright (C) 2012 Johannes Goetzfried
* <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
*
* Based on arch/x86/crypto/serpent-sse2-x86_64-asm_64.S by
* Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA
*
*/
.file "serpent-avx-x86_64-asm_64.S"
.text
/* First function argument: base address the round key words are read from. */
#define CTX %rdi
/**********************************************************************
8-way AVX serpent
**********************************************************************/
/*
 * Register aliases. The S()/K2()/LK2()/KL2() macros paste a "1" or "2" suffix
 * onto RA..RE, so RA1..RE1 form one group of working registers and RA2..RE2 a
 * second group processed by the same instruction sequence.
 * tp   - scratch register used inside the S-box macros
 * RNOT - set to all ones at function entry; XOR with it is a bitwise NOT
 * RK0..RK3 - broadcast round key words (get_key); also passed as temporaries
 *            to the block load/store macros
 */
#define RA1 %xmm0
#define RB1 %xmm1
#define RC1 %xmm2
#define RD1 %xmm3
#define RE1 %xmm4
#define tp %xmm5
#define RA2 %xmm6
#define RB2 %xmm7
#define RC2 %xmm8
#define RD2 %xmm9
#define RE2 %xmm10
#define RNOT %xmm11
#define RK0 %xmm12
#define RK1 %xmm13
#define RK2 %xmm14
#define RK3 %xmm15
/*
 * S-box S0, split into two instruction groups. S() expands S0_1 directly
 * followed by S0_2 on the same registers, so S0_2 depends on the state S0_1
 * leaves in x0..x4. tp is scratch in S0_1; vpxor with RNOT is a bitwise NOT
 * (RNOT holds all ones).
 */
#define S0_1(x0, x1, x2, x3, x4) \
vpor x0, x3, tp; \
vpxor x3, x0, x0; \
vpxor x2, x3, x4; \
vpxor RNOT, x4, x4; \
vpxor x1, tp, x3; \
vpand x0, x1, x1; \
vpxor x4, x1, x1; \
vpxor x0, x2, x2;
#define S0_2(x0, x1, x2, x3, x4) \
vpxor x3, x0, x0; \
vpor x0, x4, x4; \
vpxor x2, x0, x0; \
vpand x1, x2, x2; \
vpxor x2, x3, x3; \
vpxor RNOT, x1, x1; \
vpxor x4, x2, x2; \
vpxor x2, x1, x1;
/*
 * S-box S1, split into two instruction groups that S() expands back to back
 * on the same registers. tp is scratch in S1_1; vpxor with RNOT is a bitwise
 * NOT (RNOT holds all ones).
 */
#define S1_1(x0, x1, x2, x3, x4) \
vpxor x0, x1, tp; \
vpxor x3, x0, x0; \
vpxor RNOT, x3, x3; \
vpand tp, x1, x4; \
vpor tp, x0, x0; \
vpxor x2, x3, x3; \
vpxor x3, x0, x0; \
vpxor x3, tp, x1;
#define S1_2(x0, x1, x2, x3, x4) \
vpxor x4, x3, x3; \
vpor x4, x1, x1; \
vpxor x2, x4, x4; \
vpand x0, x2, x2; \
vpxor x1, x2, x2; \
vpor x0, x1, x1; \
vpxor RNOT, x0, x0; \
vpxor x2, x0, x0; \
vpxor x1, x4, x4;
/*
 * S-box S2, split into two instruction groups that S() expands back to back
 * on the same registers. S2_2 reads tp as left by S2_1, so tp must not be
 * clobbered between the two halves. vpxor with RNOT is a bitwise NOT.
 */
#define S2_1(x0, x1, x2, x3, x4) \
vpxor RNOT, x3, x3; \
vpxor x0, x1, x1; \
vpand x2, x0, tp; \
vpxor x3, tp, tp; \
vpor x0, x3, x3; \
vpxor x1, x2, x2; \
vpxor x1, x3, x3; \
vpand tp, x1, x1;
#define S2_2(x0, x1, x2, x3, x4) \
vpxor x2, tp, tp; \
vpand x3, x2, x2; \
vpor x1, x3, x3; \
vpxor RNOT, tp, tp; \
vpxor tp, x3, x3; \
vpxor tp, x0, x4; \
vpxor x2, tp, x0; \
vpor x2, x1, x1;
/*
 * S-box S3, split into two instruction groups that S() expands back to back
 * on the same registers. tp is scratch in S3_1 only.
 */
#define S3_1(x0, x1, x2, x3, x4) \
vpxor x3, x1, tp; \
vpor x0, x3, x3; \
vpand x0, x1, x4; \
vpxor x2, x0, x0; \
vpxor tp, x2, x2; \
vpand x3, tp, x1; \
vpxor x3, x2, x2; \
vpor x4, x0, x0; \
vpxor x3, x4, x4;
#define S3_2(x0, x1, x2, x3, x4) \
vpxor x0, x1, x1; \
vpand x3, x0, x0; \
vpand x4, x3, x3; \
vpxor x2, x3, x3; \
vpor x1, x4, x4; \
vpand x1, x2, x2; \
vpxor x3, x4, x4; \
vpxor x3, x0, x0; \
vpxor x2, x3, x3;
/*
 * S-box S4, split into two instruction groups that S() expands back to back
 * on the same registers. S4_2 reads tp as left by S4_1, so tp must not be
 * clobbered between the two halves. vpxor with RNOT is a bitwise NOT.
 */
#define S4_1(x0, x1, x2, x3, x4) \
vpand x0, x3, tp; \
vpxor x3, x0, x0; \
vpxor x2, tp, tp; \
vpor x3, x2, x2; \
vpxor x1, x0, x0; \
vpxor tp, x3, x4; \
vpor x0, x2, x2; \
vpxor x1, x2, x2;
#define S4_2(x0, x1, x2, x3, x4) \
vpand x0, x1, x1; \
vpxor x4, x1, x1; \
vpand x2, x4, x4; \
vpxor tp, x2, x2; \
vpxor x0, x4, x4; \
vpor x1, tp, x3; \
vpxor RNOT, x1, x1; \
vpxor x0, x3, x3;
/*
 * S-box S5, split into two instruction groups that S() expands back to back
 * on the same registers. tp is scratch in S5_1; vpxor with RNOT is a bitwise
 * NOT (RNOT holds all ones).
 */
#define S5_1(x0, x1, x2, x3, x4) \
vpor x0, x1, tp; \
vpxor tp, x2, x2; \
vpxor RNOT, x3, x3; \
vpxor x0, x1, x4; \
vpxor x2, x0, x0; \
vpand x4, tp, x1; \
vpor x3, x4, x4; \
vpxor x0, x4, x4;
#define S5_2(x0, x1, x2, x3, x4) \
vpand x3, x0, x0; \
vpxor x3, x1, x1; \
vpxor x2, x3, x3; \
vpxor x1, x0, x0; \
vpand x4, x2, x2; \
vpxor x2, x1, x1; \
vpand x0, x2, x2; \
vpxor x2, x3, x3;
/*
 * S-box S6, split into two instruction groups that S() expands back to back
 * on the same registers. tp is scratch in S6_1; vpxor with RNOT is a bitwise
 * NOT (RNOT holds all ones).
 */
#define S6_1(x0, x1, x2, x3, x4) \
vpxor x0, x3, x3; \
vpxor x2, x1, tp; \
vpxor x0, x2, x2; \
vpand x3, x0, x0; \
vpor x3, tp, tp; \
vpxor RNOT, x1, x4; \
vpxor tp, x0, x0; \
vpxor x2, tp, x1;
#define S6_2(x0, x1, x2, x3, x4) \
vpxor x4, x3, x3; \
vpxor x0, x4, x4; \
vpand x0, x2, x2; \
vpxor x1, x4, x4; \
vpxor x3, x2, x2; \
vpand x1, x3, x3; \
vpxor x0, x3, x3; \
vpxor x2, x1, x1;
/*
 * S-box S7, split into two instruction groups that S() expands back to back
 * on the same registers. tp is scratch in S7_1; vpxor with RNOT is a bitwise
 * NOT (RNOT holds all ones).
 */
#define S7_1(x0, x1, x2, x3, x4) \
vpxor RNOT, x1, tp; \
vpxor RNOT, x0, x0; \
vpand x2, tp, x1; \
vpxor x3, x1, x1; \
vpor tp, x3, x3; \
vpxor x2, tp, x4; \
vpxor x3, x2, x2; \
vpxor x0, x3, x3; \
vpor x1, x0, x0;
#define S7_2(x0, x1, x2, x3, x4) \
vpand x0, x2, x2; \
vpxor x4, x0, x0; \
vpxor x3, x4, x4; \
vpand x0, x3, x3; \
vpxor x1, x4, x4; \
vpxor x4, x2, x2; \
vpxor x1, x3, x3; \
vpor x0, x4, x4; \
vpxor x1, x4, x4;
/*
 * S-box SI0 (used on the decryption path), split into two instruction groups
 * that S()/SP() expand in order on the same registers. tp is scratch in
 * SI0_1; vpxor with RNOT is a bitwise NOT (RNOT holds all ones).
 */
#define SI0_1(x0, x1, x2, x3, x4) \
vpxor x0, x1, x1; \
vpor x1, x3, tp; \
vpxor x1, x3, x4; \
vpxor RNOT, x0, x0; \
vpxor tp, x2, x2; \
vpxor x0, tp, x3; \
vpand x1, x0, x0; \
vpxor x2, x0, x0;
#define SI0_2(x0, x1, x2, x3, x4) \
vpand x3, x2, x2; \
vpxor x4, x3, x3; \
vpxor x3, x2, x2; \
vpxor x3, x1, x1; \
vpand x0, x3, x3; \
vpxor x0, x1, x1; \
vpxor x2, x0, x0; \
vpxor x3, x4, x4;
/*
 * S-box SI1 (used on the decryption path), split into two instruction groups
 * that SP() expands in order on the same registers. SI1_2 reads tp as left by
 * SI1_1, so tp must not be clobbered between the halves. vpxor with RNOT is a
 * bitwise NOT.
 */
#define SI1_1(x0, x1, x2, x3, x4) \
vpxor x3, x1, x1; \
vpxor x2, x0, tp; \
vpxor RNOT, x2, x2; \
vpor x1, x0, x4; \
vpxor x3, x4, x4; \
vpand x1, x3, x3; \
vpxor x2, x1, x1; \
vpand x4, x2, x2;
#define SI1_2(x0, x1, x2, x3, x4) \
vpxor x1, x4, x4; \
vpor x3, x1, x1; \
vpxor tp, x3, x3; \
vpxor tp, x2, x2; \
vpor x4, tp, x0; \
vpxor x4, x2, x2; \
vpxor x0, x1, x1; \
vpxor x1, x4, x4;
/*
 * S-box SI2 (used on the decryption path), split into two instruction groups
 * that SP() expands in order on the same registers. tp is scratch in SI2_1;
 * vpxor with RNOT is a bitwise NOT (RNOT holds all ones).
 */
#define SI2_1(x0, x1, x2, x3, x4) \
vpxor x1, x2, x2; \
vpxor RNOT, x3, tp; \
vpor x2, tp, tp; \
vpxor x3, x2, x2; \
vpxor x0, x3, x4; \
vpxor x1, tp, x3; \
vpor x2, x1, x1; \
vpxor x0, x2, x2;
#define SI2_2(x0, x1, x2, x3, x4) \
vpxor x4, x1, x1; \
vpor x3, x4, x4; \
vpxor x3, x2, x2; \
vpxor x2, x4, x4; \
vpand x1, x2, x2; \
vpxor x3, x2, x2; \
vpxor x4, x3, x3; \
vpxor x0, x4, x4;
/*
 * S-box SI3 (used on the decryption path), split into two instruction groups
 * that SP() expands in order on the same registers. tp is scratch in SI3_1
 * only.
 */
#define SI3_1(x0, x1, x2, x3, x4) \
vpxor x1, x2, x2; \
vpand x2, x1, tp; \
vpxor x0, tp, tp; \
vpor x1, x0, x0; \
vpxor x3, x1, x4; \
vpxor x3, x0, x0; \
vpor tp, x3, x3; \
vpxor x2, tp, x1;
#define SI3_2(x0, x1, x2, x3, x4) \
vpxor x3, x1, x1; \
vpxor x2, x0, x0; \
vpxor x3, x2, x2; \
vpand x1, x3, x3; \
vpxor x0, x1, x1; \
vpand x2, x0, x0; \
vpxor x3, x4, x4; \
vpxor x0, x3, x3; \
vpxor x1, x0, x0;
/*
 * S-box SI4 (used on the decryption path), split into two instruction groups
 * that SP() expands in order on the same registers. tp is scratch in SI4_1;
 * vpxor with RNOT is a bitwise NOT (RNOT holds all ones).
 */
#define SI4_1(x0, x1, x2, x3, x4) \
vpxor x3, x2, x2; \
vpand x1, x0, tp; \
vpxor x2, tp, tp; \
vpor x3, x2, x2; \
vpxor RNOT, x0, x4; \
vpxor tp, x1, x1; \
vpxor x2, tp, x0; \
vpand x4, x2, x2;
#define SI4_2(x0, x1, x2, x3, x4) \
vpxor x0, x2, x2; \
vpor x4, x0, x0; \
vpxor x3, x0, x0; \
vpand x2, x3, x3; \
vpxor x3, x4, x4; \
vpxor x1, x3, x3; \
vpand x0, x1, x1; \
vpxor x1, x4, x4; \
vpxor x3, x0, x0;
/*
 * S-box SI5 (used on the decryption path), split into two instruction groups
 * that SP() expands in order on the same registers. SI5_2 reads tp as left by
 * SI5_1, so tp must not be clobbered between the halves. vpxor with RNOT is a
 * bitwise NOT.
 */
#define SI5_1(x0, x1, x2, x3, x4) \
vpor x2, x1, tp; \
vpxor x1, x2, x2; \
vpxor x3, tp, tp; \
vpand x1, x3, x3; \
vpxor x3, x2, x2; \
vpor x0, x3, x3; \
vpxor RNOT, x0, x0; \
vpxor x2, x3, x3; \
vpor x0, x2, x2;
#define SI5_2(x0, x1, x2, x3, x4) \
vpxor tp, x1, x4; \
vpxor x4, x2, x2; \
vpand x0, x4, x4; \
vpxor tp, x0, x0; \
vpxor x3, tp, x1; \
vpand x2, x0, x0; \
vpxor x3, x2, x2; \
vpxor x2, x0, x0; \
vpxor x4, x2, x2; \
vpxor x3, x4, x4;
/*
 * S-box SI6 (used on the decryption path), split into two instruction groups
 * that SP() expands in order on the same registers. SI6_2 reads tp as left by
 * SI6_1, so tp must not be clobbered between the halves. vpxor with RNOT is a
 * bitwise NOT.
 */
#define SI6_1(x0, x1, x2, x3, x4) \
vpxor x2, x0, x0; \
vpand x3, x0, tp; \
vpxor x3, x2, x2; \
vpxor x2, tp, tp; \
vpxor x1, x3, x3; \
vpor x0, x2, x2; \
vpxor x3, x2, x2; \
vpand tp, x3, x3;
#define SI6_2(x0, x1, x2, x3, x4) \
vpxor RNOT, tp, tp; \
vpxor x1, x3, x3; \
vpand x2, x1, x1; \
vpxor tp, x0, x4; \
vpxor x4, x3, x3; \
vpxor x2, x4, x4; \
vpxor x1, tp, x0; \
vpxor x0, x2, x2;
/*
 * S-box SI7 (used on the decryption path), split into two instruction groups
 * that SP() expands in order on the same registers. SI7_2 reads tp as left by
 * SI7_1, so tp must not be clobbered between the halves. vpxor with RNOT is a
 * bitwise NOT.
 */
#define SI7_1(x0, x1, x2, x3, x4) \
vpand x0, x3, tp; \
vpxor x2, x0, x0; \
vpor x3, x2, x2; \
vpxor x1, x3, x4; \
vpxor RNOT, x0, x0; \
vpor tp, x1, x1; \
vpxor x0, x4, x4; \
vpand x2, x0, x0; \
vpxor x1, x0, x0;
#define SI7_2(x0, x1, x2, x3, x4) \
vpand x2, x1, x1; \
vpxor x2, tp, x3; \
vpxor x3, x4, x4; \
vpand x3, x2, x2; \
vpor x0, x3, x3; \
vpxor x4, x1, x1; \
vpxor x4, x3, x3; \
vpand x0, x4, x4; \
vpxor x2, x4, x4;
/*
 * Load 32-bit word j of round key i into every 32-bit lane of register t.
 * The word is read from byte offset (4*i + j)*4 relative to CTX.
 */
#define get_key(i, j, t) \
vbroadcastss (4*(i)+(j))*4(CTX), t;
/*
 * Key mixing for both register groups: load the four words of round key i
 * into RK0..RK3 and XOR them into x0..x3 of group 1 and group 2.
 * x4 is accepted for signature symmetry with the other macros but not used.
 */
#define K2(x0, x1, x2, x3, x4, i) \
get_key(i, 0, RK0); \
get_key(i, 1, RK1); \
get_key(i, 2, RK2); \
get_key(i, 3, RK3); \
vpxor RK0, x0 ## 1, x0 ## 1; \
vpxor RK1, x1 ## 1, x1 ## 1; \
vpxor RK2, x2 ## 1, x2 ## 1; \
vpxor RK3, x3 ## 1, x3 ## 1; \
vpxor RK0, x0 ## 2, x0 ## 2; \
vpxor RK1, x1 ## 2, x1 ## 2; \
vpxor RK2, x2 ## 2, x2 ## 2; \
vpxor RK3, x3 ## 2, x3 ## 2;
/*
 * Encryption-side linear mixing followed by key mixing, applied to both
 * register groups with the instructions of the two groups interleaved.
 *
 * Each rotate-left by n is built from vpslld $n / vpsrld $(32 - n) / vpor,
 * using x4 as the temporary. Rotations used: x0 by 13, x2 by 3, x1 by 1,
 * x3 by 7, x0 by 5, x2 by 22; plain left shifts of x0 by 3 and x1 by 7 are
 * XORed into x3 and x2 respectively. The words of round key i are fetched
 * with get_key() in between and XORed into x0..x3 at the end.
 */
#define LK2(x0, x1, x2, x3, x4, i) \
vpslld $13, x0 ## 1, x4 ## 1; \
vpsrld $(32 - 13), x0 ## 1, x0 ## 1; \
vpor x4 ## 1, x0 ## 1, x0 ## 1; \
vpxor x0 ## 1, x1 ## 1, x1 ## 1; \
vpslld $3, x2 ## 1, x4 ## 1; \
vpsrld $(32 - 3), x2 ## 1, x2 ## 1; \
vpor x4 ## 1, x2 ## 1, x2 ## 1; \
vpxor x2 ## 1, x1 ## 1, x1 ## 1; \
vpslld $13, x0 ## 2, x4 ## 2; \
vpsrld $(32 - 13), x0 ## 2, x0 ## 2; \
vpor x4 ## 2, x0 ## 2, x0 ## 2; \
vpxor x0 ## 2, x1 ## 2, x1 ## 2; \
vpslld $3, x2 ## 2, x4 ## 2; \
vpsrld $(32 - 3), x2 ## 2, x2 ## 2; \
vpor x4 ## 2, x2 ## 2, x2 ## 2; \
vpxor x2 ## 2, x1 ## 2, x1 ## 2; \
vpslld $1, x1 ## 1, x4 ## 1; \
vpsrld $(32 - 1), x1 ## 1, x1 ## 1; \
vpor x4 ## 1, x1 ## 1, x1 ## 1; \
vpslld $3, x0 ## 1, x4 ## 1; \
vpxor x2 ## 1, x3 ## 1, x3 ## 1; \
vpxor x4 ## 1, x3 ## 1, x3 ## 1; \
get_key(i, 1, RK1); \
vpslld $1, x1 ## 2, x4 ## 2; \
vpsrld $(32 - 1), x1 ## 2, x1 ## 2; \
vpor x4 ## 2, x1 ## 2, x1 ## 2; \
vpslld $3, x0 ## 2, x4 ## 2; \
vpxor x2 ## 2, x3 ## 2, x3 ## 2; \
vpxor x4 ## 2, x3 ## 2, x3 ## 2; \
get_key(i, 3, RK3); \
vpslld $7, x3 ## 1, x4 ## 1; \
vpsrld $(32 - 7), x3 ## 1, x3 ## 1; \
vpor x4 ## 1, x3 ## 1, x3 ## 1; \
vpslld $7, x1 ## 1, x4 ## 1; \
vpxor x1 ## 1, x0 ## 1, x0 ## 1; \
vpxor x3 ## 1, x0 ## 1, x0 ## 1; \
vpxor x3 ## 1, x2 ## 1, x2 ## 1; \
vpxor x4 ## 1, x2 ## 1, x2 ## 1; \
get_key(i, 0, RK0); \
vpslld $7, x3 ## 2, x4 ## 2; \
vpsrld $(32 - 7), x3 ## 2, x3 ## 2; \
vpor x4 ## 2, x3 ## 2, x3 ## 2; \
vpslld $7, x1 ## 2, x4 ## 2; \
vpxor x1 ## 2, x0 ## 2, x0 ## 2; \
vpxor x3 ## 2, x0 ## 2, x0 ## 2; \
vpxor x3 ## 2, x2 ## 2, x2 ## 2; \
vpxor x4 ## 2, x2 ## 2, x2 ## 2; \
get_key(i, 2, RK2); \
vpxor RK1, x1 ## 1, x1 ## 1; \
vpxor RK3, x3 ## 1, x3 ## 1; \
vpslld $5, x0 ## 1, x4 ## 1; \
vpsrld $(32 - 5), x0 ## 1, x0 ## 1; \
vpor x4 ## 1, x0 ## 1, x0 ## 1; \
vpslld $22, x2 ## 1, x4 ## 1; \
vpsrld $(32 - 22), x2 ## 1, x2 ## 1; \
vpor x4 ## 1, x2 ## 1, x2 ## 1; \
vpxor RK0, x0 ## 1, x0 ## 1; \
vpxor RK2, x2 ## 1, x2 ## 1; \
vpxor RK1, x1 ## 2, x1 ## 2; \
vpxor RK3, x3 ## 2, x3 ## 2; \
vpslld $5, x0 ## 2, x4 ## 2; \
vpsrld $(32 - 5), x0 ## 2, x0 ## 2; \
vpor x4 ## 2, x0 ## 2, x0 ## 2; \
vpslld $22, x2 ## 2, x4 ## 2; \
vpsrld $(32 - 22), x2 ## 2, x2 ## 2; \
vpor x4 ## 2, x2 ## 2, x2 ## 2; \
vpxor RK0, x0 ## 2, x0 ## 2; \
vpxor RK2, x2 ## 2, x2 ## 2;
/*
 * Decryption-side key mixing followed by linear mixing, applied to both
 * register groups.
 *
 * RK0..RK3 are XORed into x0..x3 first. This macro does not load them: the
 * parameter i is unused here and the key words must already be in RK0..RK3
 * (the SP() expansion preceding each KL2() in this file fetches them).
 * Each rotate-right by n is built from vpsrld $n / vpslld $(32 - n) / vpor,
 * using x4 as the temporary. Rotations used: x0 by 5, x2 by 22, x1 by 1,
 * x3 by 7, x0 by 13, x2 by 3.
 */
#define KL2(x0, x1, x2, x3, x4, i) \
vpxor RK0, x0 ## 1, x0 ## 1; \
vpxor RK2, x2 ## 1, x2 ## 1; \
vpsrld $5, x0 ## 1, x4 ## 1; \
vpslld $(32 - 5), x0 ## 1, x0 ## 1; \
vpor x4 ## 1, x0 ## 1, x0 ## 1; \
vpxor RK3, x3 ## 1, x3 ## 1; \
vpxor RK1, x1 ## 1, x1 ## 1; \
vpsrld $22, x2 ## 1, x4 ## 1; \
vpslld $(32 - 22), x2 ## 1, x2 ## 1; \
vpor x4 ## 1, x2 ## 1, x2 ## 1; \
vpxor x3 ## 1, x2 ## 1, x2 ## 1; \
vpxor RK0, x0 ## 2, x0 ## 2; \
vpxor RK2, x2 ## 2, x2 ## 2; \
vpsrld $5, x0 ## 2, x4 ## 2; \
vpslld $(32 - 5), x0 ## 2, x0 ## 2; \
vpor x4 ## 2, x0 ## 2, x0 ## 2; \
vpxor RK3, x3 ## 2, x3 ## 2; \
vpxor RK1, x1 ## 2, x1 ## 2; \
vpsrld $22, x2 ## 2, x4 ## 2; \
vpslld $(32 - 22), x2 ## 2, x2 ## 2; \
vpor x4 ## 2, x2 ## 2, x2 ## 2; \
vpxor x3 ## 2, x2 ## 2, x2 ## 2; \
vpxor x3 ## 1, x0 ## 1, x0 ## 1; \
vpslld $7, x1 ## 1, x4 ## 1; \
vpxor x1 ## 1, x0 ## 1, x0 ## 1; \
vpxor x4 ## 1, x2 ## 1, x2 ## 1; \
vpsrld $1, x1 ## 1, x4 ## 1; \
vpslld $(32 - 1), x1 ## 1, x1 ## 1; \
vpor x4 ## 1, x1 ## 1, x1 ## 1; \
vpxor x3 ## 2, x0 ## 2, x0 ## 2; \
vpslld $7, x1 ## 2, x4 ## 2; \
vpxor x1 ## 2, x0 ## 2, x0 ## 2; \
vpxor x4 ## 2, x2 ## 2, x2 ## 2; \
vpsrld $1, x1 ## 2, x4 ## 2; \
vpslld $(32 - 1), x1 ## 2, x1 ## 2; \
vpor x4 ## 2, x1 ## 2, x1 ## 2; \
vpsrld $7, x3 ## 1, x4 ## 1; \
vpslld $(32 - 7), x3 ## 1, x3 ## 1; \
vpor x4 ## 1, x3 ## 1, x3 ## 1; \
vpxor x0 ## 1, x1 ## 1, x1 ## 1; \
vpslld $3, x0 ## 1, x4 ## 1; \
vpxor x4 ## 1, x3 ## 1, x3 ## 1; \
vpsrld $7, x3 ## 2, x4 ## 2; \
vpslld $(32 - 7), x3 ## 2, x3 ## 2; \
vpor x4 ## 2, x3 ## 2, x3 ## 2; \
vpxor x0 ## 2, x1 ## 2, x1 ## 2; \
vpslld $3, x0 ## 2, x4 ## 2; \
vpxor x4 ## 2, x3 ## 2, x3 ## 2; \
vpsrld $13, x0 ## 1, x4 ## 1; \
vpslld $(32 - 13), x0 ## 1, x0 ## 1; \
vpor x4 ## 1, x0 ## 1, x0 ## 1; \
vpxor x2 ## 1, x1 ## 1, x1 ## 1; \
vpxor x2 ## 1, x3 ## 1, x3 ## 1; \
vpsrld $3, x2 ## 1, x4 ## 1; \
vpslld $(32 - 3), x2 ## 1, x2 ## 1; \
vpor x4 ## 1, x2 ## 1, x2 ## 1; \
vpsrld $13, x0 ## 2, x4 ## 2; \
vpslld $(32 - 13), x0 ## 2, x0 ## 2; \
vpor x4 ## 2, x0 ## 2, x0 ## 2; \
vpxor x2 ## 2, x1 ## 2, x1 ## 2; \
vpxor x2 ## 2, x3 ## 2, x3 ## 2; \
vpsrld $3, x2 ## 2, x4 ## 2; \
vpslld $(32 - 3), x2 ## 2, x2 ## 2; \
vpor x4 ## 2, x2 ## 2, x2 ## 2;
/*
 * Apply S-box SBOX to both register groups: SBOX_1 then SBOX_2 on group 1
 * (registers suffixed 1), followed by SBOX_1 then SBOX_2 on group 2.
 */
#define S(SBOX, x0, x1, x2, x3, x4) \
SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \
SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2);
/*
 * Like S(), but additionally fetches the four words of round key i into
 * RK0..RK3, one get_key() in front of each S-box half. The key words are
 * not consumed here; the KL2() that follows each SP() XORs them in.
 *
 * The final line must not end in a backslash: a trailing continuation would
 * splice whatever line follows the macro into its body.
 */
#define SP(SBOX, x0, x1, x2, x3, x4, i) \
	get_key(i, 0, RK0); \
	SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
	get_key(i, 2, RK2); \
	SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
	get_key(i, 3, RK3); \
	SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \
	get_key(i, 1, RK1); \
	SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2);
/*
 * Transpose the 4x4 matrix of 32-bit words held in x0..x3 (one row per
 * register) in place. t0, t1 and t2 are clobbered as temporaries; the
 * 32-bit unpacks interleave row pairs and the 64-bit unpacks assemble the
 * result.
 */
#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
vpunpckldq x1, x0, t0; \
vpunpckhdq x1, x0, t2; \
vpunpckldq x3, x2, t1; \
vpunpckhdq x3, x2, x3; \
\
vpunpcklqdq t1, t0, x0; \
vpunpckhqdq t1, t0, x1; \
vpunpcklqdq x3, t2, x2; \
vpunpckhqdq x3, t2, x3;
/*
 * Load four consecutive 16-byte blocks from address `in` (unaligned loads)
 * into x0..x3, then transpose them with transpose_4x4. t0..t2 are clobbered.
 */
#define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \
vmovdqu (0*4*4)(in), x0; \
vmovdqu (1*4*4)(in), x1; \
vmovdqu (2*4*4)(in), x2; \
vmovdqu (3*4*4)(in), x3; \
\
transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
/*
 * Transpose x0..x3 with transpose_4x4 and store the four resulting 16-byte
 * blocks to consecutive locations at address `out` (unaligned stores).
 * t0..t2 are clobbered.
 */
#define write_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
\
vmovdqu x0, (0*4*4)(out); \
vmovdqu x1, (1*4*4)(out); \
vmovdqu x2, (2*4*4)(out); \
vmovdqu x3, (3*4*4)(out);
/*
 * Same as write_blocks, except that each 16-byte block is XORed with the
 * data already present at `out` before being stored back.
 * t0..t2 are clobbered.
 */
#define xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
\
vpxor (0*4*4)(out), x0, x0; \
vmovdqu x0, (0*4*4)(out); \
vpxor (1*4*4)(out), x1, x1; \
vmovdqu x1, (1*4*4)(out); \
vpxor (2*4*4)(out), x2, x2; \
vmovdqu x2, (2*4*4)(out); \
vpxor (3*4*4)(out), x3, x3; \
vmovdqu x3, (3*4*4)(out);
/*
 * Encrypt eight 16-byte blocks: four are loaded from src into register
 * group 1 and four from src + 64 into group 2. After the initial key mixing
 * (round key 0) the rounds alternate S-boxes S0..S7 with LK2, and the last
 * S7 is followed by a plain key mixing with round key 32. Depending on %cl
 * the result is either stored to dst or XORed into the data already at dst.
 *
 * %rax is used as the pointer to the second group of four blocks; the xmm
 * registers named by the aliases above are overwritten.
 */
.align 8
.global __serpent_enc_blk_8way_avx
.type __serpent_enc_blk_8way_avx,@function;
__serpent_enc_blk_8way_avx:
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
* %rcx: bool, if true: xor output
*/
/* All-ones constant used by the S-boxes as a NOT mask. */
vpcmpeqd RNOT, RNOT, RNOT;
leaq (4*4*4)(%rdx), %rax;
read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
K2(RA, RB, RC, RD, RE, 0);
S(S0, RA, RB, RC, RD, RE); LK2(RC, RB, RD, RA, RE, 1);
S(S1, RC, RB, RD, RA, RE); LK2(RE, RD, RA, RC, RB, 2);
S(S2, RE, RD, RA, RC, RB); LK2(RB, RD, RE, RC, RA, 3);
S(S3, RB, RD, RE, RC, RA); LK2(RC, RA, RD, RB, RE, 4);
S(S4, RC, RA, RD, RB, RE); LK2(RA, RD, RB, RE, RC, 5);
S(S5, RA, RD, RB, RE, RC); LK2(RC, RA, RD, RE, RB, 6);
S(S6, RC, RA, RD, RE, RB); LK2(RD, RB, RA, RE, RC, 7);
S(S7, RD, RB, RA, RE, RC); LK2(RC, RA, RE, RD, RB, 8);
S(S0, RC, RA, RE, RD, RB); LK2(RE, RA, RD, RC, RB, 9);
S(S1, RE, RA, RD, RC, RB); LK2(RB, RD, RC, RE, RA, 10);
S(S2, RB, RD, RC, RE, RA); LK2(RA, RD, RB, RE, RC, 11);
S(S3, RA, RD, RB, RE, RC); LK2(RE, RC, RD, RA, RB, 12);
S(S4, RE, RC, RD, RA, RB); LK2(RC, RD, RA, RB, RE, 13);
S(S5, RC, RD, RA, RB, RE); LK2(RE, RC, RD, RB, RA, 14);
S(S6, RE, RC, RD, RB, RA); LK2(RD, RA, RC, RB, RE, 15);
S(S7, RD, RA, RC, RB, RE); LK2(RE, RC, RB, RD, RA, 16);
S(S0, RE, RC, RB, RD, RA); LK2(RB, RC, RD, RE, RA, 17);
S(S1, RB, RC, RD, RE, RA); LK2(RA, RD, RE, RB, RC, 18);
S(S2, RA, RD, RE, RB, RC); LK2(RC, RD, RA, RB, RE, 19);
S(S3, RC, RD, RA, RB, RE); LK2(RB, RE, RD, RC, RA, 20);
S(S4, RB, RE, RD, RC, RA); LK2(RE, RD, RC, RA, RB, 21);
S(S5, RE, RD, RC, RA, RB); LK2(RB, RE, RD, RA, RC, 22);
S(S6, RB, RE, RD, RA, RC); LK2(RD, RC, RE, RA, RB, 23);
S(S7, RD, RC, RE, RA, RB); LK2(RB, RE, RA, RD, RC, 24);
S(S0, RB, RE, RA, RD, RC); LK2(RA, RE, RD, RB, RC, 25);
S(S1, RA, RE, RD, RB, RC); LK2(RC, RD, RB, RA, RE, 26);
S(S2, RC, RD, RB, RA, RE); LK2(RE, RD, RC, RA, RB, 27);
S(S3, RE, RD, RC, RA, RB); LK2(RA, RB, RD, RE, RC, 28);
S(S4, RA, RB, RD, RE, RC); LK2(RB, RD, RE, RC, RA, 29);
S(S5, RB, RD, RE, RC, RA); LK2(RA, RB, RD, RC, RE, 30);
S(S6, RA, RB, RD, RC, RE); LK2(RD, RE, RB, RC, RA, 31);
S(S7, RD, RE, RB, RC, RA); K2(RA, RB, RC, RD, RE, 32);
leaq (4*4*4)(%rsi), %rax;
/* Non-zero low byte of %rcx selects the XOR-into-destination variant. */
testb %cl, %cl;
jnz __enc_xor8;
write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
ret;
__enc_xor8:
xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
ret;
/*
 * Decrypt eight 16-byte blocks: four are loaded from src into register
 * group 1 and four from src + 64 into group 2. After the initial key mixing
 * with round key 32, each step applies an SIn S-box through SP() (which also
 * fetches the round key) followed by KL2; the final SI0 is followed by a
 * plain key mixing with round key 0. The result is stored to dst.
 *
 * %rax is used as the pointer to the second group of four blocks; the xmm
 * registers named by the aliases above are overwritten.
 */
.align 8
.global serpent_dec_blk_8way_avx
.type serpent_dec_blk_8way_avx,@function;
serpent_dec_blk_8way_avx:
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
/* All-ones constant used by the S-boxes as a NOT mask. */
vpcmpeqd RNOT, RNOT, RNOT;
leaq (4*4*4)(%rdx), %rax;
read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
K2(RA, RB, RC, RD, RE, 32);
SP(SI7, RA, RB, RC, RD, RE, 31); KL2(RB, RD, RA, RE, RC, 31);
SP(SI6, RB, RD, RA, RE, RC, 30); KL2(RA, RC, RE, RB, RD, 30);
SP(SI5, RA, RC, RE, RB, RD, 29); KL2(RC, RD, RA, RE, RB, 29);
SP(SI4, RC, RD, RA, RE, RB, 28); KL2(RC, RA, RB, RE, RD, 28);
SP(SI3, RC, RA, RB, RE, RD, 27); KL2(RB, RC, RD, RE, RA, 27);
SP(SI2, RB, RC, RD, RE, RA, 26); KL2(RC, RA, RE, RD, RB, 26);
SP(SI1, RC, RA, RE, RD, RB, 25); KL2(RB, RA, RE, RD, RC, 25);
SP(SI0, RB, RA, RE, RD, RC, 24); KL2(RE, RC, RA, RB, RD, 24);
SP(SI7, RE, RC, RA, RB, RD, 23); KL2(RC, RB, RE, RD, RA, 23);
SP(SI6, RC, RB, RE, RD, RA, 22); KL2(RE, RA, RD, RC, RB, 22);
SP(SI5, RE, RA, RD, RC, RB, 21); KL2(RA, RB, RE, RD, RC, 21);
SP(SI4, RA, RB, RE, RD, RC, 20); KL2(RA, RE, RC, RD, RB, 20);
SP(SI3, RA, RE, RC, RD, RB, 19); KL2(RC, RA, RB, RD, RE, 19);
SP(SI2, RC, RA, RB, RD, RE, 18); KL2(RA, RE, RD, RB, RC, 18);
SP(SI1, RA, RE, RD, RB, RC, 17); KL2(RC, RE, RD, RB, RA, 17);
SP(SI0, RC, RE, RD, RB, RA, 16); KL2(RD, RA, RE, RC, RB, 16);
SP(SI7, RD, RA, RE, RC, RB, 15); KL2(RA, RC, RD, RB, RE, 15);
SP(SI6, RA, RC, RD, RB, RE, 14); KL2(RD, RE, RB, RA, RC, 14);
SP(SI5, RD, RE, RB, RA, RC, 13); KL2(RE, RC, RD, RB, RA, 13);
SP(SI4, RE, RC, RD, RB, RA, 12); KL2(RE, RD, RA, RB, RC, 12);
SP(SI3, RE, RD, RA, RB, RC, 11); KL2(RA, RE, RC, RB, RD, 11);
SP(SI2, RA, RE, RC, RB, RD, 10); KL2(RE, RD, RB, RC, RA, 10);
SP(SI1, RE, RD, RB, RC, RA, 9); KL2(RA, RD, RB, RC, RE, 9);
SP(SI0, RA, RD, RB, RC, RE, 8); KL2(RB, RE, RD, RA, RC, 8);
SP(SI7, RB, RE, RD, RA, RC, 7); KL2(RE, RA, RB, RC, RD, 7);
SP(SI6, RE, RA, RB, RC, RD, 6); KL2(RB, RD, RC, RE, RA, 6);
SP(SI5, RB, RD, RC, RE, RA, 5); KL2(RD, RA, RB, RC, RE, 5);
SP(SI4, RD, RA, RB, RC, RE, 4); KL2(RD, RB, RE, RC, RA, 4);
SP(SI3, RD, RB, RE, RC, RA, 3); KL2(RE, RD, RA, RC, RB, 3);
SP(SI2, RE, RD, RA, RC, RB, 2); KL2(RD, RB, RC, RA, RE, 2);
SP(SI1, RD, RB, RC, RA, RE, 1); KL2(RE, RB, RC, RA, RD, 1);
S(SI0, RE, RB, RC, RA, RD); K2(RC, RD, RB, RE, RA, 0);
leaq (4*4*4)(%rsi), %rax;
write_blocks(%rsi, RC1, RD1, RB1, RE1, RK0, RK1, RK2);
write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2);
ret;

View File

@ -0,0 +1,636 @@
/*
* Glue Code for AVX assembler versions of Serpent Cipher
*
* Copyright (C) 2012 Johannes Goetzfried
* <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
*
* Glue code based on serpent_sse2_glue.c by:
* Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA
*
*/
#include <linux/module.h>
#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/algapi.h>
#include <crypto/serpent.h>
#include <crypto/cryptd.h>
#include <crypto/b128ops.h>
#include <crypto/ctr.h>
#include <crypto/lrw.h>
#include <crypto/xts.h>
#include <asm/xcr.h>
#include <asm/xsave.h>
#include <asm/crypto/serpent-avx.h>
#include <asm/crypto/ablk_helper.h>
#include <asm/crypto/glue_helper.h>
/*
 * CBC-decrypt one batch of SERPENT_PARALLEL_BLOCKS blocks.
 *
 * The first SERPENT_PARALLEL_BLOCKS - 1 ciphertext blocks are saved before
 * the parallel decryption runs, because @dst may alias @src and they are
 * needed afterwards as chaining values for blocks 1..n-1. Block 0 is left
 * un-chained; the caller XORs it with the preceding ciphertext block or IV.
 */
static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src)
{
	u128 prev_ct[SERPENT_PARALLEL_BLOCKS - 1];
	unsigned int blk;

	for (blk = 0; blk < SERPENT_PARALLEL_BLOCKS - 1; blk++)
		prev_ct[blk] = src[blk];

	serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);

	/* Block blk chains with ciphertext block blk - 1. */
	for (blk = 1; blk < SERPENT_PARALLEL_BLOCKS; blk++)
		u128_xor(&dst[blk], &dst[blk], &prev_ct[blk - 1]);
}
/*
 * CTR-process a single block: convert the current counter to big-endian,
 * advance the counter, encrypt the converted value in place, and XOR the
 * result with @src into @dst.
 */
static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
{
	be128 keystream;

	u128_to_be128(&keystream, iv);
	u128_inc(iv);

	__serpent_encrypt(ctx, (u8 *)&keystream, (u8 *)&keystream);

	u128_xor(dst, src, (u128 *)&keystream);
}
/*
 * CTR-process one batch of SERPENT_PARALLEL_BLOCKS blocks.
 *
 * The input is first copied to @dst (unless the operation is in place) and
 * one big-endian counter block is produced per data block, advancing @iv each
 * time. serpent_enc_blk_xway_xor() is then called with @dst as destination
 * and the counter blocks as source.
 */
static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
				   u128 *iv)
{
	be128 counters[SERPENT_PARALLEL_BLOCKS];
	const bool in_place = (dst == src);
	unsigned int blk;

	for (blk = 0; blk < SERPENT_PARALLEL_BLOCKS; blk++) {
		if (!in_place)
			dst[blk] = src[blk];

		u128_to_be128(&counters[blk], iv);
		u128_inc(iv);
	}

	serpent_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)counters);
}
/*
 * ECB encryption dispatch table: the SERPENT_PARALLEL_BLOCKS-wide routine
 * first, then the single-block routine for whatever is left.
 */
static const struct common_glue_ctx serpent_enc = {
.num_funcs = 2,
.fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = SERPENT_PARALLEL_BLOCKS,
.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_enc_blk_xway) }
}, {
.num_blocks = 1,
.fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) }
} }
};
/*
 * CTR dispatch table: the SERPENT_PARALLEL_BLOCKS-wide routine first, then
 * the single-block routine. glue_ctr_crypt_128bit() also uses the last entry
 * for a trailing partial block.
 */
static const struct common_glue_ctx serpent_ctr = {
.num_funcs = 2,
.fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = SERPENT_PARALLEL_BLOCKS,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr_xway) }
}, {
.num_blocks = 1,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr) }
} }
};
/*
 * ECB decryption dispatch table: the SERPENT_PARALLEL_BLOCKS-wide routine
 * first, then the single-block routine for whatever is left.
 */
static const struct common_glue_ctx serpent_dec = {
.num_funcs = 2,
.fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = SERPENT_PARALLEL_BLOCKS,
.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_dec_blk_xway) }
}, {
.num_blocks = 1,
.fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) }
} }
};
/*
 * CBC decryption dispatch table: the batch routine that chains blocks
 * 1..n-1 itself, then plain single-block decryption (the glue code applies
 * the chaining XOR for that block).
 */
static const struct common_glue_ctx serpent_dec_cbc = {
.num_funcs = 2,
.fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = SERPENT_PARALLEL_BLOCKS,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_decrypt_cbc_xway) }
}, {
.num_blocks = 1,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) }
} }
};
/* ECB encryption: run the shared ECB helper with the serpent_enc table. */
static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	const struct common_glue_ctx *gctx = &serpent_enc;

	return glue_ecb_crypt_128bit(gctx, desc, dst, src, nbytes);
}
/* ECB decryption: run the shared ECB helper with the serpent_dec table. */
static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	const struct common_glue_ctx *gctx = &serpent_dec;

	return glue_ecb_crypt_128bit(gctx, desc, dst, src, nbytes);
}
/*
 * CBC encryption: run the shared CBC helper with the single-block
 * __serpent_encrypt routine.
 */
static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	int status;

	status = glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt),
					 desc, dst, src, nbytes);

	return status;
}
/* CBC decryption: run the shared CBC helper with the serpent_dec_cbc table. */
static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	const struct common_glue_ctx *gctx = &serpent_dec_cbc;

	return glue_cbc_decrypt_128bit(gctx, desc, dst, src, nbytes);
}
/* CTR mode: run the shared CTR helper with the serpent_ctr table. */
static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		     struct scatterlist *src, unsigned int nbytes)
{
	const struct common_glue_ctx *gctx = &serpent_ctr;

	return glue_ctr_crypt_128bit(gctx, desc, dst, src, nbytes);
}
/*
 * Serpent-specific wrapper around glue_fpu_begin(): passes the Serpent block
 * size and parallel block count, and no descriptor. Returns the new
 * "FPU enabled" state to be fed into the next call.
 */
static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes)
{
	bool now_enabled;

	now_enabled = glue_fpu_begin(SERPENT_BLOCK_SIZE,
				     SERPENT_PARALLEL_BLOCKS,
				     NULL, fpu_enabled, nbytes);

	return now_enabled;
}
/* Counterpart of serpent_fpu_begin(): forwards the state to glue_fpu_end(). */
static inline void serpent_fpu_end(bool fpu_enabled)
{
	const bool was_enabled = fpu_enabled;

	glue_fpu_end(was_enabled);
}
/*
 * Private state handed to encrypt_callback()/decrypt_callback() through the
 * crypt_ctx pointer of the LRW request.
 */
struct crypt_priv {
/* Serpent key context used for the block operations. */
struct serpent_ctx *ctx;
/* FPU state as tracked by serpent_fpu_begin()/serpent_fpu_end(). */
bool fpu_enabled;
};
/*
 * In-place encryption callback: @priv is a struct crypt_priv, @srcdst the
 * buffer and @nbytes its length. A buffer of exactly
 * SERPENT_PARALLEL_BLOCKS blocks goes through the parallel routine; any
 * other length is processed one whole block at a time.
 */
static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
{
	const unsigned int block_bytes = SERPENT_BLOCK_SIZE;
	struct crypt_priv *state = priv;

	state->fpu_enabled = serpent_fpu_begin(state->fpu_enabled, nbytes);

	if (nbytes == block_bytes * SERPENT_PARALLEL_BLOCKS) {
		serpent_enc_blk_xway(state->ctx, srcdst, srcdst);
		return;
	}

	while (nbytes >= block_bytes) {
		__serpent_encrypt(state->ctx, srcdst, srcdst);
		srcdst += block_bytes;
		nbytes -= block_bytes;
	}
}
/*
 * In-place decryption callback: @priv is a struct crypt_priv, @srcdst the
 * buffer and @nbytes its length. A buffer of exactly
 * SERPENT_PARALLEL_BLOCKS blocks goes through the parallel routine; any
 * other length is processed one whole block at a time.
 */
static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
{
	const unsigned int block_bytes = SERPENT_BLOCK_SIZE;
	struct crypt_priv *state = priv;

	state->fpu_enabled = serpent_fpu_begin(state->fpu_enabled, nbytes);

	if (nbytes == block_bytes * SERPENT_PARALLEL_BLOCKS) {
		serpent_dec_blk_xway(state->ctx, srcdst, srcdst);
		return;
	}

	while (nbytes >= block_bytes) {
		__serpent_decrypt(state->ctx, srcdst, srcdst);
		srcdst += block_bytes;
		nbytes -= block_bytes;
	}
}
/* Transform context for the LRW mode of Serpent. */
struct serpent_lrw_ctx {
/* LRW table state, initialised from the trailing block of the key. */
struct lrw_table_ctx lrw_table;
/* Serpent key schedule built from the leading part of the key. */
struct serpent_ctx serpent_ctx;
};
/*
 * Set the LRW key. The last SERPENT_BLOCK_SIZE bytes of @key initialise the
 * LRW table; everything before them is the Serpent cipher key.
 *
 * Returns 0 on success or the error reported by __serpent_setkey() /
 * lrw_init_table().
 */
static int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
			      unsigned int keylen)
{
	struct serpent_lrw_ctx *lctx = crypto_tfm_ctx(tfm);
	const unsigned int cipher_keylen = keylen - SERPENT_BLOCK_SIZE;
	int status;

	status = __serpent_setkey(&lctx->serpent_ctx, key, cipher_keylen);
	if (status != 0)
		return status;

	return lrw_init_table(&lctx->lrw_table,
			      key + keylen - SERPENT_BLOCK_SIZE);
}
static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
be128 buf[SERPENT_PARALLEL_BLOCKS];
struct crypt_priv crypt_ctx = {
.ctx = &ctx->serpent_ctx,
.fpu_enabled = false,
};
struct lrw_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
.table_ctx = &ctx->lrw_table,
.crypt_ctx = &crypt_ctx,
.crypt_fn = encrypt_callback,
};
int ret;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
ret = lrw_crypt(desc, dst, src, nbytes, &req);
serpent_fpu_end(crypt_ctx.fpu_enabled);
return ret;
}
static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
be128 buf[SERPENT_PARALLEL_BLOCKS];
struct crypt_priv crypt_ctx = {
.ctx = &ctx->serpent_ctx,
.fpu_enabled = false,
};
struct lrw_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
.table_ctx = &ctx->lrw_table,
.crypt_ctx = &crypt_ctx,
.crypt_fn = decrypt_callback,
};
int ret;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
ret = lrw_crypt(desc, dst, src, nbytes, &req);
serpent_fpu_end(crypt_ctx.fpu_enabled);
return ret;
}
/* Transform destructor of the LRW blkcipher: releases the LRW table. */
static void lrw_exit_tfm(struct crypto_tfm *tfm)
{
	struct serpent_lrw_ctx *lrw = crypto_tfm_ctx(tfm);

	lrw_free_table(&lrw->lrw_table);
}
/* Transform context of the XTS mode: filled in by xts_serpent_setkey(). */
struct serpent_xts_ctx {
/* key schedule of the second key half, used for the tweak */
struct serpent_ctx tweak_ctx;
/* key schedule of the first key half, used for the data blocks */
struct serpent_ctx crypt_ctx;
};
/*
 * Set the XTS key.
 *
 * @key holds two keys of equal size back to back: the first half keys the
 * data cipher, the second half keys the tweak cipher.
 *
 * Returns 0 on success, -EINVAL (with CRYPTO_TFM_RES_BAD_KEY_LEN set in the
 * tfm flags) for an odd @keylen, or the error from __serpent_setkey().
 */
static int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
			      unsigned int keylen)
{
	struct serpent_xts_ctx *xts = crypto_tfm_ctx(tfm);
	const unsigned int half = keylen / 2;
	int err;

	/* two equally sized keys cannot add up to an odd length */
	if (keylen % 2) {
		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
		return -EINVAL;
	}

	/* first half: data encryption key */
	err = __serpent_setkey(&xts->crypt_ctx, key, half);
	if (err)
		return err;

	/* second half: tweak key */
	return __serpent_setkey(&xts->tweak_ctx, key + half, half);
}
static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
be128 buf[SERPENT_PARALLEL_BLOCKS];
struct crypt_priv crypt_ctx = {
.ctx = &ctx->crypt_ctx,
.fpu_enabled = false,
};
struct xts_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
.tweak_ctx = &ctx->tweak_ctx,
.tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt),
.crypt_ctx = &crypt_ctx,
.crypt_fn = encrypt_callback,
};
int ret;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
ret = xts_crypt(desc, dst, src, nbytes, &req);
serpent_fpu_end(crypt_ctx.fpu_enabled);
return ret;
}
static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
be128 buf[SERPENT_PARALLEL_BLOCKS];
struct crypt_priv crypt_ctx = {
.ctx = &ctx->crypt_ctx,
.fpu_enabled = false,
};
struct xts_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
.tweak_ctx = &ctx->tweak_ctx,
.tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt),
.crypt_ctx = &crypt_ctx,
.crypt_fn = decrypt_callback,
};
int ret;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
ret = xts_crypt(desc, dst, src, nbytes, &req);
serpent_fpu_end(crypt_ctx.fpu_enabled);
return ret;
}
/*
 * Algorithms registered by this module.
 *
 * Entries 0-4 are synchronous blkcipher implementations with priority 0 and
 * "__"-prefixed names; they contain the actual Serpent AVX mode code.
 * Entries 5-9 are the asynchronous ablkcipher algorithms with the standard
 * "mode(serpent)" names and priority 500, implemented through the ablk_*
 * helpers.
 * NOTE(review): presumably each async entry wraps the "__driver-*" entry of
 * the same mode via cryptd - confirm against the ablk helper.
 */
static struct crypto_alg serpent_algs[10] = { {
/* [0] internal ECB */
.cra_name = "__ecb-serpent-avx",
.cra_driver_name = "__driver-ecb-serpent-avx",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct serpent_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(serpent_algs[0].cra_list),
.cra_u = {
.blkcipher = {
.min_keysize = SERPENT_MIN_KEY_SIZE,
.max_keysize = SERPENT_MAX_KEY_SIZE,
.setkey = serpent_setkey,
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
},
},
}, {
/* [1] internal CBC */
.cra_name = "__cbc-serpent-avx",
.cra_driver_name = "__driver-cbc-serpent-avx",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct serpent_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(serpent_algs[1].cra_list),
.cra_u = {
.blkcipher = {
.min_keysize = SERPENT_MIN_KEY_SIZE,
.max_keysize = SERPENT_MAX_KEY_SIZE,
.setkey = serpent_setkey,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
},
},
}, {
/* [2] internal CTR: block size 1, one routine for both directions */
.cra_name = "__ctr-serpent-avx",
.cra_driver_name = "__driver-ctr-serpent-avx",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct serpent_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(serpent_algs[2].cra_list),
.cra_u = {
.blkcipher = {
.min_keysize = SERPENT_MIN_KEY_SIZE,
.max_keysize = SERPENT_MAX_KEY_SIZE,
.ivsize = SERPENT_BLOCK_SIZE,
.setkey = serpent_setkey,
.encrypt = ctr_crypt,
.decrypt = ctr_crypt,
},
},
}, {
/* [3] internal LRW: key is the cipher key plus one extra block, see lrw_serpent_setkey() */
.cra_name = "__lrw-serpent-avx",
.cra_driver_name = "__driver-lrw-serpent-avx",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct serpent_lrw_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(serpent_algs[3].cra_list),
.cra_exit = lrw_exit_tfm,
.cra_u = {
.blkcipher = {
.min_keysize = SERPENT_MIN_KEY_SIZE +
SERPENT_BLOCK_SIZE,
.max_keysize = SERPENT_MAX_KEY_SIZE +
SERPENT_BLOCK_SIZE,
.ivsize = SERPENT_BLOCK_SIZE,
.setkey = lrw_serpent_setkey,
.encrypt = lrw_encrypt,
.decrypt = lrw_decrypt,
},
},
}, {
/* [4] internal XTS: key is two cipher keys back to back, see xts_serpent_setkey() */
.cra_name = "__xts-serpent-avx",
.cra_driver_name = "__driver-xts-serpent-avx",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct serpent_xts_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(serpent_algs[4].cra_list),
.cra_u = {
.blkcipher = {
.min_keysize = SERPENT_MIN_KEY_SIZE * 2,
.max_keysize = SERPENT_MAX_KEY_SIZE * 2,
.ivsize = SERPENT_BLOCK_SIZE,
.setkey = xts_serpent_setkey,
.encrypt = xts_encrypt,
.decrypt = xts_decrypt,
},
},
}, {
/* [5] public async ECB */
.cra_name = "ecb(serpent)",
.cra_driver_name = "ecb-serpent-avx",
.cra_priority = 500,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(serpent_algs[5].cra_list),
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = SERPENT_MIN_KEY_SIZE,
.max_keysize = SERPENT_MAX_KEY_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
}, {
/*
 * [6] public async CBC.  Encryption is wired to __ablk_encrypt rather than
 * ablk_encrypt.
 * NOTE(review): presumably because CBC encryption only uses the one-block
 * cipher and needs no FPU dispatch - confirm.
 */
.cra_name = "cbc(serpent)",
.cra_driver_name = "cbc-serpent-avx",
.cra_priority = 500,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(serpent_algs[6].cra_list),
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = SERPENT_MIN_KEY_SIZE,
.max_keysize = SERPENT_MAX_KEY_SIZE,
.ivsize = SERPENT_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = __ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
}, {
/* [7] public async CTR: decrypt deliberately reuses the encrypt handler */
.cra_name = "ctr(serpent)",
.cra_driver_name = "ctr-serpent-avx",
.cra_priority = 500,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(serpent_algs[7].cra_list),
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = SERPENT_MIN_KEY_SIZE,
.max_keysize = SERPENT_MAX_KEY_SIZE,
.ivsize = SERPENT_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_encrypt,
.geniv = "chainiv",
},
},
}, {
/* [8] public async LRW */
.cra_name = "lrw(serpent)",
.cra_driver_name = "lrw-serpent-avx",
.cra_priority = 500,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(serpent_algs[8].cra_list),
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = SERPENT_MIN_KEY_SIZE +
SERPENT_BLOCK_SIZE,
.max_keysize = SERPENT_MAX_KEY_SIZE +
SERPENT_BLOCK_SIZE,
.ivsize = SERPENT_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
}, {
/* [9] public async XTS */
.cra_name = "xts(serpent)",
.cra_driver_name = "xts-serpent-avx",
.cra_priority = 500,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(serpent_algs[9].cra_list),
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = SERPENT_MIN_KEY_SIZE * 2,
.max_keysize = SERPENT_MAX_KEY_SIZE * 2,
.ivsize = SERPENT_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
} };
/*
 * Module init: register the Serpent AVX algorithms if AVX is usable.
 *
 * AVX counts as usable when the CPU reports AVX and OSXSAVE and the extended
 * control register read through xgetbv() has both the SSE and YMM state bits
 * set.
 *
 * Returns -ENODEV when AVX cannot be used, otherwise the result of
 * crypto_register_algs().
 */
static int __init serpent_init(void)
{
	const u64 avx_state = XSTATE_SSE | XSTATE_YMM;
	u64 xcr0;

	if (!(cpu_has_avx && cpu_has_osxsave)) {
		printk(KERN_INFO "AVX instructions are not detected.\n");
		return -ENODEV;
	}

	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
	if ((xcr0 & avx_state) != avx_state) {
		printk(KERN_INFO "AVX detected but unusable.\n");
		return -ENODEV;
	}

	return crypto_register_algs(serpent_algs, ARRAY_SIZE(serpent_algs));
}
/* Module exit: unregister every algorithm that serpent_init() registered. */
static void __exit serpent_exit(void)
{
	crypto_unregister_algs(serpent_algs, ARRAY_SIZE(serpent_algs));
}
/* Module entry/exit hooks and metadata; the alias lets the module be found under the name "serpent". */
module_init(serpent_init);
module_exit(serpent_exit);
MODULE_DESCRIPTION("Serpent Cipher Algorithm, AVX optimized");
MODULE_LICENSE("GPL");
MODULE_ALIAS("serpent");

View File

@ -41,358 +41,145 @@
#include <crypto/ctr.h>
#include <crypto/lrw.h>
#include <crypto/xts.h>
#include <asm/i387.h>
#include <asm/serpent.h>
#include <crypto/scatterwalk.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
#include <asm/crypto/serpent-sse2.h>
#include <asm/crypto/ablk_helper.h>
#include <asm/crypto/glue_helper.h>
/* Transform context of the async wrappers: only holds the cryptd handle set up by ablk_init(). */
struct async_serpent_ctx {
struct cryptd_ablkcipher *cryptd_tfm;
};
static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes)
static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src)
{
if (fpu_enabled)
return true;
u128 ivs[SERPENT_PARALLEL_BLOCKS - 1];
unsigned int j;
/* SSE2 is only used when chunk to be processed is large enough, so
* do not enable FPU until it is necessary.
*/
if (nbytes < SERPENT_BLOCK_SIZE * SERPENT_PARALLEL_BLOCKS)
return false;
for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++)
ivs[j] = src[j];
kernel_fpu_begin();
return true;
serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++)
u128_xor(dst + (j + 1), dst + (j + 1), ivs + j);
}
static inline void serpent_fpu_end(bool fpu_enabled)
static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
{
if (fpu_enabled)
kernel_fpu_end();
be128 ctrblk;
u128_to_be128(&ctrblk, iv);
u128_inc(iv);
__serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
u128_xor(dst, src, (u128 *)&ctrblk);
}
static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
bool enc)
static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
u128 *iv)
{
bool fpu_enabled = false;
struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
const unsigned int bsize = SERPENT_BLOCK_SIZE;
unsigned int nbytes;
int err;
be128 ctrblks[SERPENT_PARALLEL_BLOCKS];
unsigned int i;
err = blkcipher_walk_virt(desc, walk);
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) {
if (dst != src)
dst[i] = src[i];
while ((nbytes = walk->nbytes)) {
u8 *wsrc = walk->src.virt.addr;
u8 *wdst = walk->dst.virt.addr;
fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes);
/* Process multi-block batch */
if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) {
do {
if (enc)
serpent_enc_blk_xway(ctx, wdst, wsrc);
else
serpent_dec_blk_xway(ctx, wdst, wsrc);
wsrc += bsize * SERPENT_PARALLEL_BLOCKS;
wdst += bsize * SERPENT_PARALLEL_BLOCKS;
nbytes -= bsize * SERPENT_PARALLEL_BLOCKS;
} while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS);
if (nbytes < bsize)
goto done;
}
/* Handle leftovers */
do {
if (enc)
__serpent_encrypt(ctx, wdst, wsrc);
else
__serpent_decrypt(ctx, wdst, wsrc);
wsrc += bsize;
wdst += bsize;
nbytes -= bsize;
} while (nbytes >= bsize);
done:
err = blkcipher_walk_done(desc, walk, nbytes);
u128_to_be128(&ctrblks[i], iv);
u128_inc(iv);
}
serpent_fpu_end(fpu_enabled);
return err;
serpent_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks);
}
/*
 * ECB encryption dispatch table passed to glue_ecb_crypt_128bit(): a
 * SERPENT_PARALLEL_BLOCKS-wide routine followed by the single-block routine.
 */
static const struct common_glue_ctx serpent_enc = {
.num_funcs = 2,
.fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = SERPENT_PARALLEL_BLOCKS,
.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_enc_blk_xway) }
}, {
.num_blocks = 1,
.fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) }
} }
};
/*
 * CTR dispatch table passed to glue_ctr_crypt_128bit(): a
 * SERPENT_PARALLEL_BLOCKS-wide routine followed by the single-block routine.
 */
static const struct common_glue_ctx serpent_ctr = {
.num_funcs = 2,
.fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = SERPENT_PARALLEL_BLOCKS,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr_xway) }
}, {
.num_blocks = 1,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr) }
} }
};
/*
 * ECB decryption dispatch table passed to glue_ecb_crypt_128bit(): a
 * SERPENT_PARALLEL_BLOCKS-wide routine followed by the single-block routine.
 */
static const struct common_glue_ctx serpent_dec = {
.num_funcs = 2,
.fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = SERPENT_PARALLEL_BLOCKS,
.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_dec_blk_xway) }
}, {
.num_blocks = 1,
.fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) }
} }
};
/*
 * CBC decryption dispatch table passed to glue_cbc_decrypt_128bit(): the
 * SERPENT_PARALLEL_BLOCKS-wide serpent_decrypt_cbc_xway() followed by the
 * single-block __serpent_decrypt().
 */
static const struct common_glue_ctx serpent_dec_cbc = {
.num_funcs = 2,
.fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = SERPENT_PARALLEL_BLOCKS,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_decrypt_cbc_xway) }
}, {
.num_blocks = 1,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) }
} }
};
static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
return ecb_crypt(desc, &walk, true);
return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes);
}
static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
return ecb_crypt(desc, &walk, false);
}
static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
const unsigned int bsize = SERPENT_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u128 *src = (u128 *)walk->src.virt.addr;
u128 *dst = (u128 *)walk->dst.virt.addr;
u128 *iv = (u128 *)walk->iv;
do {
u128_xor(dst, src, iv);
__serpent_encrypt(ctx, (u8 *)dst, (u8 *)dst);
iv = dst;
src += 1;
dst += 1;
nbytes -= bsize;
} while (nbytes >= bsize);
u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv);
return nbytes;
return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes);
}
static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct blkcipher_walk walk;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
while ((nbytes = walk.nbytes)) {
nbytes = __cbc_encrypt(desc, &walk);
err = blkcipher_walk_done(desc, &walk, nbytes);
}
return err;
}
static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
const unsigned int bsize = SERPENT_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u128 *src = (u128 *)walk->src.virt.addr;
u128 *dst = (u128 *)walk->dst.virt.addr;
u128 ivs[SERPENT_PARALLEL_BLOCKS - 1];
u128 last_iv;
int i;
/* Start of the last block. */
src += nbytes / bsize - 1;
dst += nbytes / bsize - 1;
last_iv = *src;
/* Process multi-block batch */
if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) {
do {
nbytes -= bsize * (SERPENT_PARALLEL_BLOCKS - 1);
src -= SERPENT_PARALLEL_BLOCKS - 1;
dst -= SERPENT_PARALLEL_BLOCKS - 1;
for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++)
ivs[i] = src[i];
serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++)
u128_xor(dst + (i + 1), dst + (i + 1), ivs + i);
nbytes -= bsize;
if (nbytes < bsize)
goto done;
u128_xor(dst, dst, src - 1);
src -= 1;
dst -= 1;
} while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS);
if (nbytes < bsize)
goto done;
}
/* Handle leftovers */
for (;;) {
__serpent_decrypt(ctx, (u8 *)dst, (u8 *)src);
nbytes -= bsize;
if (nbytes < bsize)
break;
u128_xor(dst, dst, src - 1);
src -= 1;
dst -= 1;
}
done:
u128_xor(dst, dst, (u128 *)walk->iv);
*(u128 *)walk->iv = last_iv;
return nbytes;
return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc,
dst, src, nbytes);
}
static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
bool fpu_enabled = false;
struct blkcipher_walk walk;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
while ((nbytes = walk.nbytes)) {
fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes);
nbytes = __cbc_decrypt(desc, &walk);
err = blkcipher_walk_done(desc, &walk, nbytes);
}
serpent_fpu_end(fpu_enabled);
return err;
}
/* Convert a CPU-endian 128-bit value into big-endian form, one 64-bit half at a time. */
static inline void u128_to_be128(be128 *dst, const u128 *src)
{
	const u64 hi = src->a;
	const u64 lo = src->b;

	dst->a = cpu_to_be64(hi);
	dst->b = cpu_to_be64(lo);
}
/* Convert a big-endian 128-bit value into CPU-endian form, one 64-bit half at a time. */
static inline void be128_to_u128(u128 *dst, const be128 *src)
{
	const __be64 hi = src->a;
	const __be64 lo = src->b;

	dst->a = be64_to_cpu(hi);
	dst->b = be64_to_cpu(lo);
}
/* Increment a 128-bit counter: bump the b half and carry into a when b wraps to zero. */
static inline void u128_inc(u128 *i)
{
	if (++i->b == 0)
		i->a++;
}
static void ctr_crypt_final(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
u8 *ctrblk = walk->iv;
u8 keystream[SERPENT_BLOCK_SIZE];
u8 *src = walk->src.virt.addr;
u8 *dst = walk->dst.virt.addr;
unsigned int nbytes = walk->nbytes;
__serpent_encrypt(ctx, keystream, ctrblk);
crypto_xor(keystream, src, nbytes);
memcpy(dst, keystream, nbytes);
crypto_inc(ctrblk, SERPENT_BLOCK_SIZE);
}
static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
const unsigned int bsize = SERPENT_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u128 *src = (u128 *)walk->src.virt.addr;
u128 *dst = (u128 *)walk->dst.virt.addr;
u128 ctrblk;
be128 ctrblocks[SERPENT_PARALLEL_BLOCKS];
int i;
be128_to_u128(&ctrblk, (be128 *)walk->iv);
/* Process multi-block batch */
if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) {
do {
/* create ctrblks for parallel encrypt */
for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) {
if (dst != src)
dst[i] = src[i];
u128_to_be128(&ctrblocks[i], &ctrblk);
u128_inc(&ctrblk);
}
serpent_enc_blk_xway_xor(ctx, (u8 *)dst,
(u8 *)ctrblocks);
src += SERPENT_PARALLEL_BLOCKS;
dst += SERPENT_PARALLEL_BLOCKS;
nbytes -= bsize * SERPENT_PARALLEL_BLOCKS;
} while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS);
if (nbytes < bsize)
goto done;
}
/* Handle leftovers */
do {
if (dst != src)
*dst = *src;
u128_to_be128(&ctrblocks[0], &ctrblk);
u128_inc(&ctrblk);
__serpent_encrypt(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks);
u128_xor(dst, dst, (u128 *)ctrblocks);
src += 1;
dst += 1;
nbytes -= bsize;
} while (nbytes >= bsize);
done:
u128_to_be128((be128 *)walk->iv, &ctrblk);
return nbytes;
return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src,
nbytes);
}
static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
bool fpu_enabled = false;
struct blkcipher_walk walk;
int err;
return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes);
}
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt_block(desc, &walk, SERPENT_BLOCK_SIZE);
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes)
{
return glue_fpu_begin(SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS,
NULL, fpu_enabled, nbytes);
}
while ((nbytes = walk.nbytes) >= SERPENT_BLOCK_SIZE) {
fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes);
nbytes = __ctr_crypt(desc, &walk);
err = blkcipher_walk_done(desc, &walk, nbytes);
}
serpent_fpu_end(fpu_enabled);
if (walk.nbytes) {
ctr_crypt_final(desc, &walk);
err = blkcipher_walk_done(desc, &walk, 0);
}
return err;
static inline void serpent_fpu_end(bool fpu_enabled)
{
glue_fpu_end(fpu_enabled);
}
struct crypt_priv {
@ -596,106 +383,6 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
return ret;
}
static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
unsigned int key_len)
{
struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm);
struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base;
int err;
crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm)
& CRYPTO_TFM_REQ_MASK);
err = crypto_ablkcipher_setkey(child, key, key_len);
crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child)
& CRYPTO_TFM_RES_MASK);
return err;
}
/*
 * Encrypt @req synchronously by calling the encrypt handler of the blkcipher
 * behind the cryptd transform directly, with flags cleared and the request's
 * IV pointer as desc.info.
 */
static int __ablk_encrypt(struct ablkcipher_request *req)
{
	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
	struct async_serpent_ctx *actx = crypto_ablkcipher_ctx(tfm);
	struct blkcipher_desc desc;

	desc.flags = 0;
	desc.info = req->info;
	desc.tfm = cryptd_ablkcipher_child(actx->cryptd_tfm);

	return crypto_blkcipher_crt(desc.tfm)->encrypt(&desc, req->dst,
						       req->src, req->nbytes);
}
/*
 * Async encrypt entry point.
 *
 * When irq_fpu_usable() allows it the request is processed right away via
 * __ablk_encrypt(); otherwise a copy of the request, retargeted at the cryptd
 * transform, is submitted with crypto_ablkcipher_encrypt().
 */
static int ablk_encrypt(struct ablkcipher_request *req)
{
	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
	struct async_serpent_ctx *actx = crypto_ablkcipher_ctx(tfm);
	struct ablkcipher_request *cryptd_req;

	if (irq_fpu_usable())
		return __ablk_encrypt(req);

	cryptd_req = ablkcipher_request_ctx(req);
	memcpy(cryptd_req, req, sizeof(*req));
	ablkcipher_request_set_tfm(cryptd_req, &actx->cryptd_tfm->base);

	return crypto_ablkcipher_encrypt(cryptd_req);
}
/*
 * Async decrypt entry point.
 *
 * When irq_fpu_usable() allows it the decrypt handler of the blkcipher behind
 * the cryptd transform is called directly; otherwise a copy of the request,
 * retargeted at the cryptd transform, is submitted with
 * crypto_ablkcipher_decrypt().
 */
static int ablk_decrypt(struct ablkcipher_request *req)
{
	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
	struct async_serpent_ctx *actx = crypto_ablkcipher_ctx(tfm);
	struct ablkcipher_request *cryptd_req;
	struct blkcipher_desc desc;

	if (irq_fpu_usable()) {
		desc.flags = 0;
		desc.info = req->info;
		desc.tfm = cryptd_ablkcipher_child(actx->cryptd_tfm);

		return crypto_blkcipher_crt(desc.tfm)->decrypt(&desc, req->dst,
							       req->src,
							       req->nbytes);
	}

	cryptd_req = ablkcipher_request_ctx(req);
	memcpy(cryptd_req, req, sizeof(*req));
	ablkcipher_request_set_tfm(cryptd_req, &actx->cryptd_tfm->base);

	return crypto_ablkcipher_decrypt(cryptd_req);
}
/* Transform destructor of the async wrappers: frees the cryptd transform taken in ablk_init(). */
static void ablk_exit(struct crypto_tfm *tfm)
{
	struct async_serpent_ctx *actx = crypto_tfm_ctx(tfm);

	cryptd_free_ablkcipher(actx->cryptd_tfm);
}
/*
 * Transform constructor of the async wrappers.
 *
 * Allocates a cryptd transform for the algorithm named "__driver-" followed
 * by this transform's driver name, stores it in the context and sizes the
 * request context to hold an ablkcipher_request plus the cryptd request data.
 *
 * Returns 0 on success or the error encoded by cryptd_alloc_ablkcipher().
 */
static int ablk_init(struct crypto_tfm *tfm)
{
	struct async_serpent_ctx *actx = crypto_tfm_ctx(tfm);
	char inner_name[CRYPTO_MAX_ALG_NAME];
	struct cryptd_ablkcipher *inner;

	snprintf(inner_name, sizeof(inner_name), "__driver-%s",
		 crypto_tfm_alg_driver_name(tfm));

	inner = cryptd_alloc_ablkcipher(inner_name, 0, 0);
	if (IS_ERR(inner))
		return PTR_ERR(inner);

	actx->cryptd_tfm = inner;
	tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) +
				      crypto_ablkcipher_reqsize(&inner->base);

	return 0;
}
static struct crypto_alg serpent_algs[10] = { {
.cra_name = "__ecb-serpent-sse2",
.cra_driver_name = "__driver-ecb-serpent-sse2",
@ -808,7 +495,7 @@ static struct crypto_alg serpent_algs[10] = { {
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_serpent_ctx),
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
@ -830,7 +517,7 @@ static struct crypto_alg serpent_algs[10] = { {
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_serpent_ctx),
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
@ -853,7 +540,7 @@ static struct crypto_alg serpent_algs[10] = { {
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct async_serpent_ctx),
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
@ -877,7 +564,7 @@ static struct crypto_alg serpent_algs[10] = { {
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_serpent_ctx),
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
@ -902,7 +589,7 @@ static struct crypto_alg serpent_algs[10] = { {
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_serpent_ctx),
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,

View File

@ -468,7 +468,7 @@ W_PRECALC_SSSE3
*/
SHA1_VECTOR_ASM sha1_transform_ssse3
#ifdef SHA1_ENABLE_AVX_SUPPORT
#ifdef CONFIG_AS_AVX
.macro W_PRECALC_AVX

View File

@ -35,7 +35,7 @@
asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data,
unsigned int rounds);
#ifdef SHA1_ENABLE_AVX_SUPPORT
#ifdef CONFIG_AS_AVX
asmlinkage void sha1_transform_avx(u32 *digest, const char *data,
unsigned int rounds);
#endif
@ -184,7 +184,7 @@ static struct shash_alg alg = {
}
};
#ifdef SHA1_ENABLE_AVX_SUPPORT
#ifdef CONFIG_AS_AVX
static bool __init avx_usable(void)
{
u64 xcr0;
@ -209,7 +209,7 @@ static int __init sha1_ssse3_mod_init(void)
if (cpu_has_ssse3)
sha1_transform_asm = sha1_transform_ssse3;
#ifdef SHA1_ENABLE_AVX_SUPPORT
#ifdef CONFIG_AS_AVX
/* allow AVX to override SSSE3, it's a little faster */
if (avx_usable())
sha1_transform_asm = sha1_transform_avx;

View File

@ -0,0 +1,300 @@
/*
* Twofish Cipher 8-way parallel algorithm (AVX/x86_64)
*
* Copyright (C) 2012 Johannes Goetzfried
* <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA
*
*/
.file "twofish-avx-x86_64-asm_64.S"
.text
/* structure of crypto context */
/*
 * Byte offsets from CTX:
 *  s0..s3: four lookup tables, 1024 bytes apart; lookup_32bit indexes them
 *          with a byte value scaled by 4 (32-bit entries)
 *  w:      read as two 16-byte vectors, at w and at w+4*4, which are XORed
 *          into the blocks on input and output
 *  k:      32-bit round subkeys, two per round (k+4*(2n) and k+4*(2n+1))
 * NOTE(review): presumably these mirror the layout of struct twofish_ctx -
 * confirm against the C definition.
 */
#define s0 0
#define s1 1024
#define s2 2048
#define s3 3072
#define w 4096
#define k 4128
/**********************************************************************
8-way AVX twofish
**********************************************************************/
/* first function argument: pointer to the crypto context */
#define CTX %rdi
/* state words A..D of the first group of four blocks */
#define RA1 %xmm0
#define RB1 %xmm1
#define RC1 %xmm2
#define RD1 %xmm3
/* state words A..D of the second group of four blocks */
#define RA2 %xmm4
#define RB2 %xmm5
#define RC2 %xmm6
#define RD2 %xmm7
/* vector temporaries used by the round macros and the transposes */
#define RX %xmm8
#define RY %xmm9
/* broadcast round subkeys; RK1 also carries the whitening vector */
#define RK1 %xmm10
#define RK2 %xmm11
/* table index registers: only the low byte is written by lookup_32bit */
#define RID1 %rax
#define RID1b %al
#define RID2 %rbx
#define RID2b %bl
/* scalar copies of the two 64-bit halves of a vector, consumed by G */
#define RGI1 %rdx
#define RGI1bl %dl
#define RGI1bh %dh
#define RGI2 %rcx
#define RGI2bl %cl
#define RGI2bh %ch
/* scalar accumulators for the table lookup results in G */
#define RGS1 %r8
#define RGS1d %r8d
#define RGS2 %r9
#define RGS2d %r9d
#define RGS3 %r10
#define RGS3d %r10d
/*
 * dst(32 bit) = t0[byte0] ^ t1[byte1] ^ t2[byte2] ^ t3[byte3], where
 * byte0..byte3 are the four lowest bytes of src.
 * Side effects: src is shifted right by 16 bits; only the low bytes of
 * RID1/RID2 are written, so their upper bits must already be zero.
 */
#define lookup_32bit(t0, t1, t2, t3, src, dst) \
movb src ## bl, RID1b; \
movb src ## bh, RID2b; \
movl t0(CTX, RID1, 4), dst ## d; \
xorl t1(CTX, RID2, 4), dst ## d; \
shrq $16, src; \
movb src ## bl, RID1b; \
movb src ## bh, RID2b; \
xorl t2(CTX, RID1, 4), dst ## d; \
xorl t3(CTX, RID2, 4), dst ## d;
/*
 * Apply the four-table lookup to each of the four 32-bit lanes of vector a
 * and collect the results in vector x.
 * The low and high 64-bit halves of a are moved to RGI1/RGI2; each half
 * yields two 32-bit results (the extra shrq $16 between lookups, added to
 * the one inside lookup_32bit, advances to the next 32-bit word), which are
 * packed into a 64-bit scalar and inserted back into x.
 * Clobbers RGI1, RGI2, RGS1-RGS3 and the low bytes of RID1/RID2.
 */
#define G(a, x, t0, t1, t2, t3) \
vmovq a, RGI1; \
vpsrldq $8, a, x; \
vmovq x, RGI2; \
\
lookup_32bit(t0, t1, t2, t3, RGI1, RGS1); \
shrq $16, RGI1; \
lookup_32bit(t0, t1, t2, t3, RGI1, RGS2); \
shlq $32, RGS2; \
orq RGS1, RGS2; \
\
lookup_32bit(t0, t1, t2, t3, RGI2, RGS1); \
shrq $16, RGI2; \
lookup_32bit(t0, t1, t2, t3, RGI2, RGS3); \
shlq $32, RGS3; \
orq RGS1, RGS3; \
\
vmovq RGS2, x; \
vpinsrq $1, RGS3, x, x;
/*
 * One encryption round on four blocks (per 32-bit lane):
 *   x = G(a), y = G(b) with the table order rotated by one
 *   x = x + y; y = x + y         (y ends up as G(a) + 2*G(b))
 *   x += RK1; y += RK2
 *   c = ror32(c ^ x, 1)
 *   d = rol32(d, 1) ^ y
 * x and y are temporaries; x is reused as scratch for the rotates.
 */
#define encround(a, b, c, d, x, y) \
G(a, x, s0, s1, s2, s3); \
G(b, y, s1, s2, s3, s0); \
vpaddd x, y, x; \
vpaddd y, x, y; \
vpaddd x, RK1, x; \
vpaddd y, RK2, y; \
vpxor x, c, c; \
vpsrld $1, c, x; \
vpslld $(32 - 1), c, c; \
vpor c, x, c; \
vpslld $1, d, x; \
vpsrld $(32 - 1), d, d; \
vpor d, x, d; \
vpxor d, y, d;
/*
 * One decryption round on four blocks (per 32-bit lane), the inverse of
 * encround:
 *   x = G(a), y = G(b) with the table order rotated by one
 *   x = x + y; y = x + y
 *   y += RK2; d = ror32(d ^ y, 1)
 *   c = rol32(c, 1); x += RK1; c ^= x
 * y is reused as scratch for the rotates once it has been XORed into d.
 */
#define decround(a, b, c, d, x, y) \
G(a, x, s0, s1, s2, s3); \
G(b, y, s1, s2, s3, s0); \
vpaddd x, y, x; \
vpaddd y, x, y; \
vpaddd y, RK2, y; \
vpxor d, y, d; \
vpsrld $1, d, y; \
vpslld $(32 - 1), d, d; \
vpor d, y, d; \
vpslld $1, c, y; \
vpsrld $(32 - 1), c, c; \
vpor c, y, c; \
vpaddd x, RK1, x; \
vpxor x, c, c;
/*
 * Encryption round n for all eight blocks: broadcast the two subkeys of
 * round n into RK1/RK2, then run encround on block group 1 and group 2.
 */
#define encrypt_round(n, a, b, c, d) \
vbroadcastss (k+4*(2*(n)))(CTX), RK1; \
vbroadcastss (k+4*(2*(n)+1))(CTX), RK2; \
encround(a ## 1, b ## 1, c ## 1, d ## 1, RX, RY); \
encround(a ## 2, b ## 2, c ## 2, d ## 2, RX, RY);
/*
 * Decryption round n for all eight blocks: broadcast the two subkeys of
 * round n into RK1/RK2, then run decround on block group 1 and group 2.
 */
#define decrypt_round(n, a, b, c, d) \
vbroadcastss (k+4*(2*(n)))(CTX), RK1; \
vbroadcastss (k+4*(2*(n)+1))(CTX), RK2; \
decround(a ## 1, b ## 1, c ## 1, d ## 1, RX, RY); \
decround(a ## 2, b ## 2, c ## 2, d ## 2, RX, RY);
/*
 * Two consecutive encryption rounds (2n and 2n+1); the second round swaps
 * the roles of the (A,B) and (C,D) word pairs.
 */
#define encrypt_cycle(n) \
encrypt_round((2*n), RA, RB, RC, RD); \
encrypt_round(((2*n) + 1), RC, RD, RA, RB);
/*
 * Two consecutive decryption rounds in reverse order (2n+1, then 2n),
 * undoing encrypt_cycle(n).
 */
#define decrypt_cycle(n) \
decrypt_round(((2*n) + 1), RC, RD, RA, RB); \
decrypt_round((2*n), RA, RB, RC, RD);
/*
 * In-place transpose of the 4x4 matrix of 32-bit words held in x0..x3
 * (one row per register).  t0-t2 are clobbered as temporaries.
 */
#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
vpunpckldq x1, x0, t0; \
vpunpckhdq x1, x0, t2; \
vpunpckldq x3, x2, t1; \
vpunpckhdq x3, x2, x3; \
\
vpunpcklqdq t1, t0, x0; \
vpunpckhqdq t1, t0, x1; \
vpunpcklqdq x3, t2, x2; \
vpunpckhqdq x3, t2, x3;
/*
 * Load four consecutive 16-byte blocks from (in), XOR each with wkey and
 * transpose, so that afterwards each of x0..x3 holds the same word position
 * of all four blocks.
 */
#define inpack_blocks(in, x0, x1, x2, x3, wkey, t0, t1, t2) \
vpxor (0*4*4)(in), wkey, x0; \
vpxor (1*4*4)(in), wkey, x1; \
vpxor (2*4*4)(in), wkey, x2; \
vpxor (3*4*4)(in), wkey, x3; \
\
transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
/*
 * Inverse of inpack_blocks: transpose x0..x3 back to one block per
 * register, XOR each with wkey and store the four blocks to (out) with
 * unaligned stores.
 */
#define outunpack_blocks(out, x0, x1, x2, x3, wkey, t0, t1, t2) \
transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
\
vpxor x0, wkey, x0; \
vmovdqu x0, (0*4*4)(out); \
vpxor x1, wkey, x1; \
vmovdqu x1, (1*4*4)(out); \
vpxor x2, wkey, x2; \
vmovdqu x2, (2*4*4)(out); \
vpxor x3, wkey, x3; \
vmovdqu x3, (3*4*4)(out);
/*
 * Like outunpack_blocks, but each result block is additionally XORed with
 * the 16 bytes already present at its destination before being stored.
 */
#define outunpack_xor_blocks(out, x0, x1, x2, x3, wkey, t0, t1, t2) \
transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
\
vpxor x0, wkey, x0; \
vpxor (0*4*4)(out), x0, x0; \
vmovdqu x0, (0*4*4)(out); \
vpxor x1, wkey, x1; \
vpxor (1*4*4)(out), x1, x1; \
vmovdqu x1, (1*4*4)(out); \
vpxor x2, wkey, x2; \
vpxor (2*4*4)(out), x2, x2; \
vmovdqu x2, (2*4*4)(out); \
vpxor x3, wkey, x3; \
vpxor (3*4*4)(out), x3, x3; \
vmovdqu x3, (3*4*4)(out);
.align 8
.global __twofish_enc_blk_8way
.type __twofish_enc_blk_8way,@function;
/*
 * Encrypt eight 16-byte blocks from src; the result is either stored to dst
 * or XORed into the data already at dst, depending on the bool argument.
 */
__twofish_enc_blk_8way:
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
* %rcx: bool, if true: xor output
*/
/* %rbx doubles as RID2 and %rcx (the xor flag) as RGI2, so save both */
pushq %rbx;
pushq %rcx;
/* first whitening vector, XORed into the input by inpack_blocks */
vmovdqu w(CTX), RK1;
/* %rax = src + 64: the second group of four blocks */
leaq (4*4*4)(%rdx), %rax;
inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RK1, RX, RY, RK2);
inpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX, RY, RK2);
/* lookup_32bit only writes the low index bytes: clear the rest once */
xorq RID1, RID1;
xorq RID2, RID2;
/* 8 cycles of two rounds each */
encrypt_cycle(0);
encrypt_cycle(1);
encrypt_cycle(2);
encrypt_cycle(3);
encrypt_cycle(4);
encrypt_cycle(5);
encrypt_cycle(6);
encrypt_cycle(7);
/* second whitening vector, XORed into the output */
vmovdqu (w+4*4)(CTX), RK1;
popq %rcx;
popq %rbx;
/* %rax = dst + 64: destination of the second group */
leaq (4*4*4)(%rsi), %rax;
/* restored xor flag selects the plain or the XOR store path */
testb %cl, %cl;
jnz __enc_xor8;
/* words are written out in C, D, A, B order */
outunpack_blocks(%rsi, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2);
outunpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2);
ret;
__enc_xor8:
outunpack_xor_blocks(%rsi, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2);
outunpack_xor_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2);
ret;
.align 8
.global twofish_dec_blk_8way
.type twofish_dec_blk_8way,@function;
/*
 * Decrypt eight 16-byte blocks from src and store the result to dst.
 * Mirrors __twofish_enc_blk_8way: the whitening vectors are applied in the
 * opposite order and the cycles run from 7 down to 0.
 */
twofish_dec_blk_8way:
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
/* %rbx doubles as RID2, so save it */
pushq %rbx;
/* second whitening vector, XORed into the input by inpack_blocks */
vmovdqu (w+4*4)(CTX), RK1;
/* %rax = src + 64: the second group of four blocks */
leaq (4*4*4)(%rdx), %rax;
/* words are loaded in C, D, A, B order, matching the encryption output */
inpack_blocks(%rdx, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2);
inpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2);
/* lookup_32bit only writes the low index bytes: clear the rest once */
xorq RID1, RID1;
xorq RID2, RID2;
decrypt_cycle(7);
decrypt_cycle(6);
decrypt_cycle(5);
decrypt_cycle(4);
decrypt_cycle(3);
decrypt_cycle(2);
decrypt_cycle(1);
decrypt_cycle(0);
/* first whitening vector, XORed into the output */
vmovdqu (w)(CTX), RK1;
popq %rbx;
/* %rax = dst + 64: destination of the second group */
leaq (4*4*4)(%rsi), %rax;
outunpack_blocks(%rsi, RA1, RB1, RC1, RD1, RK1, RX, RY, RK2);
outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX, RY, RK2);
ret;

View File

@ -0,0 +1,624 @@
/*
* Glue Code for AVX assembler version of Twofish Cipher
*
* Copyright (C) 2012 Johannes Goetzfried
* <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA
*
*/
#include <linux/module.h>
#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/algapi.h>
#include <crypto/twofish.h>
#include <crypto/cryptd.h>
#include <crypto/b128ops.h>
#include <crypto/ctr.h>
#include <crypto/lrw.h>
#include <crypto/xts.h>
#include <asm/i387.h>
#include <asm/xcr.h>
#include <asm/xsave.h>
#include <asm/crypto/twofish.h>
#include <asm/crypto/ablk_helper.h>
#include <asm/crypto/glue_helper.h>
#include <crypto/scatterwalk.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
/*
 * Number of blocks handled by one call of the 8-way routines; also used as
 * the FPU threshold and as the size of the LRW/XTS scratch buffers below.
 */
#define TWOFISH_PARALLEL_BLOCKS 8
/* Three-block encryption that overwrites dst (xor mode switched off). */
static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
					const u8 *src)
{
	const bool xor_into_dst = false;

	__twofish_enc_blk_3way(ctx, dst, src, xor_into_dst);
}
/*
 * 8-way parallel cipher functions, implemented in assembler.
 * For the encrypt routine, xor == true XORs the result into the data already
 * at dst; xor == false overwrites dst.
 */
asmlinkage void __twofish_enc_blk_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src, bool xor);
asmlinkage void twofish_dec_blk_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src);
/* Eight-block encryption that overwrites dst (xor mode switched off). */
static inline void twofish_enc_blk_xway(struct twofish_ctx *ctx, u8 *dst,
					const u8 *src)
{
	const bool xor_into_dst = false;

	__twofish_enc_blk_8way(ctx, dst, src, xor_into_dst);
}
/* Eight-block encryption whose result is XORed into the data at dst. */
static inline void twofish_enc_blk_xway_xor(struct twofish_ctx *ctx, u8 *dst,
					    const u8 *src)
{
	const bool xor_into_dst = true;

	__twofish_enc_blk_8way(ctx, dst, src, xor_into_dst);
}
/*
 * Eight-block decryption. Exists so the glue code can refer to the parallel
 * routines by uniform "xway" names.
 */
static inline void twofish_dec_blk_xway(struct twofish_ctx *ctx, u8 *dst,
					const u8 *src)
{
	twofish_dec_blk_8way(ctx,
			     dst,
			     src);
}
/*
 * CBC decryption of TWOFISH_PARALLEL_BLOCKS blocks.
 *
 * Blocks 1..N-1 are XORed with the preceding input block after the raw
 * decryption. Block 0 is left un-chained here.
 */
static void twofish_dec_blk_cbc_xway(void *ctx, u128 *dst, const u128 *src)
{
	u128 prev[TWOFISH_PARALLEL_BLOCKS - 1];
	unsigned int blk;

	/*
	 * Copy the chaining inputs first so they are still available after
	 * the decryption, even when dst and src are the same buffer.
	 */
	for (blk = 0; blk < TWOFISH_PARALLEL_BLOCKS - 1; blk++)
		prev[blk] = src[blk];

	twofish_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);

	for (blk = 1; blk < TWOFISH_PARALLEL_BLOCKS; blk++)
		u128_xor(&dst[blk], &dst[blk], &prev[blk - 1]);
}
/*
 * CTR mode over TWOFISH_PARALLEL_BLOCKS blocks.
 *
 * Builds one big-endian counter block per data block (advancing *iv each
 * time), makes sure dst holds the input data, then encrypts the counter
 * blocks with the result XORed into dst.
 */
static void twofish_enc_blk_ctr_xway(void *ctx, u128 *dst, const u128 *src,
				     u128 *iv)
{
	const bool in_place = (dst == src);
	be128 counters[TWOFISH_PARALLEL_BLOCKS];
	unsigned int blk = 0;

	while (blk < TWOFISH_PARALLEL_BLOCKS) {
		if (!in_place)
			dst[blk] = src[blk];

		u128_to_be128(&counters[blk], iv);
		u128_inc(iv);
		blk++;
	}

	twofish_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)counters);
}
/*
 * ECB encryption routines, widest first and ending with the single-block
 * routine.
 */
static const struct common_glue_ctx twofish_enc = {
	.fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS,
	.num_funcs = 3,

	.funcs = {
		{	/* 8 blocks per call */
			.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_xway) },
			.num_blocks = TWOFISH_PARALLEL_BLOCKS,
		},
		{	/* 3 blocks per call */
			.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) },
			.num_blocks = 3,
		},
		{	/* single block */
			.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) },
			.num_blocks = 1,
		},
	}
};
/*
 * CTR routines, widest first and ending with the single-block routine.
 */
static const struct common_glue_ctx twofish_ctr = {
	.fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS,
	.num_funcs = 3,

	.funcs = {
		{	/* 8 blocks per call */
			.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_xway) },
			.num_blocks = TWOFISH_PARALLEL_BLOCKS,
		},
		{	/* 3 blocks per call */
			.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_3way) },
			.num_blocks = 3,
		},
		{	/* single block */
			.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr) },
			.num_blocks = 1,
		},
	}
};
/*
 * ECB decryption routines, widest first and ending with the single-block
 * routine.
 */
static const struct common_glue_ctx twofish_dec = {
	.fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS,
	.num_funcs = 3,

	.funcs = {
		{	/* 8 blocks per call */
			.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_xway) },
			.num_blocks = TWOFISH_PARALLEL_BLOCKS,
		},
		{	/* 3 blocks per call */
			.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) },
			.num_blocks = 3,
		},
		{	/* single block */
			.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) },
			.num_blocks = 1,
		},
	}
};
/*
 * CBC decryption routines, widest first. The single-block entry is the plain
 * block decryption routine stored through the .cbc member.
 */
static const struct common_glue_ctx twofish_dec_cbc = {
	.fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS,
	.num_funcs = 3,

	.funcs = {
		{	/* 8 blocks per call */
			.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_xway) },
			.num_blocks = TWOFISH_PARALLEL_BLOCKS,
		},
		{	/* 3 blocks per call */
			.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) },
			.num_blocks = 3,
		},
		{	/* single block */
			.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) },
			.num_blocks = 1,
		},
	}
};
/* ECB encryption: delegates to the shared 128-bit glue with twofish_enc. */
static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	int err;

	err = glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes);
	return err;
}
/* ECB decryption: delegates to the shared 128-bit glue with twofish_dec. */
static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	int err;

	err = glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes);
	return err;
}
/*
 * CBC encryption: passes only the single-block encrypt routine to the glue
 * helper; no parallel routine is involved.
 */
static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	int err;

	err = glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc,
				      dst, src, nbytes);
	return err;
}
/* CBC decryption: delegates to the shared glue with twofish_dec_cbc. */
static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	int err;

	err = glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src, nbytes);
	return err;
}
/*
 * CTR en/decryption (registered for both directions): delegates to the
 * shared glue with twofish_ctr.
 */
static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		     struct scatterlist *src, unsigned int nbytes)
{
	int err;

	err = glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes);
	return err;
}
/*
 * glue_fpu_begin() specialised for Twofish: block size TF_BLOCK_SIZE,
 * threshold TWOFISH_PARALLEL_BLOCKS, and no blkcipher descriptor.
 * Returns the new "FPU enabled" state.
 */
static inline bool twofish_fpu_begin(bool fpu_enabled, unsigned int nbytes)
{
	bool now_enabled;

	now_enabled = glue_fpu_begin(TF_BLOCK_SIZE, TWOFISH_PARALLEL_BLOCKS,
				     NULL, fpu_enabled, nbytes);
	return now_enabled;
}
/* Counterpart of twofish_fpu_begin(); forwards the state to glue_fpu_end(). */
static inline void twofish_fpu_end(bool fpu_enabled)
{
	const bool was_enabled = fpu_enabled;

	glue_fpu_end(was_enabled);
}
/*
 * State handed to encrypt_callback()/decrypt_callback() through the
 * crypt_ctx pointer of an LRW/XTS request.
 */
struct crypt_priv {
/* cipher key context used for the block operations */
struct twofish_ctx *ctx;
/* set by the callbacks once the FPU has been enabled; the caller ends it */
bool fpu_enabled;
};
/*
 * In-place encryption callback for lrw_crypt()/xts_crypt().
 *
 * @priv:   struct crypt_priv with the key context and the FPU state
 * @srcdst: buffer that is encrypted in place
 * @nbytes: number of bytes in @srcdst
 *
 * A chunk of exactly TWOFISH_PARALLEL_BLOCKS blocks goes through the 8-way
 * routine. Anything else is processed three blocks at a time, then one
 * block at a time.
 */
static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
{
	const unsigned int bsize = TF_BLOCK_SIZE;
	struct crypt_priv *state = priv;

	state->fpu_enabled = twofish_fpu_begin(state->fpu_enabled, nbytes);

	if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) {
		twofish_enc_blk_xway(state->ctx, srcdst, srcdst);
		return;
	}

	/* groups of three blocks */
	while (nbytes >= bsize * 3) {
		twofish_enc_blk_3way(state->ctx, srcdst, srcdst);
		srcdst += bsize * 3;
		nbytes -= bsize * 3;
	}

	/* remaining whole blocks */
	while (nbytes >= bsize) {
		twofish_enc_blk(state->ctx, srcdst, srcdst);
		srcdst += bsize;
		nbytes -= bsize;
	}
}
/*
 * In-place decryption callback for lrw_crypt()/xts_crypt().
 *
 * @priv:   struct crypt_priv with the key context and the FPU state
 * @srcdst: buffer that is decrypted in place
 * @nbytes: number of bytes in @srcdst
 *
 * A chunk of exactly TWOFISH_PARALLEL_BLOCKS blocks goes through the 8-way
 * routine. Anything else is processed three blocks at a time, then one
 * block at a time.
 */
static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
{
	const unsigned int bsize = TF_BLOCK_SIZE;
	struct crypt_priv *state = priv;

	state->fpu_enabled = twofish_fpu_begin(state->fpu_enabled, nbytes);

	if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) {
		twofish_dec_blk_xway(state->ctx, srcdst, srcdst);
		return;
	}

	/* groups of three blocks */
	while (nbytes >= bsize * 3) {
		twofish_dec_blk_3way(state->ctx, srcdst, srcdst);
		srcdst += bsize * 3;
		nbytes -= bsize * 3;
	}

	/* remaining whole blocks */
	while (nbytes >= bsize) {
		twofish_dec_blk(state->ctx, srcdst, srcdst);
		srcdst += bsize;
		nbytes -= bsize;
	}
}
static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
be128 buf[TWOFISH_PARALLEL_BLOCKS];
struct crypt_priv crypt_ctx = {
.ctx = &ctx->twofish_ctx,
.fpu_enabled = false,
};
struct lrw_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
.table_ctx = &ctx->lrw_table,
.crypt_ctx = &crypt_ctx,
.crypt_fn = encrypt_callback,
};
int ret;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
ret = lrw_crypt(desc, dst, src, nbytes, &req);
twofish_fpu_end(crypt_ctx.fpu_enabled);
return ret;
}
static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
be128 buf[TWOFISH_PARALLEL_BLOCKS];
struct crypt_priv crypt_ctx = {
.ctx = &ctx->twofish_ctx,
.fpu_enabled = false,
};
struct lrw_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
.table_ctx = &ctx->lrw_table,
.crypt_ctx = &crypt_ctx,
.crypt_fn = decrypt_callback,
};
int ret;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
ret = lrw_crypt(desc, dst, src, nbytes, &req);
twofish_fpu_end(crypt_ctx.fpu_enabled);
return ret;
}
static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
be128 buf[TWOFISH_PARALLEL_BLOCKS];
struct crypt_priv crypt_ctx = {
.ctx = &ctx->crypt_ctx,
.fpu_enabled = false,
};
struct xts_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
.tweak_ctx = &ctx->tweak_ctx,
.tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk),
.crypt_ctx = &crypt_ctx,
.crypt_fn = encrypt_callback,
};
int ret;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
ret = xts_crypt(desc, dst, src, nbytes, &req);
twofish_fpu_end(crypt_ctx.fpu_enabled);
return ret;
}
static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
be128 buf[TWOFISH_PARALLEL_BLOCKS];
struct crypt_priv crypt_ctx = {
.ctx = &ctx->crypt_ctx,
.fpu_enabled = false,
};
struct xts_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
.tweak_ctx = &ctx->tweak_ctx,
.tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk),
.crypt_ctx = &crypt_ctx,
.crypt_fn = decrypt_callback,
};
int ret;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
ret = xts_crypt(desc, dst, src, nbytes, &req);
twofish_fpu_end(crypt_ctx.fpu_enabled);
return ret;
}
/*
 * Algorithms registered by this module.
 *
 * Entries [0]..[4] are blkcipher implementations with "__"-prefixed names
 * and priority 0; their handlers are the functions defined above.
 * Entries [5]..[9] are ablkcipher (CRYPTO_ALG_ASYNC) algorithms carrying the
 * public names "ecb(twofish)" etc. at priority 400; they use the ablk_*
 * helpers and a struct async_helper_ctx as context.
 * NOTE(review): presumably ablk_init binds each public entry to the matching
 * "__driver-*" entry - confirm in the ablk helper implementation.
 *
 * Each .cra_list initialiser refers to the entry's own array index, so the
 * order of the entries must not change without updating those indices.
 */
static struct crypto_alg twofish_algs[10] = { {
/* [0] internal ECB */
.cra_name = "__ecb-twofish-avx",
.cra_driver_name = "__driver-ecb-twofish-avx",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = TF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct twofish_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(twofish_algs[0].cra_list),
.cra_u = {
.blkcipher = {
.min_keysize = TF_MIN_KEY_SIZE,
.max_keysize = TF_MAX_KEY_SIZE,
.setkey = twofish_setkey,
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
},
},
}, {
/* [1] internal CBC */
.cra_name = "__cbc-twofish-avx",
.cra_driver_name = "__driver-cbc-twofish-avx",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = TF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct twofish_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(twofish_algs[1].cra_list),
.cra_u = {
.blkcipher = {
.min_keysize = TF_MIN_KEY_SIZE,
.max_keysize = TF_MAX_KEY_SIZE,
.setkey = twofish_setkey,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
},
},
}, {
/* [2] internal CTR: block size 1, same handler for both directions */
.cra_name = "__ctr-twofish-avx",
.cra_driver_name = "__driver-ctr-twofish-avx",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct twofish_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(twofish_algs[2].cra_list),
.cra_u = {
.blkcipher = {
.min_keysize = TF_MIN_KEY_SIZE,
.max_keysize = TF_MAX_KEY_SIZE,
.ivsize = TF_BLOCK_SIZE,
.setkey = twofish_setkey,
.encrypt = ctr_crypt,
.decrypt = ctr_crypt,
},
},
}, {
/* [3] internal LRW: key is the cipher key plus TF_BLOCK_SIZE extra bytes */
.cra_name = "__lrw-twofish-avx",
.cra_driver_name = "__driver-lrw-twofish-avx",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = TF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct twofish_lrw_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(twofish_algs[3].cra_list),
.cra_exit = lrw_twofish_exit_tfm,
.cra_u = {
.blkcipher = {
.min_keysize = TF_MIN_KEY_SIZE +
TF_BLOCK_SIZE,
.max_keysize = TF_MAX_KEY_SIZE +
TF_BLOCK_SIZE,
.ivsize = TF_BLOCK_SIZE,
.setkey = lrw_twofish_setkey,
.encrypt = lrw_encrypt,
.decrypt = lrw_decrypt,
},
},
}, {
/* [4] internal XTS: key sizes are twice those of the plain cipher */
.cra_name = "__xts-twofish-avx",
.cra_driver_name = "__driver-xts-twofish-avx",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = TF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct twofish_xts_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(twofish_algs[4].cra_list),
.cra_u = {
.blkcipher = {
.min_keysize = TF_MIN_KEY_SIZE * 2,
.max_keysize = TF_MAX_KEY_SIZE * 2,
.ivsize = TF_BLOCK_SIZE,
.setkey = xts_twofish_setkey,
.encrypt = xts_encrypt,
.decrypt = xts_decrypt,
},
},
}, {
/* [5] public async ECB */
.cra_name = "ecb(twofish)",
.cra_driver_name = "ecb-twofish-avx",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = TF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(twofish_algs[5].cra_list),
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = TF_MIN_KEY_SIZE,
.max_keysize = TF_MAX_KEY_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
}, {
/*
 * [6] public async CBC.
 * NOTE(review): encryption uses __ablk_encrypt while the other entries use
 * ablk_encrypt - presumably because CBC encryption has no parallel routine
 * here (see cbc_encrypt); confirm against the ablk helper implementation.
 */
.cra_name = "cbc(twofish)",
.cra_driver_name = "cbc-twofish-avx",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = TF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(twofish_algs[6].cra_list),
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = TF_MIN_KEY_SIZE,
.max_keysize = TF_MAX_KEY_SIZE,
.ivsize = TF_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = __ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
}, {
/*
 * [7] public async CTR: both directions use ablk_encrypt, mirroring the
 * internal entry where ctr_crypt serves encrypt and decrypt alike.
 */
.cra_name = "ctr(twofish)",
.cra_driver_name = "ctr-twofish-avx",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(twofish_algs[7].cra_list),
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = TF_MIN_KEY_SIZE,
.max_keysize = TF_MAX_KEY_SIZE,
.ivsize = TF_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_encrypt,
.geniv = "chainiv",
},
},
}, {
/* [8] public async LRW */
.cra_name = "lrw(twofish)",
.cra_driver_name = "lrw-twofish-avx",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = TF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(twofish_algs[8].cra_list),
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = TF_MIN_KEY_SIZE +
TF_BLOCK_SIZE,
.max_keysize = TF_MAX_KEY_SIZE +
TF_BLOCK_SIZE,
.ivsize = TF_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
}, {
/* [9] public async XTS */
.cra_name = "xts(twofish)",
.cra_driver_name = "xts-twofish-avx",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = TF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(twofish_algs[9].cra_list),
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = TF_MIN_KEY_SIZE * 2,
.max_keysize = TF_MAX_KEY_SIZE * 2,
.ivsize = TF_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
} };
/*
 * Module init: refuses to load (-ENODEV) unless the CPU reports AVX and
 * OSXSAVE and the XFEATURE mask has both the SSE and YMM state bits set;
 * otherwise registers all entries of twofish_algs.
 */
static int __init twofish_init(void)
{
	const u64 needed_state = XSTATE_SSE | XSTATE_YMM;
	u64 xcr0;

	if (!(cpu_has_avx && cpu_has_osxsave)) {
		printk(KERN_INFO "AVX instructions are not detected.\n");
		return -ENODEV;
	}

	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
	if ((xcr0 & needed_state) != needed_state) {
		printk(KERN_INFO "AVX detected but unusable.\n");
		return -ENODEV;
	}

	return crypto_register_algs(twofish_algs, ARRAY_SIZE(twofish_algs));
}
/* Module exit: unregisters every entry of twofish_algs. */
static void __exit twofish_exit(void)
{
	crypto_unregister_algs(twofish_algs,
			       ARRAY_SIZE(twofish_algs));
}
/* Module entry/exit hooks and metadata. */
module_init(twofish_init);
module_exit(twofish_exit);
MODULE_DESCRIPTION("Twofish Cipher Algorithm, AVX optimized");
MODULE_LICENSE("GPL");
/* alias so that a request for module "twofish" can resolve to this module */
MODULE_ALIAS("twofish");

View File

@ -3,11 +3,6 @@
*
* Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
*
* CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
* Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
* CTR part based on code (crypto/ctr.c) by:
* (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
@ -33,20 +28,13 @@
#include <crypto/algapi.h>
#include <crypto/twofish.h>
#include <crypto/b128ops.h>
#include <asm/crypto/twofish.h>
#include <asm/crypto/glue_helper.h>
#include <crypto/lrw.h>
#include <crypto/xts.h>
/* regular block cipher functions from twofish_x86_64 module */
asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst,
const u8 *src);
/* 3-way parallel cipher functions */
asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src, bool xor);
asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src);
EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way);
EXPORT_SYMBOL_GPL(twofish_dec_blk_3way);
static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src)
@ -60,311 +48,139 @@ static inline void twofish_enc_blk_xor_3way(struct twofish_ctx *ctx, u8 *dst,
__twofish_enc_blk_3way(ctx, dst, src, true);
}
static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
void (*fn)(struct twofish_ctx *, u8 *, const u8 *),
void (*fn_3way)(struct twofish_ctx *, u8 *, const u8 *))
void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src)
{
struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
unsigned int bsize = TF_BLOCK_SIZE;
unsigned int nbytes;
int err;
u128 ivs[2];
err = blkcipher_walk_virt(desc, walk);
ivs[0] = src[0];
ivs[1] = src[1];
while ((nbytes = walk->nbytes)) {
u8 *wsrc = walk->src.virt.addr;
u8 *wdst = walk->dst.virt.addr;
twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src);
/* Process three block batch */
if (nbytes >= bsize * 3) {
do {
fn_3way(ctx, wdst, wsrc);
u128_xor(&dst[1], &dst[1], &ivs[0]);
u128_xor(&dst[2], &dst[2], &ivs[1]);
}
EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way);
wsrc += bsize * 3;
wdst += bsize * 3;
nbytes -= bsize * 3;
} while (nbytes >= bsize * 3);
void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
{
be128 ctrblk;
if (nbytes < bsize)
goto done;
}
if (dst != src)
*dst = *src;
/* Handle leftovers */
do {
fn(ctx, wdst, wsrc);
u128_to_be128(&ctrblk, iv);
u128_inc(iv);
wsrc += bsize;
wdst += bsize;
nbytes -= bsize;
} while (nbytes >= bsize);
twofish_enc_blk(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
u128_xor(dst, dst, (u128 *)&ctrblk);
}
EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr);
done:
err = blkcipher_walk_done(desc, walk, nbytes);
void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
u128 *iv)
{
be128 ctrblks[3];
if (dst != src) {
dst[0] = src[0];
dst[1] = src[1];
dst[2] = src[2];
}
return err;
u128_to_be128(&ctrblks[0], iv);
u128_inc(iv);
u128_to_be128(&ctrblks[1], iv);
u128_inc(iv);
u128_to_be128(&ctrblks[2], iv);
u128_inc(iv);
twofish_enc_blk_xor_3way(ctx, (u8 *)dst, (u8 *)ctrblks);
}
EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr_3way);
/*
 * ECB encryption routines for the 3-way module: the three-block routine
 * first, then the single-block routine. fpu_blocks_limit is -1.
 */
static const struct common_glue_ctx twofish_enc = {
	.fpu_blocks_limit = -1,
	.num_funcs = 2,

	.funcs = {
		{	/* 3 blocks per call */
			.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) },
			.num_blocks = 3,
		},
		{	/* single block */
			.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) },
			.num_blocks = 1,
		},
	}
};
/*
 * CTR routines for the 3-way module: the three-block routine first, then the
 * single-block routine. fpu_blocks_limit is -1.
 *
 * Both handlers have the four-argument (ctx, dst, src, iv) counter-mode
 * signature, so they are stored through the .ctr member with
 * GLUE_CTR_FUNC_CAST. Storing them through .ecb with GLUE_FUNC_CAST only
 * worked because the members share a union, and it cast the pointers to a
 * three-argument function type they do not have.
 */
static const struct common_glue_ctx twofish_ctr = {
	.num_funcs = 2,
	.fpu_blocks_limit = -1,

	.funcs = { {
		.num_blocks = 3,
		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_3way) }
	}, {
		.num_blocks = 1,
		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr) }
	} }
};
/*
 * ECB decryption routines for the 3-way module: the three-block routine
 * first, then the single-block routine. fpu_blocks_limit is -1.
 */
static const struct common_glue_ctx twofish_dec = {
	.fpu_blocks_limit = -1,
	.num_funcs = 2,

	.funcs = {
		{	/* 3 blocks per call */
			.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) },
			.num_blocks = 3,
		},
		{	/* single block */
			.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) },
			.num_blocks = 1,
		},
	}
};
/*
 * CBC decryption routines for the 3-way module. The single-block entry is
 * the plain block decryption routine stored through the .cbc member.
 * fpu_blocks_limit is -1.
 */
static const struct common_glue_ctx twofish_dec_cbc = {
	.fpu_blocks_limit = -1,
	.num_funcs = 2,

	.funcs = {
		{	/* 3 blocks per call */
			.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) },
			.num_blocks = 3,
		},
		{	/* single block */
			.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) },
			.num_blocks = 1,
		},
	}
};
static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
return ecb_crypt(desc, &walk, twofish_enc_blk, twofish_enc_blk_3way);
return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes);
}
static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
return ecb_crypt(desc, &walk, twofish_dec_blk, twofish_dec_blk_3way);
}
static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
unsigned int bsize = TF_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u128 *src = (u128 *)walk->src.virt.addr;
u128 *dst = (u128 *)walk->dst.virt.addr;
u128 *iv = (u128 *)walk->iv;
do {
u128_xor(dst, src, iv);
twofish_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
iv = dst;
src += 1;
dst += 1;
nbytes -= bsize;
} while (nbytes >= bsize);
u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv);
return nbytes;
return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes);
}
static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct blkcipher_walk walk;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
while ((nbytes = walk.nbytes)) {
nbytes = __cbc_encrypt(desc, &walk);
err = blkcipher_walk_done(desc, &walk, nbytes);
}
return err;
}
static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
unsigned int bsize = TF_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u128 *src = (u128 *)walk->src.virt.addr;
u128 *dst = (u128 *)walk->dst.virt.addr;
u128 ivs[3 - 1];
u128 last_iv;
/* Start of the last block. */
src += nbytes / bsize - 1;
dst += nbytes / bsize - 1;
last_iv = *src;
/* Process three block batch */
if (nbytes >= bsize * 3) {
do {
nbytes -= bsize * (3 - 1);
src -= 3 - 1;
dst -= 3 - 1;
ivs[0] = src[0];
ivs[1] = src[1];
twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src);
u128_xor(dst + 1, dst + 1, ivs + 0);
u128_xor(dst + 2, dst + 2, ivs + 1);
nbytes -= bsize;
if (nbytes < bsize)
goto done;
u128_xor(dst, dst, src - 1);
src -= 1;
dst -= 1;
} while (nbytes >= bsize * 3);
if (nbytes < bsize)
goto done;
}
/* Handle leftovers */
for (;;) {
twofish_dec_blk(ctx, (u8 *)dst, (u8 *)src);
nbytes -= bsize;
if (nbytes < bsize)
break;
u128_xor(dst, dst, src - 1);
src -= 1;
dst -= 1;
}
done:
u128_xor(dst, dst, (u128 *)walk->iv);
*(u128 *)walk->iv = last_iv;
return nbytes;
return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc,
dst, src, nbytes);
}
static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct blkcipher_walk walk;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
while ((nbytes = walk.nbytes)) {
nbytes = __cbc_decrypt(desc, &walk);
err = blkcipher_walk_done(desc, &walk, nbytes);
}
return err;
}
static inline void u128_to_be128(be128 *dst, const u128 *src)
{
dst->a = cpu_to_be64(src->a);
dst->b = cpu_to_be64(src->b);
}
static inline void be128_to_u128(u128 *dst, const be128 *src)
{
dst->a = be64_to_cpu(src->a);
dst->b = be64_to_cpu(src->b);
}
static inline void u128_inc(u128 *i)
{
i->b++;
if (!i->b)
i->a++;
}
static void ctr_crypt_final(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
u8 *ctrblk = walk->iv;
u8 keystream[TF_BLOCK_SIZE];
u8 *src = walk->src.virt.addr;
u8 *dst = walk->dst.virt.addr;
unsigned int nbytes = walk->nbytes;
twofish_enc_blk(ctx, keystream, ctrblk);
crypto_xor(keystream, src, nbytes);
memcpy(dst, keystream, nbytes);
crypto_inc(ctrblk, TF_BLOCK_SIZE);
}
static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
unsigned int bsize = TF_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u128 *src = (u128 *)walk->src.virt.addr;
u128 *dst = (u128 *)walk->dst.virt.addr;
u128 ctrblk;
be128 ctrblocks[3];
be128_to_u128(&ctrblk, (be128 *)walk->iv);
/* Process three block batch */
if (nbytes >= bsize * 3) {
do {
if (dst != src) {
dst[0] = src[0];
dst[1] = src[1];
dst[2] = src[2];
}
/* create ctrblks for parallel encrypt */
u128_to_be128(&ctrblocks[0], &ctrblk);
u128_inc(&ctrblk);
u128_to_be128(&ctrblocks[1], &ctrblk);
u128_inc(&ctrblk);
u128_to_be128(&ctrblocks[2], &ctrblk);
u128_inc(&ctrblk);
twofish_enc_blk_xor_3way(ctx, (u8 *)dst,
(u8 *)ctrblocks);
src += 3;
dst += 3;
nbytes -= bsize * 3;
} while (nbytes >= bsize * 3);
if (nbytes < bsize)
goto done;
}
/* Handle leftovers */
do {
if (dst != src)
*dst = *src;
u128_to_be128(&ctrblocks[0], &ctrblk);
u128_inc(&ctrblk);
twofish_enc_blk(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks);
u128_xor(dst, dst, (u128 *)ctrblocks);
src += 1;
dst += 1;
nbytes -= bsize;
} while (nbytes >= bsize);
done:
u128_to_be128((be128 *)walk->iv, &ctrblk);
return nbytes;
return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src,
nbytes);
}
static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct blkcipher_walk walk;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt_block(desc, &walk, TF_BLOCK_SIZE);
while ((nbytes = walk.nbytes) >= TF_BLOCK_SIZE) {
nbytes = __ctr_crypt(desc, &walk);
err = blkcipher_walk_done(desc, &walk, nbytes);
}
if (walk.nbytes) {
ctr_crypt_final(desc, &walk);
err = blkcipher_walk_done(desc, &walk, 0);
}
return err;
return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes);
}
static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
@ -397,13 +213,8 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
twofish_dec_blk(ctx, srcdst, srcdst);
}
struct twofish_lrw_ctx {
struct lrw_table_ctx lrw_table;
struct twofish_ctx twofish_ctx;
};
static int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
unsigned int keylen)
int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
unsigned int keylen)
{
struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
int err;
@ -415,6 +226,7 @@ static int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
return lrw_init_table(&ctx->lrw_table, key + keylen - TF_BLOCK_SIZE);
}
EXPORT_SYMBOL_GPL(lrw_twofish_setkey);
static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
@ -450,20 +262,16 @@ static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
return lrw_crypt(desc, dst, src, nbytes, &req);
}
static void lrw_exit_tfm(struct crypto_tfm *tfm)
void lrw_twofish_exit_tfm(struct crypto_tfm *tfm)
{
struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
lrw_free_table(&ctx->lrw_table);
}
EXPORT_SYMBOL_GPL(lrw_twofish_exit_tfm);
struct twofish_xts_ctx {
struct twofish_ctx tweak_ctx;
struct twofish_ctx crypt_ctx;
};
static int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
unsigned int keylen)
int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
unsigned int keylen)
{
struct twofish_xts_ctx *ctx = crypto_tfm_ctx(tfm);
u32 *flags = &tfm->crt_flags;
@ -486,6 +294,7 @@ static int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2,
flags);
}
EXPORT_SYMBOL_GPL(xts_twofish_setkey);
static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
@ -596,7 +405,7 @@ static struct crypto_alg tf_algs[5] = { {
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(tf_algs[3].cra_list),
.cra_exit = lrw_exit_tfm,
.cra_exit = lrw_twofish_exit_tfm,
.cra_u = {
.blkcipher = {
.min_keysize = TF_MIN_KEY_SIZE + TF_BLOCK_SIZE,

View File

@ -0,0 +1,31 @@
/*
* Shared async block cipher helpers
*/
#ifndef _CRYPTO_ABLK_HELPER_H
#define _CRYPTO_ABLK_HELPER_H
#include <linux/crypto.h>
#include <linux/kernel.h>
#include <crypto/cryptd.h>
/* Transform context used by the algorithms that rely on the ablk_* helpers. */
struct async_helper_ctx {
/* handle of the cryptd-backed ablkcipher transform */
struct cryptd_ablkcipher *cryptd_tfm;
};
/*
 * Shared ablkcipher entry points; the implementations live outside this
 * header. Drivers plug them into the .setkey/.encrypt/.decrypt and
 * .cra_init/.cra_exit slots of their algorithm descriptions.
 */
extern int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
unsigned int key_len);
/* NOTE(review): how __ablk_encrypt differs from ablk_encrypt is not visible here */
extern int __ablk_encrypt(struct ablkcipher_request *req);
extern int ablk_encrypt(struct ablkcipher_request *req);
extern int ablk_decrypt(struct ablkcipher_request *req);
extern void ablk_exit(struct crypto_tfm *tfm);
/* init variant that takes the driver name explicitly */
extern int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name);
extern int ablk_init(struct crypto_tfm *tfm);
#endif /* _CRYPTO_ABLK_HELPER_H */

View File

@ -0,0 +1,115 @@
/*
* Shared glue code for 128bit block ciphers
*/
#ifndef _CRYPTO_GLUE_HELPER_H
#define _CRYPTO_GLUE_HELPER_H
#include <linux/kernel.h>
#include <linux/crypto.h>
#include <asm/i387.h>
#include <crypto/b128ops.h>
/*
 * Handler signatures stored in a glue table:
 *  - common_glue_func_t:     byte-pointer dst/src (used via the .ecb member)
 *  - common_glue_cbc_func_t: u128 dst/src (used via the .cbc member)
 *  - common_glue_ctr_func_t: u128 dst/src plus the counter/IV to advance
 */
typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src);
typedef void (*common_glue_cbc_func_t)(void *ctx, u128 *dst, const u128 *src);
typedef void (*common_glue_ctr_func_t)(void *ctx, u128 *dst, const u128 *src,
u128 *iv);
/*
 * Casts for cipher routines whose context parameter is a concrete struct
 * pointer rather than void *. Use the macro matching the union member the
 * handler is stored in.
 */
#define GLUE_FUNC_CAST(fn) ((common_glue_func_t)(fn))
#define GLUE_CBC_FUNC_CAST(fn) ((common_glue_cbc_func_t)(fn))
#define GLUE_CTR_FUNC_CAST(fn) ((common_glue_ctr_func_t)(fn))
/* One handler of a glue table together with its batch size. */
struct common_glue_func_entry {
unsigned int num_blocks; /* number of blocks that @fn will process */
/* exactly one member is meaningful, depending on the mode of the table */
union {
common_glue_func_t ecb;
common_glue_cbc_func_t cbc;
common_glue_ctr_func_t ctr;
} fn_u;
};
/* Table of handlers for one cipher mode, consumed by the glue_*_128bit helpers. */
struct common_glue_ctx {
/* number of entries in funcs[] */
unsigned int num_funcs;
int fpu_blocks_limit; /* -1 means fpu not needed at all */
/*
* First funcs entry must have largest num_blocks and last funcs entry
* must have num_blocks == 1!
*/
struct common_glue_func_entry funcs[];
};
/*
 * Enable the FPU for a chunk of @nbytes if that is worthwhile.
 *
 * @bsize:            cipher block size in bytes
 * @fpu_blocks_limit: minimum number of blocks that justifies enabling the
 *                    FPU; negative means the FPU is never needed
 * @desc:             optional descriptor whose MAY_SLEEP flag is cleared
 *                    when the FPU gets enabled (may be NULL)
 * @fpu_enabled:      current state, as returned by an earlier call
 * @nbytes:           size of the chunk about to be processed
 *
 * Returns the new state: true if the FPU is (now or already) enabled.
 */
static inline bool glue_fpu_begin(unsigned int bsize, int fpu_blocks_limit,
				  struct blkcipher_desc *desc,
				  bool fpu_enabled, unsigned int nbytes)
{
	if (likely(fpu_blocks_limit < 0))
		return false;

	if (!fpu_enabled) {
		/*
		 * Vector registers are only used for large enough chunks, so
		 * leave the FPU alone until one shows up.
		 */
		if (nbytes < bsize * (unsigned int)fpu_blocks_limit)
			return false;

		/* prevent sleeping if FPU is in use */
		if (desc)
			desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

		kernel_fpu_begin();
	}

	return true;
}
/* Ends the FPU section if (and only if) @fpu_enabled says one is open. */
static inline void glue_fpu_end(bool fpu_enabled)
{
	if (!fpu_enabled)
		return;

	kernel_fpu_end();
}
/* Converts both 64-bit halves of @src from CPU to big-endian byte order. */
static inline void u128_to_be128(be128 *dst, const u128 *src)
{
	dst->a = cpu_to_be64(src->a);	/* first half */
	dst->b = cpu_to_be64(src->b);	/* second half */
}
/* Converts both 64-bit halves of @src from big-endian to CPU byte order. */
static inline void be128_to_u128(u128 *dst, const be128 *src)
{
	dst->a = be64_to_cpu(src->a);	/* first half */
	dst->b = be64_to_cpu(src->b);	/* second half */
}
/*
 * Adds one to a 128-bit value held as two 64-bit halves: ->b is incremented
 * and a wrap to zero carries into ->a.
 */
static inline void u128_inc(u128 *i)
{
	if (++i->b == 0)
		i->a++;
}
/*
 * Mode drivers implemented outside this header. ECB, CBC-decrypt and CTR
 * take a handler table; CBC-encrypt takes a single block function.
 */
extern int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
struct blkcipher_desc *desc,
struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes);
extern int glue_cbc_encrypt_128bit(const common_glue_func_t fn,
struct blkcipher_desc *desc,
struct scatterlist *dst,
struct scatterlist *src,
unsigned int nbytes);
extern int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
struct blkcipher_desc *desc,
struct scatterlist *dst,
struct scatterlist *src,
unsigned int nbytes);
extern int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
struct blkcipher_desc *desc,
struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes);
#endif /* _CRYPTO_GLUE_HELPER_H */

View File

@ -0,0 +1,32 @@
#ifndef ASM_X86_SERPENT_AVX_H
#define ASM_X86_SERPENT_AVX_H
#include <linux/crypto.h>
#include <crypto/serpent.h>
#define SERPENT_PARALLEL_BLOCKS 8
asmlinkage void __serpent_enc_blk_8way_avx(struct serpent_ctx *ctx, u8 *dst,
const u8 *src, bool xor);
asmlinkage void serpent_dec_blk_8way_avx(struct serpent_ctx *ctx, u8 *dst,
const u8 *src);
/*
 * Encrypt with the 8-way AVX routine, passing xor = false.
 * NOTE(review): presumably this processes SERPENT_PARALLEL_BLOCKS blocks and
 * plainly overwrites @dst — confirm against the assembly implementation.
 */
static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst,
const u8 *src)
{
__serpent_enc_blk_8way_avx(ctx, dst, src, false);
}
/*
 * Encrypt with the 8-way AVX routine, passing xor = true.
 * NOTE(review): presumably the cipher output is XORed into the existing
 * contents of @dst rather than overwriting it — confirm against the assembly.
 */
static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst,
const u8 *src)
{
__serpent_enc_blk_8way_avx(ctx, dst, src, true);
}
/*
 * Decrypt by forwarding to the 8-way AVX routine; provides the generic
 * "xway" name for serpent_dec_blk_8way_avx().
 */
static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst,
const u8 *src)
{
serpent_dec_blk_8way_avx(ctx, dst, src);
}
#endif

View File

@ -1,5 +1,5 @@
#ifndef ASM_X86_SERPENT_H
#define ASM_X86_SERPENT_H
#ifndef ASM_X86_SERPENT_SSE2_H
#define ASM_X86_SERPENT_SSE2_H
#include <linux/crypto.h>
#include <crypto/serpent.h>

View File

@ -0,0 +1,46 @@
#ifndef ASM_X86_TWOFISH_H
#define ASM_X86_TWOFISH_H
#include <linux/crypto.h>
#include <crypto/twofish.h>
#include <crypto/lrw.h>
#include <crypto/b128ops.h>
struct twofish_lrw_ctx {
struct lrw_table_ctx lrw_table;
struct twofish_ctx twofish_ctx;
};
struct twofish_xts_ctx {
struct twofish_ctx tweak_ctx;
struct twofish_ctx crypt_ctx;
};
/* regular block cipher functions from twofish_x86_64 module */
asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst,
const u8 *src);
/* 3-way parallel cipher functions */
asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src, bool xor);
asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src);
/* helpers from twofish_x86_64-3way module */
extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src);
extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src,
u128 *iv);
extern void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
u128 *iv);
extern int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
unsigned int keylen);
extern void lrw_twofish_exit_tfm(struct crypto_tfm *tfm);
extern int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
unsigned int keylen);
#endif /* ASM_X86_TWOFISH_H */

View File

@ -174,6 +174,16 @@ config CRYPTO_TEST
help
Quick & dirty crypto test module.
config CRYPTO_ABLK_HELPER_X86
tristate
depends on X86
select CRYPTO_CRYPTD
config CRYPTO_GLUE_HELPER_X86
tristate
depends on X86
select CRYPTO_ALGAPI
comment "Authenticated Encryption with Associated Data"
config CRYPTO_CCM
@ -552,6 +562,7 @@ config CRYPTO_AES_NI_INTEL
select CRYPTO_AES_X86_64 if 64BIT
select CRYPTO_AES_586 if !64BIT
select CRYPTO_CRYPTD
select CRYPTO_ABLK_HELPER_X86
select CRYPTO_ALGAPI
help
Use Intel AES-NI instructions for AES algorithm.
@ -593,7 +604,7 @@ config CRYPTO_ANUBIS
config CRYPTO_ARC4
tristate "ARC4 cipher algorithm"
select CRYPTO_ALGAPI
select CRYPTO_BLKCIPHER
help
ARC4 cipher algorithm.
@ -660,6 +671,7 @@ config CRYPTO_CAMELLIA_X86_64
depends on X86 && 64BIT
depends on CRYPTO
select CRYPTO_ALGAPI
select CRYPTO_GLUE_HELPER_X86
select CRYPTO_LRW
select CRYPTO_XTS
help
@ -786,6 +798,8 @@ config CRYPTO_SERPENT_SSE2_X86_64
depends on X86 && 64BIT
select CRYPTO_ALGAPI
select CRYPTO_CRYPTD
select CRYPTO_ABLK_HELPER_X86
select CRYPTO_GLUE_HELPER_X86
select CRYPTO_SERPENT
select CRYPTO_LRW
select CRYPTO_XTS
@ -806,6 +820,8 @@ config CRYPTO_SERPENT_SSE2_586
depends on X86 && !64BIT
select CRYPTO_ALGAPI
select CRYPTO_CRYPTD
select CRYPTO_ABLK_HELPER_X86
select CRYPTO_GLUE_HELPER_X86
select CRYPTO_SERPENT
select CRYPTO_LRW
select CRYPTO_XTS
@ -821,6 +837,28 @@ config CRYPTO_SERPENT_SSE2_586
See also:
<http://www.cl.cam.ac.uk/~rja14/serpent.html>
config CRYPTO_SERPENT_AVX_X86_64
tristate "Serpent cipher algorithm (x86_64/AVX)"
depends on X86 && 64BIT
select CRYPTO_ALGAPI
select CRYPTO_CRYPTD
select CRYPTO_ABLK_HELPER_X86
select CRYPTO_GLUE_HELPER_X86
select CRYPTO_SERPENT
select CRYPTO_LRW
select CRYPTO_XTS
help
Serpent cipher algorithm, by Anderson, Biham & Knudsen.
Keys are allowed to be from 0 to 256 bits in length, in steps
of 8 bits.
This module provides the Serpent cipher algorithm that processes
eight blocks in parallel using the AVX instruction set.
See also:
<http://www.cl.cam.ac.uk/~rja14/serpent.html>
config CRYPTO_TEA
tristate "TEA, XTEA and XETA cipher algorithms"
select CRYPTO_ALGAPI
@ -897,6 +935,7 @@ config CRYPTO_TWOFISH_X86_64_3WAY
select CRYPTO_ALGAPI
select CRYPTO_TWOFISH_COMMON
select CRYPTO_TWOFISH_X86_64
select CRYPTO_GLUE_HELPER_X86
select CRYPTO_LRW
select CRYPTO_XTS
help
@ -913,6 +952,32 @@ config CRYPTO_TWOFISH_X86_64_3WAY
See also:
<http://www.schneier.com/twofish.html>
config CRYPTO_TWOFISH_AVX_X86_64
tristate "Twofish cipher algorithm (x86_64/AVX)"
depends on X86 && 64BIT
select CRYPTO_ALGAPI
select CRYPTO_CRYPTD
select CRYPTO_ABLK_HELPER_X86
select CRYPTO_GLUE_HELPER_X86
select CRYPTO_TWOFISH_COMMON
select CRYPTO_TWOFISH_X86_64
select CRYPTO_TWOFISH_X86_64_3WAY
select CRYPTO_LRW
select CRYPTO_XTS
help
Twofish cipher algorithm (x86_64/AVX).
Twofish was submitted as an AES (Advanced Encryption Standard)
candidate cipher by researchers at CounterPane Systems. It is a
16 round block cipher supporting key sizes of 128, 192, and 256
bits.
This module provides the Twofish cipher algorithm that processes
eight blocks in parallel using the AVX instruction set.
See also:
<http://www.schneier.com/twofish.html>
comment "Compression"
config CRYPTO_DEFLATE

View File

@ -24,22 +24,6 @@
static LIST_HEAD(crypto_template_list);
void crypto_larval_error(const char *name, u32 type, u32 mask)
{
struct crypto_alg *alg;
alg = crypto_alg_lookup(name, type, mask);
if (alg) {
if (crypto_is_larval(alg)) {
struct crypto_larval *larval = (void *)alg;
complete_all(&larval->completion);
}
crypto_mod_put(alg);
}
}
EXPORT_SYMBOL_GPL(crypto_larval_error);
static inline int crypto_set_driver_name(struct crypto_alg *alg)
{
static const char suffix[] = "-generic";
@ -295,7 +279,6 @@ found:
continue;
larval->adult = alg;
complete_all(&larval->completion);
continue;
}

View File

@ -11,6 +11,7 @@
*/
#include <crypto/internal/aead.h>
#include <linux/completion.h>
#include <linux/ctype.h>
#include <linux/err.h>
#include <linux/init.h>
@ -47,6 +48,8 @@ struct cryptomgr_param {
char larval[CRYPTO_MAX_ALG_NAME];
char template[CRYPTO_MAX_ALG_NAME];
struct completion *completion;
u32 otype;
u32 omask;
};
@ -66,7 +69,7 @@ static int cryptomgr_probe(void *data)
tmpl = crypto_lookup_template(param->template);
if (!tmpl)
goto err;
goto out;
do {
if (tmpl->create) {
@ -83,16 +86,10 @@ static int cryptomgr_probe(void *data)
crypto_tmpl_put(tmpl);
if (err)
goto err;
out:
complete_all(param->completion);
kfree(param);
module_put_and_exit(0);
err:
crypto_larval_error(param->larval, param->otype, param->omask);
goto out;
}
static int cryptomgr_schedule_probe(struct crypto_larval *larval)
@ -192,10 +189,14 @@ static int cryptomgr_schedule_probe(struct crypto_larval *larval)
memcpy(param->larval, larval->alg.cra_name, CRYPTO_MAX_ALG_NAME);
param->completion = &larval->completion;
thread = kthread_run(cryptomgr_probe, param, "cryptomgr_probe");
if (IS_ERR(thread))
goto err_free_param;
wait_for_completion_interruptible(&larval->completion);
return NOTIFY_STOP;
err_free_param:

View File

@ -11,17 +11,19 @@
* (at your option) any later version.
*
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/crypto.h>
#include <crypto/algapi.h>
#define ARC4_MIN_KEY_SIZE 1
#define ARC4_MAX_KEY_SIZE 256
#define ARC4_BLOCK_SIZE 1
struct arc4_ctx {
u8 S[256];
u8 x, y;
u32 S[256];
u32 x, y;
};
static int arc4_set_key(struct crypto_tfm *tfm, const u8 *in_key,
@ -37,7 +39,7 @@ static int arc4_set_key(struct crypto_tfm *tfm, const u8 *in_key,
ctx->S[i] = i;
for (i = 0; i < 256; i++) {
u8 a = ctx->S[i];
u32 a = ctx->S[i];
j = (j + in_key[k] + a) & 0xff;
ctx->S[i] = ctx->S[j];
ctx->S[j] = a;
@ -48,51 +50,114 @@ static int arc4_set_key(struct crypto_tfm *tfm, const u8 *in_key,
return 0;
}
static void arc4_crypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
static void arc4_crypt(struct arc4_ctx *ctx, u8 *out, const u8 *in,
unsigned int len)
{
struct arc4_ctx *ctx = crypto_tfm_ctx(tfm);
u32 *const S = ctx->S;
u32 x, y, a, b;
u32 ty, ta, tb;
u8 *const S = ctx->S;
u8 x = ctx->x;
u8 y = ctx->y;
u8 a, b;
if (len == 0)
return;
x = ctx->x;
y = ctx->y;
a = S[x];
y = (y + a) & 0xff;
b = S[y];
S[x] = b;
S[y] = a;
x = (x + 1) & 0xff;
*out++ = *in ^ S[(a + b) & 0xff];
do {
S[y] = a;
a = (a + b) & 0xff;
S[x] = b;
x = (x + 1) & 0xff;
ta = S[x];
ty = (y + ta) & 0xff;
tb = S[ty];
*out++ = *in++ ^ S[a];
if (--len == 0)
break;
y = ty;
a = ta;
b = tb;
} while (true);
ctx->x = x;
ctx->y = y;
}
static struct crypto_alg arc4_alg = {
/*
 * Single-block cipher callback: ARC4 has a one-byte block, so this runs the
 * stream routine for exactly one byte using the transform's context.
 */
static void arc4_crypt_one(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
	struct arc4_ctx *state = crypto_tfm_ctx(tfm);

	arc4_crypt(state, out, in, 1);
}
/*
 * ecb_arc4_crypt - blkcipher encrypt/decrypt callback for "ecb(arc4)".
 *
 * Walks the source/destination scatterlists and feeds every mapped chunk to
 * arc4_crypt() in one call, so the keystream state in the context advances
 * across chunks.  Each chunk is consumed completely (0 is reported to
 * blkcipher_walk_done()).
 *
 * Returns the status of the last walk operation.
 */
static int ecb_arc4_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
			  struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;
	struct arc4_ctx *state = crypto_blkcipher_ctx(desc->tfm);
	int ret;

	blkcipher_walk_init(&walk, dst, src, nbytes);

	for (ret = blkcipher_walk_virt(desc, &walk);
	     walk.nbytes > 0;
	     ret = blkcipher_walk_done(desc, &walk, 0))
		arc4_crypt(state, walk.dst.virt.addr, walk.src.virt.addr,
			   walk.nbytes);

	return ret;
}
static struct crypto_alg arc4_algs[2] = { {
.cra_name = "arc4",
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = ARC4_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct arc4_ctx),
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(arc4_alg.cra_list),
.cra_u = { .cipher = {
.cia_min_keysize = ARC4_MIN_KEY_SIZE,
.cia_max_keysize = ARC4_MAX_KEY_SIZE,
.cia_setkey = arc4_set_key,
.cia_encrypt = arc4_crypt,
.cia_decrypt = arc4_crypt } }
};
.cra_u = {
.cipher = {
.cia_min_keysize = ARC4_MIN_KEY_SIZE,
.cia_max_keysize = ARC4_MAX_KEY_SIZE,
.cia_setkey = arc4_set_key,
.cia_encrypt = arc4_crypt_one,
.cia_decrypt = arc4_crypt_one,
},
},
}, {
.cra_name = "ecb(arc4)",
.cra_priority = 100,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = ARC4_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct arc4_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = ARC4_MIN_KEY_SIZE,
.max_keysize = ARC4_MAX_KEY_SIZE,
.setkey = arc4_set_key,
.encrypt = ecb_arc4_crypt,
.decrypt = ecb_arc4_crypt,
},
},
} };
static int __init arc4_init(void)
{
return crypto_register_alg(&arc4_alg);
return crypto_register_algs(arc4_algs, ARRAY_SIZE(arc4_algs));
}
static void __exit arc4_exit(void)
{
crypto_unregister_alg(&arc4_alg);
crypto_unregister_algs(arc4_algs, ARRAY_SIZE(arc4_algs));
}
module_init(arc4_init);

View File

@ -83,7 +83,6 @@ void crypto_exit_compress_ops(struct crypto_tfm *tfm);
struct crypto_larval *crypto_larval_alloc(const char *name, u32 type, u32 mask);
void crypto_larval_kill(struct crypto_alg *alg);
struct crypto_alg *crypto_larval_lookup(const char *name, u32 type, u32 mask);
void crypto_larval_error(const char *name, u32 type, u32 mask);
void crypto_alg_tested(const char *name, int err);
void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list,

View File

@ -809,7 +809,7 @@ static void test_acipher_speed(const char *algo, int enc, unsigned int sec,
struct cipher_speed_template *template,
unsigned int tcount, u8 *keysize)
{
unsigned int ret, i, j, iv_len;
unsigned int ret, i, j, k, iv_len;
struct tcrypt_result tresult;
const char *key;
char iv[128];
@ -883,11 +883,23 @@ static void test_acipher_speed(const char *algo, int enc, unsigned int sec,
}
sg_init_table(sg, TVMEMSIZE);
sg_set_buf(sg, tvmem[0] + *keysize,
k = *keysize + *b_size;
if (k > PAGE_SIZE) {
sg_set_buf(sg, tvmem[0] + *keysize,
PAGE_SIZE - *keysize);
for (j = 1; j < TVMEMSIZE; j++) {
sg_set_buf(sg + j, tvmem[j], PAGE_SIZE);
memset(tvmem[j], 0xff, PAGE_SIZE);
k -= PAGE_SIZE;
j = 1;
while (k > PAGE_SIZE) {
sg_set_buf(sg + j, tvmem[j], PAGE_SIZE);
memset(tvmem[j], 0xff, PAGE_SIZE);
j++;
k -= PAGE_SIZE;
}
sg_set_buf(sg + j, tvmem[j], k);
memset(tvmem[j], 0xff, k);
} else {
sg_set_buf(sg, tvmem[0] + *keysize, *b_size);
}
iv_len = crypto_ablkcipher_ivsize(tfm);
@ -1192,6 +1204,9 @@ static int do_test(int m)
case 109:
ret += tcrypt_test("vmac(aes)");
break;
case 110:
ret += tcrypt_test("hmac(crc32)");
break;
case 150:
ret += tcrypt_test("ansi_cprng");
@ -1339,6 +1354,11 @@ static int do_test(int m)
speed_template_32_64);
break;
case 208:
test_cipher_speed("ecb(arc4)", ENCRYPT, sec, NULL, 0,
speed_template_8);
break;
case 300:
/* fall through */
@ -1512,6 +1532,14 @@ static int do_test(int m)
speed_template_16_24_32);
test_acipher_speed("ctr(aes)", DECRYPT, sec, NULL, 0,
speed_template_16_24_32);
test_acipher_speed("cfb(aes)", ENCRYPT, sec, NULL, 0,
speed_template_16_24_32);
test_acipher_speed("cfb(aes)", DECRYPT, sec, NULL, 0,
speed_template_16_24_32);
test_acipher_speed("ofb(aes)", ENCRYPT, sec, NULL, 0,
speed_template_16_24_32);
test_acipher_speed("ofb(aes)", DECRYPT, sec, NULL, 0,
speed_template_16_24_32);
break;
case 501:
@ -1527,6 +1555,18 @@ static int do_test(int m)
test_acipher_speed("cbc(des3_ede)", DECRYPT, sec,
des3_speed_template, DES3_SPEED_VECTORS,
speed_template_24);
test_acipher_speed("cfb(des3_ede)", ENCRYPT, sec,
des3_speed_template, DES3_SPEED_VECTORS,
speed_template_24);
test_acipher_speed("cfb(des3_ede)", DECRYPT, sec,
des3_speed_template, DES3_SPEED_VECTORS,
speed_template_24);
test_acipher_speed("ofb(des3_ede)", ENCRYPT, sec,
des3_speed_template, DES3_SPEED_VECTORS,
speed_template_24);
test_acipher_speed("ofb(des3_ede)", DECRYPT, sec,
des3_speed_template, DES3_SPEED_VECTORS,
speed_template_24);
break;
case 502:
@ -1538,6 +1578,14 @@ static int do_test(int m)
speed_template_8);
test_acipher_speed("cbc(des)", DECRYPT, sec, NULL, 0,
speed_template_8);
test_acipher_speed("cfb(des)", ENCRYPT, sec, NULL, 0,
speed_template_8);
test_acipher_speed("cfb(des)", DECRYPT, sec, NULL, 0,
speed_template_8);
test_acipher_speed("ofb(des)", ENCRYPT, sec, NULL, 0,
speed_template_8);
test_acipher_speed("ofb(des)", DECRYPT, sec, NULL, 0,
speed_template_8);
break;
case 503:
@ -1563,6 +1611,34 @@ static int do_test(int m)
speed_template_32_64);
break;
case 504:
test_acipher_speed("ecb(twofish)", ENCRYPT, sec, NULL, 0,
speed_template_16_24_32);
test_acipher_speed("ecb(twofish)", DECRYPT, sec, NULL, 0,
speed_template_16_24_32);
test_acipher_speed("cbc(twofish)", ENCRYPT, sec, NULL, 0,
speed_template_16_24_32);
test_acipher_speed("cbc(twofish)", DECRYPT, sec, NULL, 0,
speed_template_16_24_32);
test_acipher_speed("ctr(twofish)", ENCRYPT, sec, NULL, 0,
speed_template_16_24_32);
test_acipher_speed("ctr(twofish)", DECRYPT, sec, NULL, 0,
speed_template_16_24_32);
test_acipher_speed("lrw(twofish)", ENCRYPT, sec, NULL, 0,
speed_template_32_40_48);
test_acipher_speed("lrw(twofish)", DECRYPT, sec, NULL, 0,
speed_template_32_40_48);
test_acipher_speed("xts(twofish)", ENCRYPT, sec, NULL, 0,
speed_template_32_48_64);
test_acipher_speed("xts(twofish)", DECRYPT, sec, NULL, 0,
speed_template_32_48_64);
break;
case 505:
test_acipher_speed("ecb(arc4)", ENCRYPT, sec, NULL, 0,
speed_template_8);
break;
case 1000:
test_available();
break;

View File

@ -1534,6 +1534,21 @@ static int alg_test_null(const struct alg_test_desc *desc,
/* Please keep this list sorted by algorithm name. */
static const struct alg_test_desc alg_test_descs[] = {
{
.alg = "__cbc-serpent-avx",
.test = alg_test_null,
.suite = {
.cipher = {
.enc = {
.vecs = NULL,
.count = 0
},
.dec = {
.vecs = NULL,
.count = 0
}
}
}
}, {
.alg = "__cbc-serpent-sse2",
.test = alg_test_null,
.suite = {
@ -1548,9 +1563,40 @@ static const struct alg_test_desc alg_test_descs[] = {
}
}
}
}, {
.alg = "__cbc-twofish-avx",
.test = alg_test_null,
.suite = {
.cipher = {
.enc = {
.vecs = NULL,
.count = 0
},
.dec = {
.vecs = NULL,
.count = 0
}
}
}
}, {
.alg = "__driver-cbc-aes-aesni",
.test = alg_test_null,
.fips_allowed = 1,
.suite = {
.cipher = {
.enc = {
.vecs = NULL,
.count = 0
},
.dec = {
.vecs = NULL,
.count = 0
}
}
}
}, {
.alg = "__driver-cbc-serpent-avx",
.test = alg_test_null,
.suite = {
.cipher = {
.enc = {
@ -1578,9 +1624,40 @@ static const struct alg_test_desc alg_test_descs[] = {
}
}
}
}, {
.alg = "__driver-cbc-twofish-avx",
.test = alg_test_null,
.suite = {
.cipher = {
.enc = {
.vecs = NULL,
.count = 0
},
.dec = {
.vecs = NULL,
.count = 0
}
}
}
}, {
.alg = "__driver-ecb-aes-aesni",
.test = alg_test_null,
.fips_allowed = 1,
.suite = {
.cipher = {
.enc = {
.vecs = NULL,
.count = 0
},
.dec = {
.vecs = NULL,
.count = 0
}
}
}
}, {
.alg = "__driver-ecb-serpent-avx",
.test = alg_test_null,
.suite = {
.cipher = {
.enc = {
@ -1608,9 +1685,25 @@ static const struct alg_test_desc alg_test_descs[] = {
}
}
}
}, {
.alg = "__driver-ecb-twofish-avx",
.test = alg_test_null,
.suite = {
.cipher = {
.enc = {
.vecs = NULL,
.count = 0
},
.dec = {
.vecs = NULL,
.count = 0
}
}
}
}, {
.alg = "__ghash-pclmulqdqni",
.test = alg_test_null,
.fips_allowed = 1,
.suite = {
.hash = {
.vecs = NULL,
@ -1627,6 +1720,42 @@ static const struct alg_test_desc alg_test_descs[] = {
.count = ANSI_CPRNG_AES_TEST_VECTORS
}
}
}, {
.alg = "authenc(hmac(sha1),cbc(aes))",
.test = alg_test_aead,
.fips_allowed = 1,
.suite = {
.aead = {
.enc = {
.vecs = hmac_sha1_aes_cbc_enc_tv_template,
.count = HMAC_SHA1_AES_CBC_ENC_TEST_VECTORS
}
}
}
}, {
.alg = "authenc(hmac(sha256),cbc(aes))",
.test = alg_test_aead,
.fips_allowed = 1,
.suite = {
.aead = {
.enc = {
.vecs = hmac_sha256_aes_cbc_enc_tv_template,
.count = HMAC_SHA256_AES_CBC_ENC_TEST_VECTORS
}
}
}
}, {
.alg = "authenc(hmac(sha512),cbc(aes))",
.test = alg_test_aead,
.fips_allowed = 1,
.suite = {
.aead = {
.enc = {
.vecs = hmac_sha512_aes_cbc_enc_tv_template,
.count = HMAC_SHA512_AES_CBC_ENC_TEST_VECTORS
}
}
}
}, {
.alg = "cbc(aes)",
.test = alg_test_skcipher,
@ -1775,9 +1904,41 @@ static const struct alg_test_desc alg_test_descs[] = {
.count = CRC32C_TEST_VECTORS
}
}
}, {
.alg = "cryptd(__driver-cbc-aes-aesni)",
.test = alg_test_null,
.fips_allowed = 1,
.suite = {
.cipher = {
.enc = {
.vecs = NULL,
.count = 0
},
.dec = {
.vecs = NULL,
.count = 0
}
}
}
}, {
.alg = "cryptd(__driver-ecb-aes-aesni)",
.test = alg_test_null,
.fips_allowed = 1,
.suite = {
.cipher = {
.enc = {
.vecs = NULL,
.count = 0
},
.dec = {
.vecs = NULL,
.count = 0
}
}
}
}, {
.alg = "cryptd(__driver-ecb-serpent-avx)",
.test = alg_test_null,
.suite = {
.cipher = {
.enc = {
@ -1805,9 +1966,41 @@ static const struct alg_test_desc alg_test_descs[] = {
}
}
}
}, {
.alg = "cryptd(__driver-ecb-twofish-avx)",
.test = alg_test_null,
.suite = {
.cipher = {
.enc = {
.vecs = NULL,
.count = 0
},
.dec = {
.vecs = NULL,
.count = 0
}
}
}
}, {
.alg = "cryptd(__driver-gcm-aes-aesni)",
.test = alg_test_null,
.fips_allowed = 1,
.suite = {
.cipher = {
.enc = {
.vecs = NULL,
.count = 0
},
.dec = {
.vecs = NULL,
.count = 0
}
}
}
}, {
.alg = "cryptd(__ghash-pclmulqdqni)",
.test = alg_test_null,
.fips_allowed = 1,
.suite = {
.hash = {
.vecs = NULL,
@ -1923,6 +2116,7 @@ static const struct alg_test_desc alg_test_descs[] = {
}, {
.alg = "ecb(__aes-aesni)",
.test = alg_test_null,
.fips_allowed = 1,
.suite = {
.cipher = {
.enc = {
@ -2219,6 +2413,15 @@ static const struct alg_test_desc alg_test_descs[] = {
.count = GHASH_TEST_VECTORS
}
}
}, {
.alg = "hmac(crc32)",
.test = alg_test_hash,
.suite = {
.hash = {
.vecs = bfin_crc_tv_template,
.count = BFIN_CRC_TEST_VECTORS
}
}
}, {
.alg = "hmac(md5)",
.test = alg_test_hash,

File diff suppressed because it is too large Load Diff

View File

@ -263,3 +263,15 @@ config HW_RANDOM_PSERIES
module will be called pseries-rng.
If unsure, say Y.
config HW_RANDOM_EXYNOS
tristate "EXYNOS HW random number generator support"
depends on HW_RANDOM && HAS_IOMEM && HAVE_CLK
---help---
This driver provides kernel-side support for the Random Number
Generator hardware found on EXYNOS SOCs.
To compile this driver as a module, choose M here: the
module will be called exynos-rng.
If unsure, say Y.

View File

@ -23,3 +23,4 @@ obj-$(CONFIG_HW_RANDOM_NOMADIK) += nomadik-rng.o
obj-$(CONFIG_HW_RANDOM_PICOXCELL) += picoxcell-rng.o
obj-$(CONFIG_HW_RANDOM_PPC4XX) += ppc4xx-rng.o
obj-$(CONFIG_HW_RANDOM_PSERIES) += pseries-rng.o
obj-$(CONFIG_HW_RANDOM_EXYNOS) += exynos-rng.o

View File

@ -0,0 +1,182 @@
/*
* exynos-rng.c - Random Number Generator driver for the exynos
*
* Copyright (C) 2012 Samsung Electronics
* Jonghwa Lee <jonghwa3.lee@samsung.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation;
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*/
#include <linux/hw_random.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/platform_device.h>
#include <linux/clk.h>
#include <linux/pm_runtime.h>
#include <linux/err.h>
#define EXYNOS_PRNG_STATUS_OFFSET 0x10
#define EXYNOS_PRNG_SEED_OFFSET 0x140
#define EXYNOS_PRNG_OUT1_OFFSET 0x160
#define SEED_SETTING_DONE BIT(1)
#define PRNG_START 0x18
#define PRNG_DONE BIT(5)
#define EXYNOS_AUTOSUSPEND_DELAY 100
/* Per-device driver state, allocated in probe and stored as platform drvdata. */
struct exynos_rng {
/* Owning device; used for the runtime-PM get/put calls. */
struct device *dev;
/* Embedded hwrng; callbacks recover this struct from it via container_of(). */
struct hwrng rng;
/* Base of the ioremapped register window. */
void __iomem *mem;
/* Clock disabled/enabled by the runtime suspend/resume callbacks. */
struct clk *clk;
};
/* Read the 32-bit register located @offset bytes into the register window. */
static u32 exynos_rng_readl(struct exynos_rng *rng, u32 offset)
{
	void __iomem *reg = rng->mem + offset;

	return __raw_readl(reg);
}
/* Write @val to the 32-bit register located @offset bytes into the window. */
static void exynos_rng_writel(struct exynos_rng *rng, u32 val, u32 offset)
{
	void __iomem *reg = rng->mem + offset;

	__raw_writel(val, reg);
}
static int exynos_init(struct hwrng *rng)
{
struct exynos_rng *exynos_rng = container_of(rng,
struct exynos_rng, rng);
int i;
int ret = 0;
pm_runtime_get_sync(exynos_rng->dev);
for (i = 0 ; i < 5 ; i++)
exynos_rng_writel(exynos_rng, jiffies,
EXYNOS_PRNG_SEED_OFFSET + 4*i);
if (!(exynos_rng_readl(exynos_rng, EXYNOS_PRNG_STATUS_OFFSET)
& SEED_SETTING_DONE))
ret = -EIO;
pm_runtime_put_noidle(exynos_rng->dev);
return ret;
}
/*
 * exynos_read - hwrng ->read() callback: fetch one 32-bit word from the PRNG.
 * @rng:  hwrng embedded in the driver's struct exynos_rng
 * @buf:  destination buffer
 * @max:  capacity of @buf in bytes
 * @wait: unused; the poll below is always bounded
 *
 * Returns the number of bytes stored (4), 0 if @buf cannot hold one word, or
 * -ETIMEDOUT if the hardware never signalled PRNG_DONE.
 */
static int exynos_read(struct hwrng *rng, void *buf,
					size_t max, bool wait)
{
	struct exynos_rng *exynos_rng = container_of(rng,
						struct exynos_rng, rng);
	u32 *data = buf;
	int retry = 100;	/* upper bound on status polls */
	int ret = sizeof(*data);

	/* Never write past the caller's buffer. */
	if (max < sizeof(*data))
		return 0;

	pm_runtime_get_sync(exynos_rng->dev);

	exynos_rng_writel(exynos_rng, PRNG_START, 0);

	/* Bounded poll so a wedged block cannot hang the CPU forever. */
	while (!(exynos_rng_readl(exynos_rng,
			EXYNOS_PRNG_STATUS_OFFSET) & PRNG_DONE) && --retry)
		cpu_relax();
	if (!retry) {
		ret = -ETIMEDOUT;
		goto out;
	}

	/* NOTE(review): presumably write-1-to-clear of the DONE flag — confirm. */
	exynos_rng_writel(exynos_rng, PRNG_DONE, EXYNOS_PRNG_STATUS_OFFSET);

	*data = exynos_rng_readl(exynos_rng, EXYNOS_PRNG_OUT1_OFFSET);
out:
	/*
	 * Drop the usage count taken by pm_runtime_get_sync() above; without
	 * the put the count only ever grows and autosuspend can never run.
	 */
	pm_runtime_mark_last_busy(exynos_rng->dev);
	pm_runtime_put_sync_autosuspend(exynos_rng->dev);

	return ret;
}
/*
 * exynos_rng_probe - bind the driver to an "exynos-rng" platform device.
 *
 * Allocates the device-managed driver state, looks up the "secss" clock, maps
 * the first memory resource, enables runtime PM with autosuspend and registers
 * the hwrng.
 *
 * Returns 0 on success or a negative errno.
 */
static int __devinit exynos_rng_probe(struct platform_device *pdev)
{
	struct exynos_rng *exynos_rng;
	int ret;

	exynos_rng = devm_kzalloc(&pdev->dev, sizeof(struct exynos_rng),
					GFP_KERNEL);
	if (!exynos_rng)
		return -ENOMEM;

	exynos_rng->dev = &pdev->dev;
	exynos_rng->rng.name = "exynos";
	exynos_rng->rng.init = exynos_init;
	exynos_rng->rng.read = exynos_read;

	exynos_rng->clk = devm_clk_get(&pdev->dev, "secss");
	if (IS_ERR(exynos_rng->clk)) {
		dev_err(&pdev->dev, "Couldn't get clock.\n");
		/* Propagate the real cause (e.g. -EPROBE_DEFER). */
		return PTR_ERR(exynos_rng->clk);
	}

	/* A missing resource (NULL) is rejected by the helper itself. */
	exynos_rng->mem = devm_request_and_ioremap(&pdev->dev,
			platform_get_resource(pdev, IORESOURCE_MEM, 0));
	if (!exynos_rng->mem)
		return -EBUSY;

	/* The runtime-PM callbacks fetch the state through drvdata. */
	platform_set_drvdata(pdev, exynos_rng);

	pm_runtime_set_autosuspend_delay(&pdev->dev, EXYNOS_AUTOSUSPEND_DELAY);
	pm_runtime_use_autosuspend(&pdev->dev);
	pm_runtime_enable(&pdev->dev);

	ret = hwrng_register(&exynos_rng->rng);
	if (ret)
		/* Keep enable/disable balanced when probing fails. */
		pm_runtime_disable(&pdev->dev);

	return ret;
}
/*
 * exynos_rng_remove - unbind the driver: unregister the hwrng and undo the
 * pm_runtime_enable() done in probe.  Always returns 0.
 */
static int __devexit exynos_rng_remove(struct platform_device *pdev)
{
	struct exynos_rng *exynos_rng = platform_get_drvdata(pdev);

	hwrng_unregister(&exynos_rng->rng);

	/* Balance probe's pm_runtime_enable() so a re-bind starts clean. */
	pm_runtime_disable(&pdev->dev);

	return 0;
}
/* Runtime-PM suspend: gate the clock stored in the driver state. */
static int exynos_rng_runtime_suspend(struct device *dev)
{
	struct exynos_rng *priv = dev_get_drvdata(dev);

	clk_disable_unprepare(priv->clk);

	return 0;
}
/*
 * Runtime-PM resume: prepare and enable the clock again, handing back
 * whatever status clk_prepare_enable() reports.
 */
static int exynos_rng_runtime_resume(struct device *dev)
{
	struct exynos_rng *priv = dev_get_drvdata(dev);
	int status = clk_prepare_enable(priv->clk);

	return status;
}
UNIVERSAL_DEV_PM_OPS(exynos_rng_pm_ops, exynos_rng_runtime_suspend,
exynos_rng_runtime_resume, NULL);
/*
 * Platform driver glue: matches devices named "exynos-rng" and wires up the
 * probe/remove entry points and the PM operations defined above.
 */
static struct platform_driver exynos_rng_driver = {
.driver = {
.name = "exynos-rng",
.owner = THIS_MODULE,
.pm = &exynos_rng_pm_ops,
},
.probe = exynos_rng_probe,
/* __devexit_p() wraps the reference to the __devexit-annotated remove callback. */
.remove = __devexit_p(exynos_rng_remove),
};
module_platform_driver(exynos_rng_driver);
MODULE_DESCRIPTION("EXYNOS 4 H/W Random Number Generator driver");
MODULE_AUTHOR("Jonghwa Lee <jonghwa3.lee@samsung.com>");
MODULE_LICENSE("GPL");

View File

@ -24,6 +24,7 @@
#include <linux/ioport.h>
#include <linux/platform_device.h>
#include <linux/hw_random.h>
#include <linux/delay.h>
#include <linux/io.h>
/* RNGA Registers */
@ -60,16 +61,20 @@
static struct platform_device *rng_dev;
static int mxc_rnga_data_present(struct hwrng *rng)
static int mxc_rnga_data_present(struct hwrng *rng, int wait)
{
int level;
void __iomem *rng_base = (void __iomem *)rng->priv;
int i;
/* how many random numbers is in FIFO? [0-16] */
level = ((__raw_readl(rng_base + RNGA_STATUS) &
RNGA_STATUS_LEVEL_MASK) >> 8);
return level > 0 ? 1 : 0;
for (i = 0; i < 20; i++) {
/* how many random numbers are in FIFO? [0-16] */
int level = (__raw_readl(rng_base + RNGA_STATUS) &
RNGA_STATUS_LEVEL_MASK) >> 8;
if (level || !wait)
return !!level;
udelay(10);
}
return 0;
}
static int mxc_rnga_data_read(struct hwrng *rng, u32 * data)

View File

@ -298,7 +298,7 @@ config CRYPTO_DEV_TEGRA_AES
will be called tegra-aes.
config CRYPTO_DEV_NX
tristate "Support for Power7+ in-Nest cryptographic accleration"
tristate "Support for Power7+ in-Nest cryptographic acceleration"
depends on PPC64 && IBMVIO
select CRYPTO_AES
select CRYPTO_CBC
@ -325,4 +325,58 @@ if CRYPTO_DEV_UX500
source "drivers/crypto/ux500/Kconfig"
endif # if CRYPTO_DEV_UX500
config CRYPTO_DEV_BFIN_CRC
tristate "Support for Blackfin CRC hardware"
depends on BF60x
help
Newer Blackfin processors have CRC hardware. Select this if you
want to use the Blackfin CRC module.
config CRYPTO_DEV_ATMEL_AES
tristate "Support for Atmel AES hw accelerator"
depends on ARCH_AT91
select CRYPTO_CBC
select CRYPTO_ECB
select CRYPTO_AES
select CRYPTO_ALGAPI
select CRYPTO_BLKCIPHER
# Kconfig symbols are referenced without the CONFIG_ prefix; with the
# prefix the select names an unknown symbol and has no effect.
select AT_HDMAC
help
Some Atmel processors have AES hw accelerator.
Select this if you want to use the Atmel module for
AES algorithms.
To compile this driver as a module, choose M here: the module
will be called atmel-aes.
config CRYPTO_DEV_ATMEL_TDES
tristate "Support for Atmel DES/TDES hw accelerator"
depends on ARCH_AT91
select CRYPTO_DES
select CRYPTO_CBC
select CRYPTO_ECB
select CRYPTO_ALGAPI
select CRYPTO_BLKCIPHER
help
Some Atmel processors have DES/TDES hw accelerator.
Select this if you want to use the Atmel module for
DES/TDES algorithms.
To compile this driver as a module, choose M here: the module
will be called atmel-tdes.
config CRYPTO_DEV_ATMEL_SHA
tristate "Support for Atmel SHA1/SHA256 hw accelerator"
depends on ARCH_AT91
select CRYPTO_SHA1
select CRYPTO_SHA256
select CRYPTO_ALGAPI
help
Some Atmel processors have SHA1/SHA256 hw accelerator.
Select this if you want to use the Atmel module for
SHA1/SHA256 algorithms.
To compile this driver as a module, choose M here: the module
will be called atmel-sha.
endif # CRYPTO_HW

View File

@ -14,4 +14,9 @@ obj-$(CONFIG_CRYPTO_DEV_OMAP_AES) += omap-aes.o
obj-$(CONFIG_CRYPTO_DEV_PICOXCELL) += picoxcell_crypto.o
obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o
obj-$(CONFIG_CRYPTO_DEV_TEGRA_AES) += tegra-aes.o
obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/
obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/
obj-$(CONFIG_CRYPTO_DEV_BFIN_CRC) += bfin_crc.o
obj-$(CONFIG_CRYPTO_DEV_NX) += nx/
obj-$(CONFIG_CRYPTO_DEV_ATMEL_AES) += atmel-aes.o
obj-$(CONFIG_CRYPTO_DEV_ATMEL_TDES) += atmel-tdes.o
obj-$(CONFIG_CRYPTO_DEV_ATMEL_SHA) += atmel-sha.o

View File

@ -0,0 +1,62 @@
#ifndef __ATMEL_AES_REGS_H__
#define __ATMEL_AES_REGS_H__
#define AES_CR 0x00
#define AES_CR_START (1 << 0)
#define AES_CR_SWRST (1 << 8)
#define AES_CR_LOADSEED (1 << 16)
#define AES_MR 0x04
#define AES_MR_CYPHER_DEC (0 << 0)
#define AES_MR_CYPHER_ENC (1 << 0)
#define AES_MR_DUALBUFF (1 << 3)
#define AES_MR_PROCDLY_MASK (0xF << 4)
#define AES_MR_PROCDLY_OFFSET 4
#define AES_MR_SMOD_MASK (0x3 << 8)
#define AES_MR_SMOD_MANUAL (0x0 << 8)
#define AES_MR_SMOD_AUTO (0x1 << 8)
#define AES_MR_SMOD_IDATAR0 (0x2 << 8)
#define AES_MR_KEYSIZE_MASK (0x3 << 10)
#define AES_MR_KEYSIZE_128 (0x0 << 10)
#define AES_MR_KEYSIZE_192 (0x1 << 10)
#define AES_MR_KEYSIZE_256 (0x2 << 10)
#define AES_MR_OPMOD_MASK (0x7 << 12)
#define AES_MR_OPMOD_ECB (0x0 << 12)
#define AES_MR_OPMOD_CBC (0x1 << 12)
#define AES_MR_OPMOD_OFB (0x2 << 12)
#define AES_MR_OPMOD_CFB (0x3 << 12)
#define AES_MR_OPMOD_CTR (0x4 << 12)
#define AES_MR_LOD (0x1 << 15)
#define AES_MR_CFBS_MASK (0x7 << 16)
#define AES_MR_CFBS_128b (0x0 << 16)
#define AES_MR_CFBS_64b (0x1 << 16)
#define AES_MR_CFBS_32b (0x2 << 16)
#define AES_MR_CFBS_16b (0x3 << 16)
#define AES_MR_CFBS_8b (0x4 << 16)
#define AES_MR_CKEY_MASK (0xF << 20)
#define AES_MR_CKEY_OFFSET 20
#define AES_MR_CMTYP_MASK (0x1F << 24)
#define AES_MR_CMTYP_OFFSET 24
#define AES_IER 0x10
#define AES_IDR 0x14
#define AES_IMR 0x18
#define AES_ISR 0x1C
#define AES_INT_DATARDY (1 << 0)
#define AES_INT_URAD (1 << 8)
#define AES_ISR_URAT_MASK (0xF << 12)
#define AES_ISR_URAT_IDR_WR_PROC (0x0 << 12)
#define AES_ISR_URAT_ODR_RD_PROC (0x1 << 12)
#define AES_ISR_URAT_MR_WR_PROC (0x2 << 12)
#define AES_ISR_URAT_ODR_RD_SUBK (0x3 << 12)
#define AES_ISR_URAT_MR_WR_SUBK (0x4 << 12)
#define AES_ISR_URAT_WOR_RD (0x5 << 12)
#define AES_KEYWR(x) (0x20 + ((x) * 0x04))
#define AES_IDATAR(x) (0x40 + ((x) * 0x04))
#define AES_ODATAR(x) (0x50 + ((x) * 0x04))
#define AES_IVR(x) (0x60 + ((x) * 0x04))
#define AES_HW_VERSION 0xFC
#endif /* __ATMEL_AES_REGS_H__ */

1206
drivers/crypto/atmel-aes.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,46 @@
#ifndef __ATMEL_SHA_REGS_H__
#define __ATMEL_SHA_REGS_H__
#define SHA_REG_DIGEST(x) (0x80 + ((x) * 0x04))
#define SHA_REG_DIN(x) (0x40 + ((x) * 0x04))
#define SHA_CR 0x00
#define SHA_CR_START (1 << 0)
#define SHA_CR_FIRST (1 << 4)
#define SHA_CR_SWRST (1 << 8)
#define SHA_MR 0x04
#define SHA_MR_MODE_MASK (0x3 << 0)
#define SHA_MR_MODE_MANUAL 0x0
#define SHA_MR_MODE_AUTO 0x1
#define SHA_MR_MODE_PDC 0x2
#define SHA_MR_DUALBUFF (1 << 3)
#define SHA_MR_PROCDLY (1 << 4)
#define SHA_MR_ALGO_SHA1 (0 << 8)
#define SHA_MR_ALGO_SHA256 (1 << 8)
#define SHA_IER 0x10
#define SHA_IDR 0x14
#define SHA_IMR 0x18
#define SHA_ISR 0x1C
#define SHA_INT_DATARDY (1 << 0)
#define SHA_INT_ENDTX (1 << 1)
#define SHA_INT_TXBUFE (1 << 2)
#define SHA_INT_URAD (1 << 8)
#define SHA_ISR_URAT_MASK (0x7 << 12)
#define SHA_ISR_URAT_IDR (0x0 << 12)
#define SHA_ISR_URAT_ODR (0x1 << 12)
#define SHA_ISR_URAT_MR (0x2 << 12)
#define SHA_ISR_URAT_WO (0x5 << 12)
#define SHA_TPR 0x108
#define SHA_TCR 0x10C
#define SHA_TNPR 0x118
#define SHA_TNCR 0x11C
#define SHA_PTCR 0x120
#define SHA_PTCR_TXTEN (1 << 8)
#define SHA_PTCR_TXTDIS (1 << 9)
#define SHA_PTSR 0x124
#define SHA_PTSR_TXTEN (1 << 8)
#endif /* __ATMEL_SHA_REGS_H__ */

1112
drivers/crypto/atmel-sha.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,89 @@
/*
 * Register offsets and bit-field values for the Atmel DES/TDES peripheral.
 *
 * Plain hex values are byte offsets of registers; shifted values are bit
 * fields, named after the register they belong to (TDES_CR_*, TDES_MR_*, ...).
 */
#ifndef __ATMEL_TDES_REGS_H__
#define __ATMEL_TDES_REGS_H__

/* Control register */
#define TDES_CR			0x00
#define TDES_CR_START		(1 << 0)
#define TDES_CR_SWRST		(1 << 8)
#define TDES_CR_LOADSEED	(1 << 16)

/* Mode register */
#define TDES_MR			0x04
#define TDES_MR_CYPHER_DEC	(0 << 0)
#define TDES_MR_CYPHER_ENC	(1 << 0)
#define TDES_MR_TDESMOD_MASK	(0x3 << 1)
#define TDES_MR_TDESMOD_DES	(0x0 << 1)
#define TDES_MR_TDESMOD_TDES	(0x1 << 1)
#define TDES_MR_TDESMOD_XTEA	(0x2 << 1)
#define TDES_MR_KEYMOD_3KEY	(0 << 4)
#define TDES_MR_KEYMOD_2KEY	(1 << 4)
#define TDES_MR_SMOD_MASK	(0x3 << 8)
#define TDES_MR_SMOD_MANUAL	(0x0 << 8)
#define TDES_MR_SMOD_AUTO	(0x1 << 8)
#define TDES_MR_SMOD_PDC	(0x2 << 8)
#define TDES_MR_OPMOD_MASK	(0x3 << 12)
#define TDES_MR_OPMOD_ECB	(0x0 << 12)
#define TDES_MR_OPMOD_CBC	(0x1 << 12)
#define TDES_MR_OPMOD_OFB	(0x2 << 12)
#define TDES_MR_OPMOD_CFB	(0x3 << 12)
#define TDES_MR_LOD		(0x1 << 15)
#define TDES_MR_CFBS_MASK	(0x3 << 16)
#define TDES_MR_CFBS_64b	(0x0 << 16)
#define TDES_MR_CFBS_32b	(0x1 << 16)
#define TDES_MR_CFBS_16b	(0x2 << 16)
#define TDES_MR_CFBS_8b		(0x3 << 16)
#define TDES_MR_CKEY_MASK	(0xF << 20)
#define TDES_MR_CKEY_OFFSET	20
#define TDES_MR_CTYPE_MASK	(0x3F << 24)
#define TDES_MR_CTYPE_OFFSET	24

/* Interrupt enable/disable/mask/status registers and their bits */
#define TDES_IER		0x10
#define TDES_IDR		0x14
#define TDES_IMR		0x18
#define TDES_ISR		0x1C
#define TDES_INT_DATARDY	(1 << 0)
#define TDES_INT_ENDRX		(1 << 1)
#define TDES_INT_ENDTX		(1 << 2)
#define TDES_INT_RXBUFF		(1 << 3)
#define TDES_INT_TXBUFE		(1 << 4)
#define TDES_INT_URAD		(1 << 8)

/* 2-bit URAT field of TDES_ISR (bits 12..13) and its values */
#define TDES_ISR_URAT_MASK	(0x3 << 12)
#define TDES_ISR_URAT_IDR	(0x0 << 12)
#define TDES_ISR_URAT_ODR	(0x1 << 12)
#define TDES_ISR_URAT_MR	(0x2 << 12)
#define TDES_ISR_URAT_WO	(0x3 << 12)

/* Key, data and IV word registers */
#define TDES_KEY1W1R		0x20
#define TDES_KEY1W2R		0x24
#define TDES_KEY2W1R		0x28
#define TDES_KEY2W2R		0x2C
#define TDES_KEY3W1R		0x30
#define TDES_KEY3W2R		0x34
#define TDES_IDATA1R		0x40
#define TDES_IDATA2R		0x44
#define TDES_ODATA1R		0x50
#define TDES_ODATA2R		0x54
#define TDES_IV1R		0x60
#define TDES_IV2R		0x64

/* XTEA rounds register */
#define TDES_XTEARNDR			0x70
#define TDES_XTEARNDR_XTEA_RNDS_MASK	(0x3F << 0)
#define TDES_XTEARNDR_XTEA_RNDS_OFFSET	0

/*
 * PDC (peripheral DMA controller) registers.
 *
 * The receive-next pointer/counter pair sits at 0x110/0x114 in the Atmel
 * PDC layout; it must not alias the transmit-next pair at 0x118/0x11C,
 * otherwise programming the "next" receive buffer overwrites the "next"
 * transmit buffer.
 */
#define TDES_RPR		0x100
#define TDES_RCR		0x104
#define TDES_TPR		0x108
#define TDES_TCR		0x10C
#define TDES_RNPR		0x110
#define TDES_RNCR		0x114
#define TDES_TNPR		0x118
#define TDES_TNCR		0x11C
#define TDES_PTCR		0x120
#define TDES_PTCR_RXTEN		(1 << 0)
#define TDES_PTCR_RXTDIS	(1 << 1)
#define TDES_PTCR_TXTEN		(1 << 8)
#define TDES_PTCR_TXTDIS	(1 << 9)
#define TDES_PTSR		0x124
#define TDES_PTSR_RXTEN		(1 << 0)
#define TDES_PTSR_TXTEN		(1 << 8)

#endif /* __ATMEL_TDES_REGS_H__ */

1215
drivers/crypto/atmel-tdes.c Normal file

File diff suppressed because it is too large Load Diff

780
drivers/crypto/bfin_crc.c Normal file
View File

@ -0,0 +1,780 @@
/*
* Cryptographic API.
*
* Support Blackfin CRC HW acceleration.
*
* Copyright 2012 Analog Devices Inc.
*
* Licensed under the GPL-2.
*/
#include <linux/err.h>
#include <linux/device.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/irq.h>
#include <linux/io.h>
#include <linux/platform_device.h>
#include <linux/scatterlist.h>
#include <linux/dma-mapping.h>
#include <linux/delay.h>
#include <linux/unaligned/access_ok.h>
#include <linux/crypto.h>
#include <linux/cryptohash.h>
#include <crypto/scatterwalk.h>
#include <crypto/algapi.h>
#include <crypto/hash.h>
#include <crypto/internal/hash.h>
#include <asm/blackfin.h>
#include <asm/bfin_crc.h>
#include <asm/dma.h>
#include <asm/portmux.h>
/* Depth handed to crypto_init_queue() for each device's request queue */
#define CRC_CCRYPTO_QUEUE_LENGTH 5
/* Name of the platform driver; also used as the algorithm's cra_driver_name */
#define DRIVER_NAME "bfin-hmac-crc"
/* Digest length in bytes; data is also packed to this size before being fed to the hardware */
#define CHKSUM_DIGEST_SIZE 4
#define CHKSUM_BLOCK_SIZE 1
/* Largest scatterlist (in entries) that the init entry point accepts */
#define CRC_MAX_DMA_DESC 100
/* Values of bfin_crypto_crc_reqctx.flag: which entry point queued the request */
#define CRC_CRYPTO_STATE_UPDATE 1
#define CRC_CRYPTO_STATE_FINALUPDATE 2
#define CRC_CRYPTO_STATE_FINISH 3
/*
 * Per-device state of one CRC hardware instance. Allocated in probe and
 * linked into the global crc_list.
 */
struct bfin_crypto_crc {
struct list_head list; /* link in crc_list.dev_list */
struct device *dev;
spinlock_t lock; /* taken around queue access and the busy test */
int irq;
int dma_ch;
u32 poly; /* polynomial, taken from the platform data in probe */
volatile struct crc_register *regs; /* ioremap()ed register block */
struct ahash_request *req; /* current request in operation */
struct dma_desc_array *sg_cpu; /* virt addr of sg dma descriptors */
dma_addr_t sg_dma; /* phy addr of sg dma descriptors */
u8 *sg_mid_buf; /* bounce area placed behind the descriptors in the sg_cpu allocation */
struct tasklet_struct done_task; /* scheduled from the irq handler to start the next request */
struct crypto_queue queue; /* waiting requests */
u8 busy:1; /* crc device in operation flag */
};
/*
 * Global list of probed CRC devices. Entries are added in probe and removed
 * in remove; the init entry point picks the first entry.
 */
static struct bfin_crypto_crc_list {
struct list_head dev_list;
spinlock_t lock; /* protects dev_list */
} crc_list;
/*
 * Per-request context (its size is registered with crypto_ahash_set_reqsize()).
 * Carries the bytes that did not fill a whole 32-bit word from one update to
 * the next.
 */
struct bfin_crypto_crc_reqctx {
struct bfin_crypto_crc *crc; /* device chosen by the init entry point */
unsigned int total; /* total request bytes */
size_t sg_buflen; /* bytes for this update */
unsigned int sg_nents;
struct scatterlist *sg; /* sg list head for this update */
struct scatterlist bufsl[2]; /* chained sg list */
size_t bufnext_len; /* valid bytes in bufnext */
size_t buflast_len; /* valid bytes in buflast */
u8 bufnext[CHKSUM_DIGEST_SIZE]; /* extra bytes for next update */
u8 buflast[CHKSUM_DIGEST_SIZE]; /* extra bytes from last update */
u8 flag; /* one of CRC_CRYPTO_STATE_* */
};
/* Per-transform context */
struct bfin_crypto_crc_ctx {
struct bfin_crypto_crc *crc; /* assigned by the init entry point */
u32 key; /* written to the curresult register at init; set by setkey, cleared by final/finup */
};
/*
 * Count the entries of a scatterlist by walking it up to and including the
 * entry marked as last. A NULL list counts as zero entries.
 */
static int sg_count(struct scatterlist *sg_list)
{
	struct scatterlist *cur;
	int count;

	if (!sg_list)
		return 0;

	count = 1;
	for (cur = sg_list; !sg_is_last(cur); cur = scatterwalk_sg_next(cur))
		count++;

	return count;
}
/*
 * Return the entry at position @index of @sg_list, looking at no more than
 * @nents entries. When @index is not below @nents the walk stops after
 * @nents steps and whatever sg_next() produced last is returned.
 */
static struct scatterlist *sg_get(struct scatterlist *sg_list, unsigned int nents,
				unsigned int index)
{
	struct scatterlist *entry = sg_list;
	int pos = 0;

	while (pos < nents && pos != index) {
		entry = sg_next(entry);
		pos++;
	}

	return entry;
}
/*
 * Program the CRC block for a fresh calculation: clear the data count
 * reload value, select CRC-calculation mode, seed the current result with
 * @key and write the CMPERRI | DCNTEXPI mask to the status and interrupt
 * enable-set registers. Always returns 0.
 */
static int bfin_crypto_crc_init_hw(struct bfin_crypto_crc *crc, u32 key)
{
	volatile struct crc_register *regs = crc->regs;

	regs->datacntrld = 0;
	regs->control = MODE_CALC_CRC << OPMODE_OFFSET;
	regs->curresult = key;

	/* setup CRC interrupts */
	regs->status = CMPERRI | DCNTEXPI;
	regs->intrenset = CMPERRI | DCNTEXPI;
	SSYNC();

	return 0;
}
static int bfin_crypto_crc_init(struct ahash_request *req)
{
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
struct bfin_crypto_crc_ctx *crc_ctx = crypto_ahash_ctx(tfm);
struct bfin_crypto_crc_reqctx *ctx = ahash_request_ctx(req);
struct bfin_crypto_crc *crc;
dev_dbg(crc->dev, "crc_init\n");
spin_lock_bh(&crc_list.lock);
list_for_each_entry(crc, &crc_list.dev_list, list) {
crc_ctx->crc = crc;
break;
}
spin_unlock_bh(&crc_list.lock);
if (sg_count(req->src) > CRC_MAX_DMA_DESC) {
dev_dbg(crc->dev, "init: requested sg list is too big > %d\n",
CRC_MAX_DMA_DESC);
return -EINVAL;
}
ctx->crc = crc;
ctx->bufnext_len = 0;
ctx->buflast_len = 0;
ctx->sg_buflen = 0;
ctx->total = 0;
ctx->flag = 0;
/* init crc results */
put_unaligned_le32(crc_ctx->key, req->result);
dev_dbg(crc->dev, "init: digest size: %d\n",
crypto_ahash_digestsize(tfm));
return bfin_crypto_crc_init_hw(crc, crc_ctx->key);
}
/*
 * Build the DMA descriptor array in crc->sg_cpu for the request currently
 * attached to @crc and start the DMA channel.
 *
 * Each scatterlist entry gets one descriptor covering a whole number of
 * 32-bit words. Bytes left over at the end of an entry are copied into
 * crc->sg_mid_buf, completed with the first bytes of the following entry and
 * sent through an extra one-word descriptor. For final requests the padded
 * ctx->bufnext word is appended as a last descriptor.
 */
static void bfin_crypto_crc_config_dma(struct bfin_crypto_crc *crc)
{
struct scatterlist *sg;
struct bfin_crypto_crc_reqctx *ctx = ahash_request_ctx(crc->req);
/* i indexes descriptors in crc->sg_cpu, j indexes scatterlist entries */
int i = 0, j = 0;
unsigned long dma_config;
unsigned int dma_count;
unsigned int dma_addr;
/* bytes of the previous entry that did not fill a 32-bit word */
unsigned int mid_dma_count = 0;
int dma_mod;
/* NOTE(review): the return value of dma_map_sg() is not checked */
dma_map_sg(crc->dev, ctx->sg, ctx->sg_nents, DMA_TO_DEVICE);
for_each_sg(ctx->sg, sg, ctx->sg_nents, j) {
dma_config = DMAFLOW_ARRAY | RESTART | NDSIZE_3 | DMAEN | PSIZE_32;
dma_addr = sg_dma_address(sg);
/* deduct the extra bytes (kept in ctx->bufnext) from the last sg */
if (sg_is_last(sg))
dma_count = sg_dma_len(sg) - ctx->bufnext_len;
else
dma_count = sg_dma_len(sg);
if (mid_dma_count) {
/*
 * Complete the previous middle dma buffer to 4 bytes with the
 * first bytes of the current sg buffer, then advance the
 * address of the current sg and shorten its length accordingly.
 * NOTE(review): dma_addr is used as a CPU pointer here, which
 * assumes DMA and kernel virtual addresses are the same on
 * this platform - confirm.
 */
memcpy(crc->sg_mid_buf +((i-1) << 2) + mid_dma_count,
(void *)dma_addr,
CHKSUM_DIGEST_SIZE - mid_dma_count);
dma_addr += CHKSUM_DIGEST_SIZE - mid_dma_count;
dma_count -= CHKSUM_DIGEST_SIZE - mid_dma_count;
}
/* chop current sg dma len to multiple of 32 bits */
mid_dma_count = dma_count % 4;
dma_count &= ~0x3;
/* pick the widest transfer size the start address allows; x_count is in units of that size */
if (dma_addr % 4 == 0) {
dma_config |= WDSIZE_32;
dma_count >>= 2;
dma_mod = 4;
} else if (dma_addr % 2 == 0) {
dma_config |= WDSIZE_16;
dma_count >>= 1;
dma_mod = 2;
} else {
dma_config |= WDSIZE_8;
dma_mod = 1;
}
crc->sg_cpu[i].start_addr = dma_addr;
crc->sg_cpu[i].cfg = dma_config;
crc->sg_cpu[i].x_count = dma_count;
crc->sg_cpu[i].x_modify = dma_mod;
dev_dbg(crc->dev, "%d: crc_dma: start_addr:0x%lx, "
"cfg:0x%lx, x_count:0x%lx, x_modify:0x%lx\n",
i, crc->sg_cpu[i].start_addr,
crc->sg_cpu[i].cfg, crc->sg_cpu[i].x_count,
crc->sg_cpu[i].x_modify);
i++;
if (mid_dma_count) {
/* copy extra bytes to next middle dma buffer */
dma_config = DMAFLOW_ARRAY | RESTART | NDSIZE_3 |
DMAEN | PSIZE_32 | WDSIZE_32;
/*
 * NOTE(review): (dma_count << 2) is the byte length of the
 * descriptor above only when the WDSIZE_32 branch was taken;
 * in the 16-bit and 8-bit branches dma_count was scaled
 * differently - verify the source offset for unaligned buffers.
 */
memcpy(crc->sg_mid_buf + (i << 2),
(void *)(dma_addr + (dma_count << 2)),
mid_dma_count);
/* setup new dma descriptor for next middle dma */
crc->sg_cpu[i].start_addr = dma_map_single(crc->dev,
crc->sg_mid_buf + (i << 2),
CHKSUM_DIGEST_SIZE, DMA_TO_DEVICE);
crc->sg_cpu[i].cfg = dma_config;
crc->sg_cpu[i].x_count = 1;
crc->sg_cpu[i].x_modify = CHKSUM_DIGEST_SIZE;
dev_dbg(crc->dev, "%d: crc_dma: start_addr:0x%lx, "
"cfg:0x%lx, x_count:0x%lx, x_modify:0x%lx\n",
i, crc->sg_cpu[i].start_addr,
crc->sg_cpu[i].cfg, crc->sg_cpu[i].x_count,
crc->sg_cpu[i].x_modify);
i++;
}
}
dma_config = DMAFLOW_ARRAY | RESTART | NDSIZE_3 | DMAEN | PSIZE_32 | WDSIZE_32;
/* For final update req, append the buffer for next update as well */
if (ctx->bufnext_len && (ctx->flag == CRC_CRYPTO_STATE_FINALUPDATE ||
ctx->flag == CRC_CRYPTO_STATE_FINISH)) {
crc->sg_cpu[i].start_addr = dma_map_single(crc->dev, ctx->bufnext,
CHKSUM_DIGEST_SIZE, DMA_TO_DEVICE);
crc->sg_cpu[i].cfg = dma_config;
crc->sg_cpu[i].x_count = 1;
crc->sg_cpu[i].x_modify = CHKSUM_DIGEST_SIZE;
dev_dbg(crc->dev, "%d: crc_dma: start_addr:0x%lx, "
"cfg:0x%lx, x_count:0x%lx, x_modify:0x%lx\n",
i, crc->sg_cpu[i].start_addr,
crc->sg_cpu[i].cfg, crc->sg_cpu[i].x_count,
crc->sg_cpu[i].x_modify);
i++;
}
/* nothing to transfer: leave the DMA channel untouched */
if (i == 0)
return;
/*
 * NOTE(review): the flush happens before the last descriptor's cfg is
 * rewritten just below - check whether that ordering is intended.
 */
flush_dcache_range((unsigned int)crc->sg_cpu,
(unsigned int)crc->sg_cpu +
i * sizeof(struct dma_desc_array));
/* Set the last descriptor to stop mode */
crc->sg_cpu[i - 1].cfg &= ~(DMAFLOW | NDSIZE);
crc->sg_cpu[i - 1].cfg |= DI_EN;
set_dma_curr_desc_addr(crc->dma_ch, (unsigned long *)crc->sg_dma);
set_dma_x_count(crc->dma_ch, 0);
set_dma_x_modify(crc->dma_ch, 0);
SSYNC();
set_dma_config(crc->dma_ch, dma_config);
}
/*
 * Optionally enqueue @req and, if the device is idle, dequeue the next
 * request and start it on the hardware.
 *
 * @crc: device to run on
 * @req: request to enqueue, or NULL to only process what is already queued
 *
 * Returns the ahash_enqueue_request() result (0 when @req is NULL) if the
 * device is busy or the queue is empty, 0 if the dequeued request was
 * absorbed without starting the hardware, and -EINPROGRESS once the DMA
 * transfer has been kicked off.
 */
static int bfin_crypto_crc_handle_queue(struct bfin_crypto_crc *crc,
struct ahash_request *req)
{
struct crypto_async_request *async_req, *backlog;
struct bfin_crypto_crc_reqctx *ctx;
struct scatterlist *sg;
int ret = 0;
int nsg, i, j;
unsigned int nextlen;
unsigned long flags;
/* queue manipulation and the busy test happen under crc->lock */
spin_lock_irqsave(&crc->lock, flags);
if (req)
ret = ahash_enqueue_request(&crc->queue, req);
if (crc->busy) {
spin_unlock_irqrestore(&crc->lock, flags);
return ret;
}
backlog = crypto_get_backlog(&crc->queue);
async_req = crypto_dequeue_request(&crc->queue);
if (async_req)
crc->busy = 1;
spin_unlock_irqrestore(&crc->lock, flags);
if (!async_req)
return ret;
if (backlog)
backlog->complete(backlog, -EINPROGRESS);
/* from here on req is the dequeued request, not necessarily the caller's */
req = ahash_request_cast(async_req);
crc->req = req;
ctx = ahash_request_ctx(req);
ctx->sg = NULL;
ctx->sg_buflen = 0;
ctx->sg_nents = 0;
dev_dbg(crc->dev, "handling new req, flag=%u, nbytes: %d\n",
ctx->flag, req->nbytes);
if (ctx->flag == CRC_CRYPTO_STATE_FINISH) {
/*
 * final() without leftover bytes: nothing to feed to the hardware.
 * NOTE(review): busy is cleared here (and in the other early return
 * below) without taking crc->lock, and the request's completion
 * callback is not invoked on this path - confirm that is intended.
 */
if (ctx->bufnext_len == 0) {
crc->busy = 0;
return 0;
}
/* Pack last crc update buffer to 32bit */
memset(ctx->bufnext + ctx->bufnext_len, 0,
CHKSUM_DIGEST_SIZE - ctx->bufnext_len);
} else {
/* Pack small data which is less than 32bit to buffer for next update. */
if (ctx->bufnext_len + req->nbytes < CHKSUM_DIGEST_SIZE) {
/* NOTE(review): reads only the first sg entry; assumes all nbytes live there */
memcpy(ctx->bufnext + ctx->bufnext_len,
sg_virt(req->src), req->nbytes);
ctx->bufnext_len += req->nbytes;
if (ctx->flag == CRC_CRYPTO_STATE_FINALUPDATE &&
ctx->bufnext_len) {
goto finish_update;
} else {
crc->busy = 0;
return 0;
}
}
if (ctx->bufnext_len) {
/* Chain in extra bytes of last update */
ctx->buflast_len = ctx->bufnext_len;
memcpy(ctx->buflast, ctx->bufnext, ctx->buflast_len);
/*
 * NOTE(review): ctx->sg_buflen was reset to 0 above and is not
 * written in between, so nsg is always 1 here and the chaining
 * call below is never reached - verify the intended condition.
 */
nsg = ctx->sg_buflen ? 2 : 1;
sg_init_table(ctx->bufsl, nsg);
sg_set_buf(ctx->bufsl, ctx->buflast, ctx->buflast_len);
if (nsg > 1)
scatterwalk_sg_chain(ctx->bufsl, nsg,
req->src);
ctx->sg = ctx->bufsl;
} else
ctx->sg = req->src;
/* Chop crc buffer size to multiple of 32 bit */
nsg = ctx->sg_nents = sg_count(ctx->sg);
ctx->sg_buflen = ctx->buflast_len + req->nbytes;
ctx->bufnext_len = ctx->sg_buflen % 4;
ctx->sg_buflen &= ~0x3;
if (ctx->bufnext_len) {
/* copy extra bytes to buffer for next update */
memset(ctx->bufnext, 0, CHKSUM_DIGEST_SIZE);
nextlen = ctx->bufnext_len;
/* walk the list backwards, taking tail bytes until nextlen bytes are collected */
for (i = nsg - 1; i >= 0; i--) {
sg = sg_get(ctx->sg, nsg, i);
j = min(nextlen, sg_dma_len(sg));
memcpy(ctx->bufnext + nextlen - j,
sg_virt(sg) + sg_dma_len(sg) - j, j);
/* an entry consumed entirely is dropped from this update */
if (j == sg_dma_len(sg))
ctx->sg_nents--;
nextlen -= j;
if (nextlen == 0)
break;
}
}
}
finish_update:
/* final requests also send the padded bufnext word, so count it */
if (ctx->bufnext_len && (ctx->flag == CRC_CRYPTO_STATE_FINALUPDATE ||
ctx->flag == CRC_CRYPTO_STATE_FINISH))
ctx->sg_buflen += CHKSUM_DIGEST_SIZE;
/* set CRC data count (in 32-bit words) before start DMA */
crc->regs->datacnt = ctx->sg_buflen >> 2;
/* setup and enable CRC DMA */
bfin_crypto_crc_config_dma(crc);
/* finally kick off CRC operation */
crc->regs->control |= BLKEN;
SSYNC();
return -EINPROGRESS;
}
/*
 * ahash .update entry point: account for the new bytes, mark the request as
 * a plain update and hand it to the queue. Empty updates succeed right away.
 */
static int bfin_crypto_crc_update(struct ahash_request *req)
{
	struct bfin_crypto_crc_reqctx *rctx;

	if (req->nbytes == 0)
		return 0;

	rctx = ahash_request_ctx(req);
	dev_dbg(rctx->crc->dev, "crc_update\n");

	rctx->total += req->nbytes;
	rctx->flag = CRC_CRYPTO_STATE_UPDATE;

	return bfin_crypto_crc_handle_queue(rctx->crc, req);
}
/*
 * ahash .final entry point: mark the request as finishing, reset the
 * transform key to 0 and hand the request to the queue.
 */
static int bfin_crypto_crc_final(struct ahash_request *req)
{
	struct bfin_crypto_crc_ctx *tfm_ctx =
		crypto_ahash_ctx(crypto_ahash_reqtfm(req));
	struct bfin_crypto_crc_reqctx *rctx = ahash_request_ctx(req);

	dev_dbg(rctx->crc->dev, "crc_final\n");

	rctx->flag = CRC_CRYPTO_STATE_FINISH;
	tfm_ctx->key = 0;

	return bfin_crypto_crc_handle_queue(rctx->crc, req);
}
/*
 * ahash .finup entry point: account for the new bytes, mark the request as
 * a final update, reset the transform key to 0 and hand the request to the
 * queue.
 */
static int bfin_crypto_crc_finup(struct ahash_request *req)
{
	struct bfin_crypto_crc_ctx *tfm_ctx =
		crypto_ahash_ctx(crypto_ahash_reqtfm(req));
	struct bfin_crypto_crc_reqctx *rctx = ahash_request_ctx(req);

	dev_dbg(rctx->crc->dev, "crc_finishupdate\n");

	rctx->total += req->nbytes;
	rctx->flag = CRC_CRYPTO_STATE_FINALUPDATE;
	tfm_ctx->key = 0;

	return bfin_crypto_crc_handle_queue(rctx->crc, req);
}
/*
 * ahash .digest entry point: init followed by finup. A non-zero result from
 * init is returned as is and finup is not attempted.
 */
static int bfin_crypto_crc_digest(struct ahash_request *req)
{
	int err = bfin_crypto_crc_init(req);

	return err ? err : bfin_crypto_crc_finup(req);
}
/*
 * ahash .setkey entry point: store a 4-byte little-endian key in the
 * transform context.
 *
 * Returns 0 on success, or -EINVAL (with CRYPTO_TFM_RES_BAD_KEY_LEN set on
 * the transform) when @keylen is not CHKSUM_DIGEST_SIZE.
 */
static int bfin_crypto_crc_setkey(struct crypto_ahash *tfm, const u8 *key,
			unsigned int keylen)
{
	struct bfin_crypto_crc_ctx *crc_ctx = crypto_ahash_ctx(tfm);

	/*
	 * crc_ctx->crc is only assigned by the init entry point, and a key
	 * may be set before any request has been initialised, so the device
	 * pointer must not be dereferenced for logging here.
	 */
	pr_debug("crc_setkey\n");

	if (keylen != CHKSUM_DIGEST_SIZE) {
		crypto_ahash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
		return -EINVAL;
	}

	crc_ctx->key = get_unaligned_le32(key);

	return 0;
}
/*
 * Transform constructor: announce the size of the per-request context and
 * start with a zero key. Always returns 0.
 */
static int bfin_crypto_crc_cra_init(struct crypto_tfm *tfm)
{
	struct crypto_ahash *ahash = __crypto_ahash_cast(tfm);
	struct bfin_crypto_crc_ctx *tfm_ctx = crypto_tfm_ctx(tfm);

	crypto_ahash_set_reqsize(ahash, sizeof(struct bfin_crypto_crc_reqctx));
	tfm_ctx->key = 0;

	return 0;
}
/* Transform destructor: intentionally empty, cra_init allocates nothing. */
static void bfin_crypto_crc_cra_exit(struct crypto_tfm *tfm)
{
}
/*
 * The single asynchronous hash algorithm this driver registers: a 4-byte
 * digest published under the name "hmac(crc32)".
 */
static struct ahash_alg algs = {
.init = bfin_crypto_crc_init,
.update = bfin_crypto_crc_update,
.final = bfin_crypto_crc_final,
.finup = bfin_crypto_crc_finup,
.digest = bfin_crypto_crc_digest,
.setkey = bfin_crypto_crc_setkey,
.halg.digestsize = CHKSUM_DIGEST_SIZE,
.halg.base = {
.cra_name = "hmac(crc32)",
.cra_driver_name = DRIVER_NAME,
.cra_priority = 100,
.cra_flags = CRYPTO_ALG_TYPE_AHASH |
CRYPTO_ALG_ASYNC,
.cra_blocksize = CHKSUM_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct bfin_crypto_crc_ctx),
.cra_alignmask = 3,
.cra_module = THIS_MODULE,
.cra_init = bfin_crypto_crc_cra_init,
.cra_exit = bfin_crypto_crc_cra_exit,
}
};
/*
 * Tasklet body, scheduled by the interrupt handler: start the next queued
 * request, if any. @data is the struct bfin_crypto_crc pointer given to
 * tasklet_init().
 */
static void bfin_crypto_crc_done_task(unsigned long data)
{
	bfin_crypto_crc_handle_queue((struct bfin_crypto_crc *)data, NULL);
}
/*
 * Interrupt handler. When the DCNTEXP status bit is set, write it back to
 * the status register, store the hardware result in the current request,
 * stop the block, mark the device idle, complete the request and schedule
 * the tasklet that starts the next one. Any other interrupt is not ours.
 */
static irqreturn_t bfin_crypto_crc_handler(int irq, void *dev_id)
{
	struct bfin_crypto_crc *crc = dev_id;

	if (!(crc->regs->status & DCNTEXP))
		return IRQ_NONE;

	crc->regs->status = DCNTEXP;
	SSYNC();

	/* prepare results */
	put_unaligned_le32(crc->regs->result, crc->req->result);

	crc->regs->control &= ~BLKEN;
	crc->busy = 0;

	if (crc->req->base.complete)
		crc->req->base.complete(&crc->req->base, 0);

	tasklet_schedule(&crc->done_task);

	return IRQ_HANDLED;
}
#ifdef CONFIG_PM
/**
 * bfin_crypto_crc_suspend - suspend crc device
 * @pdev: device being suspended
 * @state: requested suspend state
 *
 * Polls the BLKEN bit of the control register a bounded number of times.
 * Returns 0 once the bit is clear, -EBUSY if it is still set when the
 * retry budget runs out.
 */
static int bfin_crypto_crc_suspend(struct platform_device *pdev, pm_message_t state)
{
	struct bfin_crypto_crc *crc = platform_get_drvdata(pdev);
	int retries;

	for (retries = 100000; crc->regs->control & BLKEN; cpu_relax()) {
		if (--retries == 0)
			return -EBUSY;
	}

	return 0;
}
#else
# define bfin_crypto_crc_suspend NULL
#endif
#define bfin_crypto_crc_resume NULL
/**
 * bfin_crypto_crc_probe - set up one CRC device
 * @pdev: platform device providing the MEM, IRQ and DMA resources; its
 *        platform_data carries the CRC polynomial
 *
 * Allocates the device state, maps the registers, claims the interrupt and
 * the DMA channel, allocates one page of coherent memory for the DMA
 * descriptors and the middle bounce buffer, programs the polynomial, adds
 * the device to the global list and registers the ahash algorithm.
 *
 * Returns 0 on success or a negative errno; on failure everything acquired
 * so far is released again.
 */
static int __devinit bfin_crypto_crc_probe(struct platform_device *pdev)
{
	struct device *dev = &pdev->dev;
	struct resource *res;
	struct bfin_crypto_crc *crc;
	unsigned int timeout = 100000;
	int ret;

	crc = kzalloc(sizeof(*crc), GFP_KERNEL);
	if (!crc) {
		dev_err(&pdev->dev, "fail to malloc bfin_crypto_crc\n");
		return -ENOMEM;
	}

	crc->dev = dev;

	INIT_LIST_HEAD(&crc->list);
	spin_lock_init(&crc->lock);
	tasklet_init(&crc->done_task, bfin_crypto_crc_done_task, (unsigned long)crc);
	crypto_init_queue(&crc->queue, CRC_CCRYPTO_QUEUE_LENGTH);

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	if (res == NULL) {
		dev_err(&pdev->dev, "Cannot get IORESOURCE_MEM\n");
		ret = -ENOENT;
		goto out_error_free_mem;
	}

	crc->regs = ioremap(res->start, resource_size(res));
	if (!crc->regs) {
		dev_err(&pdev->dev, "Cannot map CRC IO\n");
		ret = -ENXIO;
		goto out_error_free_mem;
	}

	crc->irq = platform_get_irq(pdev, 0);
	if (crc->irq < 0) {
		dev_err(&pdev->dev, "No CRC DCNTEXP IRQ specified\n");
		ret = -ENOENT;
		goto out_error_unmap;
	}

	/* the dev_id registered here is crc; free_irq() must use the same cookie */
	ret = request_irq(crc->irq, bfin_crypto_crc_handler, IRQF_SHARED, dev_name(dev), crc);
	if (ret) {
		dev_err(&pdev->dev, "Unable to request blackfin crc irq\n");
		goto out_error_unmap;
	}

	res = platform_get_resource(pdev, IORESOURCE_DMA, 0);
	if (res == NULL) {
		dev_err(&pdev->dev, "No CRC DMA channel specified\n");
		ret = -ENOENT;
		goto out_error_irq;
	}
	crc->dma_ch = res->start;

	ret = request_dma(crc->dma_ch, dev_name(dev));
	if (ret) {
		dev_err(&pdev->dev, "Unable to attach Blackfin CRC DMA channel\n");
		goto out_error_irq;
	}

	crc->sg_cpu = dma_alloc_coherent(&pdev->dev, PAGE_SIZE, &crc->sg_dma, GFP_KERNEL);
	if (crc->sg_cpu == NULL) {
		ret = -ENOMEM;
		goto out_error_dma;
	}
	/*
	 * need at most CRC_MAX_DMA_DESC sg + CRC_MAX_DMA_DESC middle +
	 * 1 last + 1 next dma descriptors
	 */
	crc->sg_mid_buf = (u8 *)(crc->sg_cpu + ((CRC_MAX_DMA_DESC + 1) << 1));

	crc->regs->control = 0;
	SSYNC();
	crc->regs->poly = crc->poly = (u32)pdev->dev.platform_data;
	SSYNC();

	/* bounded wait for the LUTDONE status bit after writing the polynomial */
	while (!(crc->regs->status & LUTDONE) && (--timeout) > 0)
		cpu_relax();

	if (timeout == 0)
		dev_info(&pdev->dev, "init crc poly timeout\n");

	spin_lock(&crc_list.lock);
	list_add(&crc->list, &crc_list.dev_list);
	spin_unlock(&crc_list.lock);

	platform_set_drvdata(pdev, crc);

	ret = crypto_register_ahash(&algs);
	if (ret) {
		spin_lock(&crc_list.lock);
		list_del(&crc->list);
		spin_unlock(&crc_list.lock);
		dev_err(&pdev->dev, "Cann't register crypto ahash device\n");
		goto out_error_dma;
	}

	dev_info(&pdev->dev, "initialized\n");

	return 0;

out_error_dma:
	if (crc->sg_cpu)
		dma_free_coherent(&pdev->dev, PAGE_SIZE, crc->sg_cpu, crc->sg_dma);
	free_dma(crc->dma_ch);
out_error_irq:
	/* must match the dev_id passed to request_irq() above */
	free_irq(crc->irq, crc);
out_error_unmap:
	iounmap((void *)crc->regs);
out_error_free_mem:
	kfree(crc);

	return ret;
}
/**
 * bfin_crypto_crc_remove - tear down one CRC device
 * @pdev: platform device being removed
 *
 * Takes the device off the global list, unregisters the ahash algorithm and
 * releases everything probe acquired: interrupt, tasklet, coherent
 * descriptor memory, DMA channel, register mapping and the device state.
 *
 * Returns 0 on success, -ENODEV when no driver data is attached to @pdev.
 */
static int __devexit bfin_crypto_crc_remove(struct platform_device *pdev)
{
	struct bfin_crypto_crc *crc = platform_get_drvdata(pdev);

	if (!crc)
		return -ENODEV;

	spin_lock(&crc_list.lock);
	list_del(&crc->list);
	spin_unlock(&crc_list.lock);

	crypto_unregister_ahash(&algs);

	/*
	 * Release the interrupt first: the handler touches the registers and
	 * schedules the tasklet. The dev_id must be the one given to
	 * request_irq() in probe, i.e. crc.
	 */
	if (crc->irq > 0)
		free_irq(crc->irq, crc);
	tasklet_kill(&crc->done_task);

	/* descriptor page allocated in probe */
	if (crc->sg_cpu)
		dma_free_coherent(&pdev->dev, PAGE_SIZE, crc->sg_cpu, crc->sg_dma);
	free_dma(crc->dma_ch);

	iounmap((void *)crc->regs);
	kfree(crc);

	return 0;
}
/*
 * Platform driver glue. The suspend/resume hooks may be NULL: resume always
 * is, suspend is when CONFIG_PM is not set.
 */
static struct platform_driver bfin_crypto_crc_driver = {
.probe = bfin_crypto_crc_probe,
.remove = __devexit_p(bfin_crypto_crc_remove),
.suspend = bfin_crypto_crc_suspend,
.resume = bfin_crypto_crc_resume,
.driver = {
.name = DRIVER_NAME,
.owner = THIS_MODULE,
},
};
/**
 * bfin_crypto_crc_mod_init - Initialize module
 *
 * Initializes the global device list and its lock, then registers the
 * platform driver. Real work is in the platform probe function.
 *
 * Returns 0 on success or the platform_driver_register() error code.
 */
static int __init bfin_crypto_crc_mod_init(void)
{
	int ret;

	pr_info("Blackfin hardware CRC crypto driver\n");

	INIT_LIST_HEAD(&crc_list.dev_list);
	spin_lock_init(&crc_list.lock);

	ret = platform_driver_register(&bfin_crypto_crc_driver);
	if (ret) {
		/*
		 * pr_err() supplies the log level itself; a KERN_* prefix
		 * passed to pr_info() would end up inside the message text.
		 */
		pr_err("unable to register driver\n");
		return ret;
	}

	return 0;
}
/**
 * bfin_crypto_crc_mod_exit - Deinitialize module
 *
 * Unregisters the platform driver registered by the module init function.
 */
static void __exit bfin_crypto_crc_mod_exit(void)
{
platform_driver_unregister(&bfin_crypto_crc_driver);
}
module_init(bfin_crypto_crc_mod_init);
module_exit(bfin_crypto_crc_mod_exit);
MODULE_AUTHOR("Sonic Zhang <sonic.zhang@analog.com>");
MODULE_DESCRIPTION("Blackfin CRC hardware crypto driver");
MODULE_LICENSE("GPL");

View File

@ -32,10 +32,13 @@ config CRYPTO_DEV_FSL_CAAM_RINGSIZE
config CRYPTO_DEV_FSL_CAAM_INTC
bool "Job Ring interrupt coalescing"
depends on CRYPTO_DEV_FSL_CAAM
default y
default n
help
Enable the Job Ring's interrupt coalescing feature.
Note: the driver already provides adequate
interrupt coalescing in software.
config CRYPTO_DEV_FSL_CAAM_INTC_COUNT_THLD
int "Job Ring interrupt coalescing count threshold"
depends on CRYPTO_DEV_FSL_CAAM_INTC
@ -70,3 +73,28 @@ config CRYPTO_DEV_FSL_CAAM_CRYPTO_API
To compile this as a module, choose M here: the module
will be called caamalg.
config CRYPTO_DEV_FSL_CAAM_AHASH_API
tristate "Register hash algorithm implementations with Crypto API"
depends on CRYPTO_DEV_FSL_CAAM
default y
select CRYPTO_AHASH
help
Selecting this will offload ahash for users of the
scatterlist crypto API to the SEC4 via job ring.
To compile this as a module, choose M here: the module
will be called caamhash.
config CRYPTO_DEV_FSL_CAAM_RNG_API
tristate "Register caam device for hwrng API"
depends on CRYPTO_DEV_FSL_CAAM
default y
select CRYPTO_RNG
select HW_RANDOM
help
Selecting this will register the SEC4 hardware rng to
the hw_random API for supplying the kernel entropy pool.
To compile this as a module, choose M here: the module
will be called caamrng.

View File

@ -4,5 +4,7 @@
obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam.o
obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API) += caamalg.o
obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_AHASH_API) += caamhash.o
obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_RNG_API) += caamrng.o
caam-objs := ctrl.o jr.o error.o
caam-objs := ctrl.o jr.o error.o key_gen.o

View File

@ -37,9 +37,10 @@
* | ShareDesc Pointer |
* | SEQ_OUT_PTR |
* | (output buffer) |
* | (output length) |
* | SEQ_IN_PTR |
* | (input buffer) |
* | LOAD (to DECO) |
* | (input length) |
* ---------------------
*/
@ -50,6 +51,8 @@
#include "desc_constr.h"
#include "jr.h"
#include "error.h"
#include "sg_sw_sec4.h"
#include "key_gen.h"
/*
* crypto alg
@ -62,7 +65,7 @@
#define CAAM_MAX_IV_LENGTH 16
/* length of descriptors text */
#define DESC_JOB_IO_LEN (CAAM_CMD_SZ * 3 + CAAM_PTR_SZ * 3)
#define DESC_JOB_IO_LEN (CAAM_CMD_SZ * 5 + CAAM_PTR_SZ * 3)
#define DESC_AEAD_BASE (4 * CAAM_CMD_SZ)
#define DESC_AEAD_ENC_LEN (DESC_AEAD_BASE + 16 * CAAM_CMD_SZ)
@ -143,11 +146,11 @@ static inline void aead_append_ld_iv(u32 *desc, int ivsize)
*/
static inline void ablkcipher_append_src_dst(u32 *desc)
{
append_math_add(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ); \
append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); \
append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | \
KEY_VLF | FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST1); \
append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | KEY_VLF); \
append_math_add(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 |
KEY_VLF | FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST1);
append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | KEY_VLF);
}
/*
@ -452,121 +455,12 @@ static int aead_setauthsize(struct crypto_aead *authenc,
return 0;
}
struct split_key_result {
struct completion completion;
int err;
};
static void split_key_done(struct device *dev, u32 *desc, u32 err,
void *context)
static u32 gen_split_aead_key(struct caam_ctx *ctx, const u8 *key_in,
u32 authkeylen)
{
struct split_key_result *res = context;
#ifdef DEBUG
dev_err(dev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
#endif
if (err) {
char tmp[CAAM_ERROR_STR_MAX];
dev_err(dev, "%08x: %s\n", err, caam_jr_strstatus(tmp, err));
}
res->err = err;
complete(&res->completion);
}
/*
get a split ipad/opad key
Split key generation-----------------------------------------------
[00] 0xb0810008 jobdesc: stidx=1 share=never len=8
[01] 0x04000014 key: class2->keyreg len=20
@0xffe01000
[03] 0x84410014 operation: cls2-op sha1 hmac init dec
[04] 0x24940000 fifold: class2 msgdata-last2 len=0 imm
[05] 0xa4000001 jump: class2 local all ->1 [06]
[06] 0x64260028 fifostr: class2 mdsplit-jdk len=40
@0xffe04000
*/
static u32 gen_split_key(struct caam_ctx *ctx, const u8 *key_in, u32 authkeylen)
{
struct device *jrdev = ctx->jrdev;
u32 *desc;
struct split_key_result result;
dma_addr_t dma_addr_in, dma_addr_out;
int ret = 0;
desc = kmalloc(CAAM_CMD_SZ * 6 + CAAM_PTR_SZ * 2, GFP_KERNEL | GFP_DMA);
init_job_desc(desc, 0);
dma_addr_in = dma_map_single(jrdev, (void *)key_in, authkeylen,
DMA_TO_DEVICE);
if (dma_mapping_error(jrdev, dma_addr_in)) {
dev_err(jrdev, "unable to map key input memory\n");
kfree(desc);
return -ENOMEM;
}
append_key(desc, dma_addr_in, authkeylen, CLASS_2 |
KEY_DEST_CLASS_REG);
/* Sets MDHA up into an HMAC-INIT */
append_operation(desc, ctx->alg_op | OP_ALG_DECRYPT |
OP_ALG_AS_INIT);
/*
* do a FIFO_LOAD of zero, this will trigger the internal key expansion
into both pads inside MDHA
*/
append_fifo_load_as_imm(desc, NULL, 0, LDST_CLASS_2_CCB |
FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST2);
/*
* FIFO_STORE with the explicit split-key content store
* (0x26 output type)
*/
dma_addr_out = dma_map_single(jrdev, ctx->key, ctx->split_key_pad_len,
DMA_FROM_DEVICE);
if (dma_mapping_error(jrdev, dma_addr_out)) {
dev_err(jrdev, "unable to map key output memory\n");
kfree(desc);
return -ENOMEM;
}
append_fifo_store(desc, dma_addr_out, ctx->split_key_len,
LDST_CLASS_2_CCB | FIFOST_TYPE_SPLIT_KEK);
#ifdef DEBUG
print_hex_dump(KERN_ERR, "ctx.key@"xstr(__LINE__)": ",
DUMP_PREFIX_ADDRESS, 16, 4, key_in, authkeylen, 1);
print_hex_dump(KERN_ERR, "jobdesc@"xstr(__LINE__)": ",
DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
#endif
result.err = 0;
init_completion(&result.completion);
ret = caam_jr_enqueue(jrdev, desc, split_key_done, &result);
if (!ret) {
/* in progress */
wait_for_completion_interruptible(&result.completion);
ret = result.err;
#ifdef DEBUG
print_hex_dump(KERN_ERR, "ctx.key@"xstr(__LINE__)": ",
DUMP_PREFIX_ADDRESS, 16, 4, ctx->key,
ctx->split_key_pad_len, 1);
#endif
}
dma_unmap_single(jrdev, dma_addr_out, ctx->split_key_pad_len,
DMA_FROM_DEVICE);
dma_unmap_single(jrdev, dma_addr_in, authkeylen, DMA_TO_DEVICE);
kfree(desc);
return ret;
return gen_split_key(ctx->jrdev, ctx->key, ctx->split_key_len,
ctx->split_key_pad_len, key_in, authkeylen,
ctx->alg_op);
}
static int aead_setkey(struct crypto_aead *aead,
@ -610,7 +504,7 @@ static int aead_setkey(struct crypto_aead *aead,
DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
#endif
ret = gen_split_key(ctx, key, authkeylen);
ret = gen_split_aead_key(ctx, key, authkeylen);
if (ret) {
goto badkey;
}
@ -757,72 +651,78 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
return ret;
}
struct link_tbl_entry {
u64 ptr;
u32 len;
u8 reserved;
u8 buf_pool_id;
u16 offset;
};
/*
* aead_edesc - s/w-extended aead descriptor
* @assoc_nents: number of segments in associated data (SPI+Seq) scatterlist
* @assoc_chained: if source is chained
* @src_nents: number of segments in input scatterlist
* @src_chained: if source is chained
* @dst_nents: number of segments in output scatterlist
* @dst_chained: if destination is chained
* @iv_dma: dma address of iv for checking continuity and link table
* @desc: h/w descriptor (variable length; must not exceed MAX_CAAM_DESCSIZE)
* @link_tbl_bytes: length of dma mapped link_tbl space
* @link_tbl_dma: bus physical mapped address of h/w link table
* @sec4_sg_bytes: length of dma mapped sec4_sg space
* @sec4_sg_dma: bus physical mapped address of h/w link table
* @hw_desc: the h/w job descriptor followed by any referenced link tables
*/
struct aead_edesc {
int assoc_nents;
bool assoc_chained;
int src_nents;
bool src_chained;
int dst_nents;
bool dst_chained;
dma_addr_t iv_dma;
int link_tbl_bytes;
dma_addr_t link_tbl_dma;
struct link_tbl_entry *link_tbl;
int sec4_sg_bytes;
dma_addr_t sec4_sg_dma;
struct sec4_sg_entry *sec4_sg;
u32 hw_desc[0];
};
/*
* ablkcipher_edesc - s/w-extended ablkcipher descriptor
* @src_nents: number of segments in input scatterlist
* @src_chained: if source is chained
* @dst_nents: number of segments in output scatterlist
* @dst_chained: if destination is chained
* @iv_dma: dma address of iv for checking continuity and link table
* @desc: h/w descriptor (variable length; must not exceed MAX_CAAM_DESCSIZE)
* @link_tbl_bytes: length of dma mapped link_tbl space
* @link_tbl_dma: bus physical mapped address of h/w link table
* @sec4_sg_bytes: length of dma mapped sec4_sg space
* @sec4_sg_dma: bus physical mapped address of h/w link table
* @hw_desc: the h/w job descriptor followed by any referenced link tables
*/
struct ablkcipher_edesc {
int src_nents;
bool src_chained;
int dst_nents;
bool dst_chained;
dma_addr_t iv_dma;
int link_tbl_bytes;
dma_addr_t link_tbl_dma;
struct link_tbl_entry *link_tbl;
int sec4_sg_bytes;
dma_addr_t sec4_sg_dma;
struct sec4_sg_entry *sec4_sg;
u32 hw_desc[0];
};
static void caam_unmap(struct device *dev, struct scatterlist *src,
struct scatterlist *dst, int src_nents, int dst_nents,
dma_addr_t iv_dma, int ivsize, dma_addr_t link_tbl_dma,
int link_tbl_bytes)
struct scatterlist *dst, int src_nents,
bool src_chained, int dst_nents, bool dst_chained,
dma_addr_t iv_dma, int ivsize, dma_addr_t sec4_sg_dma,
int sec4_sg_bytes)
{
if (unlikely(dst != src)) {
dma_unmap_sg(dev, src, src_nents, DMA_TO_DEVICE);
dma_unmap_sg(dev, dst, dst_nents, DMA_FROM_DEVICE);
if (dst != src) {
dma_unmap_sg_chained(dev, src, src_nents ? : 1, DMA_TO_DEVICE,
src_chained);
dma_unmap_sg_chained(dev, dst, dst_nents ? : 1, DMA_FROM_DEVICE,
dst_chained);
} else {
dma_unmap_sg(dev, src, src_nents, DMA_BIDIRECTIONAL);
dma_unmap_sg_chained(dev, src, src_nents ? : 1,
DMA_BIDIRECTIONAL, src_chained);
}
if (iv_dma)
dma_unmap_single(dev, iv_dma, ivsize, DMA_TO_DEVICE);
if (link_tbl_bytes)
dma_unmap_single(dev, link_tbl_dma, link_tbl_bytes,
if (sec4_sg_bytes)
dma_unmap_single(dev, sec4_sg_dma, sec4_sg_bytes,
DMA_TO_DEVICE);
}
@ -833,12 +733,13 @@ static void aead_unmap(struct device *dev,
struct crypto_aead *aead = crypto_aead_reqtfm(req);
int ivsize = crypto_aead_ivsize(aead);
dma_unmap_sg(dev, req->assoc, edesc->assoc_nents, DMA_TO_DEVICE);
dma_unmap_sg_chained(dev, req->assoc, edesc->assoc_nents,
DMA_TO_DEVICE, edesc->assoc_chained);
caam_unmap(dev, req->src, req->dst,
edesc->src_nents, edesc->dst_nents,
edesc->iv_dma, ivsize, edesc->link_tbl_dma,
edesc->link_tbl_bytes);
edesc->src_nents, edesc->src_chained, edesc->dst_nents,
edesc->dst_chained, edesc->iv_dma, ivsize,
edesc->sec4_sg_dma, edesc->sec4_sg_bytes);
}
static void ablkcipher_unmap(struct device *dev,
@ -849,9 +750,9 @@ static void ablkcipher_unmap(struct device *dev,
int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
caam_unmap(dev, req->src, req->dst,
edesc->src_nents, edesc->dst_nents,
edesc->iv_dma, ivsize, edesc->link_tbl_dma,
edesc->link_tbl_bytes);
edesc->src_nents, edesc->src_chained, edesc->dst_nents,
edesc->dst_chained, edesc->iv_dma, ivsize,
edesc->sec4_sg_dma, edesc->sec4_sg_bytes);
}
static void aead_encrypt_done(struct device *jrdev, u32 *desc, u32 err,
@ -942,7 +843,7 @@ static void aead_decrypt_done(struct device *jrdev, u32 *desc, u32 err,
sizeof(struct iphdr) + req->assoclen +
((req->cryptlen > 1500) ? 1500 : req->cryptlen) +
ctx->authsize + 36, 1);
if (!err && edesc->link_tbl_bytes) {
if (!err && edesc->sec4_sg_bytes) {
struct scatterlist *sg = sg_last(req->src, edesc->src_nents);
print_hex_dump(KERN_ERR, "sglastout@"xstr(__LINE__)": ",
DUMP_PREFIX_ADDRESS, 16, 4, sg_virt(sg),
@ -1026,50 +927,6 @@ static void ablkcipher_decrypt_done(struct device *jrdev, u32 *desc, u32 err,
ablkcipher_request_complete(req, err);
}
/*
 * Fill one h/w link table entry from a DMA address, a length and an offset.
 * The reserved and buf_pool_id fields are always written as zero.
 */
static void sg_to_link_tbl_one(struct link_tbl_entry *link_tbl_ptr,
			       dma_addr_t dma, u32 len, u32 offset)
{
	/* fields the caller has no influence on */
	link_tbl_ptr->reserved = 0;
	link_tbl_ptr->buf_pool_id = 0;

	/* caller-supplied description of the data segment */
	link_tbl_ptr->ptr = dma;
	link_tbl_ptr->len = len;
	link_tbl_ptr->offset = offset;

#ifdef DEBUG
	print_hex_dump(KERN_ERR, "link_tbl_ptr@"xstr(__LINE__)": ",
		       DUMP_PREFIX_ADDRESS, 16, 4, link_tbl_ptr,
		       sizeof(struct link_tbl_entry), 1);
#endif
}
/*
 * Translate the first sg_count entries of a (previously DMA-mapped)
 * scatterlist into consecutive h/w link table entries.
 *
 * The final bit is NOT set here; the address of the last entry written is
 * returned so the caller can do that itself.
 */
static struct link_tbl_entry *sg_to_link_tbl(struct scatterlist *sg,
					      int sg_count, struct link_tbl_entry
					      *link_tbl_ptr, u32 offset)
{
	for (; sg_count; sg_count--, sg = sg_next(sg), link_tbl_ptr++)
		sg_to_link_tbl_one(link_tbl_ptr, sg_dma_address(sg),
				   sg_dma_len(sg), offset);

	/* link_tbl_ptr is one past the last entry that was filled in */
	return link_tbl_ptr - 1;
}
/*
 * Translate a scatterlist into h/w link table entries and flag the last
 * entry written by OR-ing 0x40000000 into its length word.
 * The scatterlist must have been DMA mapped beforehand.
 */
static void sg_to_link_tbl_last(struct scatterlist *sg, int sg_count,
				struct link_tbl_entry *link_tbl_ptr, u32 offset)
{
	struct link_tbl_entry *last_entry;

	last_entry = sg_to_link_tbl(sg, sg_count, link_tbl_ptr, offset);
	last_entry->len |= 0x40000000;
}
/*
* Fill in aead job descriptor
*/
@ -1085,7 +942,7 @@ static void init_aead_job(u32 *sh_desc, dma_addr_t ptr,
u32 *desc = edesc->hw_desc;
u32 out_options = 0, in_options;
dma_addr_t dst_dma, src_dma;
int len, link_tbl_index = 0;
int len, sec4_sg_index = 0;
#ifdef DEBUG
debug("assoclen %d cryptlen %d authsize %d\n",
@ -1111,9 +968,9 @@ static void init_aead_job(u32 *sh_desc, dma_addr_t ptr,
src_dma = sg_dma_address(req->assoc);
in_options = 0;
} else {
src_dma = edesc->link_tbl_dma;
link_tbl_index += (edesc->assoc_nents ? : 1) + 1 +
(edesc->src_nents ? : 1);
src_dma = edesc->sec4_sg_dma;
sec4_sg_index += (edesc->assoc_nents ? : 1) + 1 +
(edesc->src_nents ? : 1);
in_options = LDST_SGF;
}
if (encrypt)
@ -1127,7 +984,7 @@ static void init_aead_job(u32 *sh_desc, dma_addr_t ptr,
if (all_contig) {
dst_dma = sg_dma_address(req->src);
} else {
dst_dma = src_dma + sizeof(struct link_tbl_entry) *
dst_dma = src_dma + sizeof(struct sec4_sg_entry) *
((edesc->assoc_nents ? : 1) + 1);
out_options = LDST_SGF;
}
@ -1135,9 +992,9 @@ static void init_aead_job(u32 *sh_desc, dma_addr_t ptr,
if (!edesc->dst_nents) {
dst_dma = sg_dma_address(req->dst);
} else {
dst_dma = edesc->link_tbl_dma +
link_tbl_index *
sizeof(struct link_tbl_entry);
dst_dma = edesc->sec4_sg_dma +
sec4_sg_index *
sizeof(struct sec4_sg_entry);
out_options = LDST_SGF;
}
}
@ -1163,7 +1020,7 @@ static void init_aead_giv_job(u32 *sh_desc, dma_addr_t ptr,
u32 *desc = edesc->hw_desc;
u32 out_options = 0, in_options;
dma_addr_t dst_dma, src_dma;
int len, link_tbl_index = 0;
int len, sec4_sg_index = 0;
#ifdef DEBUG
debug("assoclen %d cryptlen %d authsize %d\n",
@ -1188,8 +1045,8 @@ static void init_aead_giv_job(u32 *sh_desc, dma_addr_t ptr,
src_dma = sg_dma_address(req->assoc);
in_options = 0;
} else {
src_dma = edesc->link_tbl_dma;
link_tbl_index += edesc->assoc_nents + 1 + edesc->src_nents;
src_dma = edesc->sec4_sg_dma;
sec4_sg_index += edesc->assoc_nents + 1 + edesc->src_nents;
in_options = LDST_SGF;
}
append_seq_in_ptr(desc, src_dma, req->assoclen + ivsize +
@ -1199,13 +1056,13 @@ static void init_aead_giv_job(u32 *sh_desc, dma_addr_t ptr,
dst_dma = edesc->iv_dma;
} else {
if (likely(req->src == req->dst)) {
dst_dma = src_dma + sizeof(struct link_tbl_entry) *
dst_dma = src_dma + sizeof(struct sec4_sg_entry) *
edesc->assoc_nents;
out_options = LDST_SGF;
} else {
dst_dma = edesc->link_tbl_dma +
link_tbl_index *
sizeof(struct link_tbl_entry);
dst_dma = edesc->sec4_sg_dma +
sec4_sg_index *
sizeof(struct sec4_sg_entry);
out_options = LDST_SGF;
}
}
@ -1226,7 +1083,7 @@ static void init_ablkcipher_job(u32 *sh_desc, dma_addr_t ptr,
u32 *desc = edesc->hw_desc;
u32 out_options = 0, in_options;
dma_addr_t dst_dma, src_dma;
int len, link_tbl_index = 0;
int len, sec4_sg_index = 0;
#ifdef DEBUG
print_hex_dump(KERN_ERR, "presciv@"xstr(__LINE__)": ",
@ -1244,8 +1101,8 @@ static void init_ablkcipher_job(u32 *sh_desc, dma_addr_t ptr,
src_dma = edesc->iv_dma;
in_options = 0;
} else {
src_dma = edesc->link_tbl_dma;
link_tbl_index += (iv_contig ? 0 : 1) + edesc->src_nents;
src_dma = edesc->sec4_sg_dma;
sec4_sg_index += (iv_contig ? 0 : 1) + edesc->src_nents;
in_options = LDST_SGF;
}
append_seq_in_ptr(desc, src_dma, req->nbytes + ivsize, in_options);
@ -1254,44 +1111,22 @@ static void init_ablkcipher_job(u32 *sh_desc, dma_addr_t ptr,
if (!edesc->src_nents && iv_contig) {
dst_dma = sg_dma_address(req->src);
} else {
dst_dma = edesc->link_tbl_dma +
sizeof(struct link_tbl_entry);
dst_dma = edesc->sec4_sg_dma +
sizeof(struct sec4_sg_entry);
out_options = LDST_SGF;
}
} else {
if (!edesc->dst_nents) {
dst_dma = sg_dma_address(req->dst);
} else {
dst_dma = edesc->link_tbl_dma +
link_tbl_index * sizeof(struct link_tbl_entry);
dst_dma = edesc->sec4_sg_dma +
sec4_sg_index * sizeof(struct sec4_sg_entry);
out_options = LDST_SGF;
}
}
append_seq_out_ptr(desc, dst_dma, req->nbytes, out_options);
}
/*
 * Count the scatterlist entries needed to cover nbytes of data.
 *
 * Returns 0 (not 1) when a single entry covers everything, otherwise the
 * number of entries walked. Hits BUG() when an entry that is not the last
 * one is followed by a zero-length entry (chaining is not supported).
 */
static int sg_count(struct scatterlist *sg_list, int nbytes)
{
	struct scatterlist *cur;
	int remaining = nbytes;
	int entries = 0;

	for (cur = sg_list; remaining > 0; cur = scatterwalk_sg_next(cur)) {
		entries++;
		remaining -= cur->length;
		if (!sg_is_last(cur) && (cur + 1)->length == 0)
			BUG(); /* Not support chaining */
	}

	return likely(entries == 1) ? 0 : entries;
}
/*
* allocate and map the aead extended descriptor
*/
@ -1308,25 +1143,26 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
dma_addr_t iv_dma = 0;
int sgc;
bool all_contig = true;
bool assoc_chained = false, src_chained = false, dst_chained = false;
int ivsize = crypto_aead_ivsize(aead);
int link_tbl_index, link_tbl_len = 0, link_tbl_bytes;
int sec4_sg_index, sec4_sg_len = 0, sec4_sg_bytes;
assoc_nents = sg_count(req->assoc, req->assoclen);
src_nents = sg_count(req->src, req->cryptlen);
assoc_nents = sg_count(req->assoc, req->assoclen, &assoc_chained);
src_nents = sg_count(req->src, req->cryptlen, &src_chained);
if (unlikely(req->dst != req->src))
dst_nents = sg_count(req->dst, req->cryptlen);
dst_nents = sg_count(req->dst, req->cryptlen, &dst_chained);
sgc = dma_map_sg(jrdev, req->assoc, assoc_nents ? : 1,
DMA_BIDIRECTIONAL);
sgc = dma_map_sg_chained(jrdev, req->assoc, assoc_nents ? : 1,
DMA_BIDIRECTIONAL, assoc_chained);
if (likely(req->src == req->dst)) {
sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1,
DMA_BIDIRECTIONAL);
sgc = dma_map_sg_chained(jrdev, req->src, src_nents ? : 1,
DMA_BIDIRECTIONAL, src_chained);
} else {
sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1,
DMA_TO_DEVICE);
sgc = dma_map_sg(jrdev, req->dst, dst_nents ? : 1,
DMA_FROM_DEVICE);
sgc = dma_map_sg_chained(jrdev, req->src, src_nents ? : 1,
DMA_TO_DEVICE, src_chained);
sgc = dma_map_sg_chained(jrdev, req->dst, dst_nents ? : 1,
DMA_FROM_DEVICE, dst_chained);
}
/* Check if data are contiguous */
@ -1337,50 +1173,53 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
all_contig = false;
assoc_nents = assoc_nents ? : 1;
src_nents = src_nents ? : 1;
link_tbl_len = assoc_nents + 1 + src_nents;
sec4_sg_len = assoc_nents + 1 + src_nents;
}
link_tbl_len += dst_nents;
sec4_sg_len += dst_nents;
link_tbl_bytes = link_tbl_len * sizeof(struct link_tbl_entry);
sec4_sg_bytes = sec4_sg_len * sizeof(struct sec4_sg_entry);
/* allocate space for base edesc and hw desc commands, link tables */
edesc = kmalloc(sizeof(struct aead_edesc) + desc_bytes +
link_tbl_bytes, GFP_DMA | flags);
sec4_sg_bytes, GFP_DMA | flags);
if (!edesc) {
dev_err(jrdev, "could not allocate extended descriptor\n");
return ERR_PTR(-ENOMEM);
}
edesc->assoc_nents = assoc_nents;
edesc->assoc_chained = assoc_chained;
edesc->src_nents = src_nents;
edesc->src_chained = src_chained;
edesc->dst_nents = dst_nents;
edesc->dst_chained = dst_chained;
edesc->iv_dma = iv_dma;
edesc->link_tbl_bytes = link_tbl_bytes;
edesc->link_tbl = (void *)edesc + sizeof(struct aead_edesc) +
desc_bytes;
edesc->link_tbl_dma = dma_map_single(jrdev, edesc->link_tbl,
link_tbl_bytes, DMA_TO_DEVICE);
edesc->sec4_sg_bytes = sec4_sg_bytes;
edesc->sec4_sg = (void *)edesc + sizeof(struct aead_edesc) +
desc_bytes;
edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
sec4_sg_bytes, DMA_TO_DEVICE);
*all_contig_ptr = all_contig;
link_tbl_index = 0;
sec4_sg_index = 0;
if (!all_contig) {
sg_to_link_tbl(req->assoc,
(assoc_nents ? : 1),
edesc->link_tbl +
link_tbl_index, 0);
link_tbl_index += assoc_nents ? : 1;
sg_to_link_tbl_one(edesc->link_tbl + link_tbl_index,
sg_to_sec4_sg(req->assoc,
(assoc_nents ? : 1),
edesc->sec4_sg +
sec4_sg_index, 0);
sec4_sg_index += assoc_nents ? : 1;
dma_to_sec4_sg_one(edesc->sec4_sg + sec4_sg_index,
iv_dma, ivsize, 0);
link_tbl_index += 1;
sg_to_link_tbl_last(req->src,
(src_nents ? : 1),
edesc->link_tbl +
link_tbl_index, 0);
link_tbl_index += src_nents ? : 1;
sec4_sg_index += 1;
sg_to_sec4_sg_last(req->src,
(src_nents ? : 1),
edesc->sec4_sg +
sec4_sg_index, 0);
sec4_sg_index += src_nents ? : 1;
}
if (dst_nents) {
sg_to_link_tbl_last(req->dst, dst_nents,
edesc->link_tbl + link_tbl_index, 0);
sg_to_sec4_sg_last(req->dst, dst_nents,
edesc->sec4_sg + sec4_sg_index, 0);
}
return edesc;
@ -1487,24 +1326,25 @@ static struct aead_edesc *aead_giv_edesc_alloc(struct aead_givcrypt_request
int sgc;
u32 contig = GIV_SRC_CONTIG | GIV_DST_CONTIG;
int ivsize = crypto_aead_ivsize(aead);
int link_tbl_index, link_tbl_len = 0, link_tbl_bytes;
bool assoc_chained = false, src_chained = false, dst_chained = false;
int sec4_sg_index, sec4_sg_len = 0, sec4_sg_bytes;
assoc_nents = sg_count(req->assoc, req->assoclen);
src_nents = sg_count(req->src, req->cryptlen);
assoc_nents = sg_count(req->assoc, req->assoclen, &assoc_chained);
src_nents = sg_count(req->src, req->cryptlen, &src_chained);
if (unlikely(req->dst != req->src))
dst_nents = sg_count(req->dst, req->cryptlen);
dst_nents = sg_count(req->dst, req->cryptlen, &dst_chained);
sgc = dma_map_sg(jrdev, req->assoc, assoc_nents ? : 1,
DMA_BIDIRECTIONAL);
sgc = dma_map_sg_chained(jrdev, req->assoc, assoc_nents ? : 1,
DMA_BIDIRECTIONAL, assoc_chained);
if (likely(req->src == req->dst)) {
sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1,
DMA_BIDIRECTIONAL);
sgc = dma_map_sg_chained(jrdev, req->src, src_nents ? : 1,
DMA_BIDIRECTIONAL, src_chained);
} else {
sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1,
DMA_TO_DEVICE);
sgc = dma_map_sg(jrdev, req->dst, dst_nents ? : 1,
DMA_FROM_DEVICE);
sgc = dma_map_sg_chained(jrdev, req->src, src_nents ? : 1,
DMA_TO_DEVICE, src_chained);
sgc = dma_map_sg_chained(jrdev, req->dst, dst_nents ? : 1,
DMA_FROM_DEVICE, dst_chained);
}
/* Check if data are contiguous */
@ -1516,58 +1356,61 @@ static struct aead_edesc *aead_giv_edesc_alloc(struct aead_givcrypt_request
contig &= ~GIV_DST_CONTIG;
if (unlikely(req->src != req->dst)) {
dst_nents = dst_nents ? : 1;
link_tbl_len += 1;
sec4_sg_len += 1;
}
if (!(contig & GIV_SRC_CONTIG)) {
assoc_nents = assoc_nents ? : 1;
src_nents = src_nents ? : 1;
link_tbl_len += assoc_nents + 1 + src_nents;
sec4_sg_len += assoc_nents + 1 + src_nents;
if (likely(req->src == req->dst))
contig &= ~GIV_DST_CONTIG;
}
link_tbl_len += dst_nents;
sec4_sg_len += dst_nents;
link_tbl_bytes = link_tbl_len * sizeof(struct link_tbl_entry);
sec4_sg_bytes = sec4_sg_len * sizeof(struct sec4_sg_entry);
/* allocate space for base edesc and hw desc commands, link tables */
edesc = kmalloc(sizeof(struct aead_edesc) + desc_bytes +
link_tbl_bytes, GFP_DMA | flags);
sec4_sg_bytes, GFP_DMA | flags);
if (!edesc) {
dev_err(jrdev, "could not allocate extended descriptor\n");
return ERR_PTR(-ENOMEM);
}
edesc->assoc_nents = assoc_nents;
edesc->assoc_chained = assoc_chained;
edesc->src_nents = src_nents;
edesc->src_chained = src_chained;
edesc->dst_nents = dst_nents;
edesc->dst_chained = dst_chained;
edesc->iv_dma = iv_dma;
edesc->link_tbl_bytes = link_tbl_bytes;
edesc->link_tbl = (void *)edesc + sizeof(struct aead_edesc) +
desc_bytes;
edesc->link_tbl_dma = dma_map_single(jrdev, edesc->link_tbl,
link_tbl_bytes, DMA_TO_DEVICE);
edesc->sec4_sg_bytes = sec4_sg_bytes;
edesc->sec4_sg = (void *)edesc + sizeof(struct aead_edesc) +
desc_bytes;
edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
sec4_sg_bytes, DMA_TO_DEVICE);
*contig_ptr = contig;
link_tbl_index = 0;
sec4_sg_index = 0;
if (!(contig & GIV_SRC_CONTIG)) {
sg_to_link_tbl(req->assoc, assoc_nents,
edesc->link_tbl +
link_tbl_index, 0);
link_tbl_index += assoc_nents;
sg_to_link_tbl_one(edesc->link_tbl + link_tbl_index,
sg_to_sec4_sg(req->assoc, assoc_nents,
edesc->sec4_sg +
sec4_sg_index, 0);
sec4_sg_index += assoc_nents;
dma_to_sec4_sg_one(edesc->sec4_sg + sec4_sg_index,
iv_dma, ivsize, 0);
link_tbl_index += 1;
sg_to_link_tbl_last(req->src, src_nents,
edesc->link_tbl +
link_tbl_index, 0);
link_tbl_index += src_nents;
sec4_sg_index += 1;
sg_to_sec4_sg_last(req->src, src_nents,
edesc->sec4_sg +
sec4_sg_index, 0);
sec4_sg_index += src_nents;
}
if (unlikely(req->src != req->dst && !(contig & GIV_DST_CONTIG))) {
sg_to_link_tbl_one(edesc->link_tbl + link_tbl_index,
dma_to_sec4_sg_one(edesc->sec4_sg + sec4_sg_index,
iv_dma, ivsize, 0);
link_tbl_index += 1;
sg_to_link_tbl_last(req->dst, dst_nents,
edesc->link_tbl + link_tbl_index, 0);
sec4_sg_index += 1;
sg_to_sec4_sg_last(req->dst, dst_nents,
edesc->sec4_sg + sec4_sg_index, 0);
}
return edesc;
@ -1633,27 +1476,28 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
CRYPTO_TFM_REQ_MAY_SLEEP)) ?
GFP_KERNEL : GFP_ATOMIC;
int src_nents, dst_nents = 0, link_tbl_bytes;
int src_nents, dst_nents = 0, sec4_sg_bytes;
struct ablkcipher_edesc *edesc;
dma_addr_t iv_dma = 0;
bool iv_contig = false;
int sgc;
int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
int link_tbl_index;
bool src_chained = false, dst_chained = false;
int sec4_sg_index;
src_nents = sg_count(req->src, req->nbytes);
src_nents = sg_count(req->src, req->nbytes, &src_chained);
if (unlikely(req->dst != req->src))
dst_nents = sg_count(req->dst, req->nbytes);
if (req->dst != req->src)
dst_nents = sg_count(req->dst, req->nbytes, &dst_chained);
if (likely(req->src == req->dst)) {
sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1,
DMA_BIDIRECTIONAL);
sgc = dma_map_sg_chained(jrdev, req->src, src_nents ? : 1,
DMA_BIDIRECTIONAL, src_chained);
} else {
sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1,
DMA_TO_DEVICE);
sgc = dma_map_sg(jrdev, req->dst, dst_nents ? : 1,
DMA_FROM_DEVICE);
sgc = dma_map_sg_chained(jrdev, req->src, src_nents ? : 1,
DMA_TO_DEVICE, src_chained);
sgc = dma_map_sg_chained(jrdev, req->dst, dst_nents ? : 1,
DMA_FROM_DEVICE, dst_chained);
}
/*
@ -1665,44 +1509,46 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
iv_contig = true;
else
src_nents = src_nents ? : 1;
link_tbl_bytes = ((iv_contig ? 0 : 1) + src_nents + dst_nents) *
sizeof(struct link_tbl_entry);
sec4_sg_bytes = ((iv_contig ? 0 : 1) + src_nents + dst_nents) *
sizeof(struct sec4_sg_entry);
/* allocate space for base edesc and hw desc commands, link tables */
edesc = kmalloc(sizeof(struct ablkcipher_edesc) + desc_bytes +
link_tbl_bytes, GFP_DMA | flags);
sec4_sg_bytes, GFP_DMA | flags);
if (!edesc) {
dev_err(jrdev, "could not allocate extended descriptor\n");
return ERR_PTR(-ENOMEM);
}
edesc->src_nents = src_nents;
edesc->src_chained = src_chained;
edesc->dst_nents = dst_nents;
edesc->link_tbl_bytes = link_tbl_bytes;
edesc->link_tbl = (void *)edesc + sizeof(struct ablkcipher_edesc) +
desc_bytes;
edesc->dst_chained = dst_chained;
edesc->sec4_sg_bytes = sec4_sg_bytes;
edesc->sec4_sg = (void *)edesc + sizeof(struct ablkcipher_edesc) +
desc_bytes;
link_tbl_index = 0;
sec4_sg_index = 0;
if (!iv_contig) {
sg_to_link_tbl_one(edesc->link_tbl, iv_dma, ivsize, 0);
sg_to_link_tbl_last(req->src, src_nents,
edesc->link_tbl + 1, 0);
link_tbl_index += 1 + src_nents;
dma_to_sec4_sg_one(edesc->sec4_sg, iv_dma, ivsize, 0);
sg_to_sec4_sg_last(req->src, src_nents,
edesc->sec4_sg + 1, 0);
sec4_sg_index += 1 + src_nents;
}
if (unlikely(dst_nents)) {
sg_to_link_tbl_last(req->dst, dst_nents,
edesc->link_tbl + link_tbl_index, 0);
if (dst_nents) {
sg_to_sec4_sg_last(req->dst, dst_nents,
edesc->sec4_sg + sec4_sg_index, 0);
}
edesc->link_tbl_dma = dma_map_single(jrdev, edesc->link_tbl,
link_tbl_bytes, DMA_TO_DEVICE);
edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
sec4_sg_bytes, DMA_TO_DEVICE);
edesc->iv_dma = iv_dma;
#ifdef DEBUG
print_hex_dump(KERN_ERR, "ablkcipher link_tbl@"xstr(__LINE__)": ",
DUMP_PREFIX_ADDRESS, 16, 4, edesc->link_tbl,
link_tbl_bytes, 1);
print_hex_dump(KERN_ERR, "ablkcipher sec4_sg@"xstr(__LINE__)": ",
DUMP_PREFIX_ADDRESS, 16, 4, edesc->sec4_sg,
sec4_sg_bytes, 1);
#endif
*iv_contig_out = iv_contig;
@ -2227,7 +2073,7 @@ static int caam_cra_init(struct crypto_tfm *tfm)
* distribute tfms across job rings to ensure in-order
* crypto request processing per tfm
*/
ctx->jrdev = priv->algapi_jr[(tgt_jr / 2) % priv->num_jrs_for_algapi];
ctx->jrdev = priv->jrdev[(tgt_jr / 2) % priv->total_jobrs];
/* copy descriptor header template value */
ctx->class1_alg_type = OP_TYPE_CLASS1_ALG | caam_alg->class1_alg_type;
@ -2264,7 +2110,6 @@ static void __exit caam_algapi_exit(void)
struct device *ctrldev;
struct caam_drv_private *priv;
struct caam_crypto_alg *t_alg, *n;
int i, err;
dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec-v4.0");
if (!dev_node) {
@ -2289,13 +2134,6 @@ static void __exit caam_algapi_exit(void)
list_del(&t_alg->entry);
kfree(t_alg);
}
for (i = 0; i < priv->total_jobrs; i++) {
err = caam_jr_deregister(priv->algapi_jr[i]);
if (err < 0)
break;
}
kfree(priv->algapi_jr);
}
static struct caam_crypto_alg *caam_alg_alloc(struct device *ctrldev,
@ -2348,7 +2186,7 @@ static int __init caam_algapi_init(void)
{
struct device_node *dev_node;
struct platform_device *pdev;
struct device *ctrldev, **jrdev;
struct device *ctrldev;
struct caam_drv_private *priv;
int i = 0, err = 0;
@ -2369,24 +2207,6 @@ static int __init caam_algapi_init(void)
INIT_LIST_HEAD(&priv->alg_list);
jrdev = kmalloc(sizeof(*jrdev) * priv->total_jobrs, GFP_KERNEL);
if (!jrdev)
return -ENOMEM;
for (i = 0; i < priv->total_jobrs; i++) {
err = caam_jr_register(ctrldev, &jrdev[i]);
if (err < 0)
break;
}
if (err < 0 && i == 0) {
dev_err(ctrldev, "algapi error in job ring registration: %d\n",
err);
kfree(jrdev);
return err;
}
priv->num_jrs_for_algapi = i;
priv->algapi_jr = jrdev;
atomic_set(&priv->tfm_count, -1);
/* register crypto algorithms the device supports */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,309 @@
/*
* caam - Freescale FSL CAAM support for hw_random
*
* Copyright 2011 Freescale Semiconductor, Inc.
*
* Based on caamalg.c crypto API driver.
*
* relationship between job descriptors to shared descriptors:
*
* --------------- --------------
* | JobDesc #0 |-------------------->| ShareDesc |
* | *(buffer 0) | |------------->| (generate) |
* --------------- | | (move) |
* | | (store) |
* --------------- | --------------
* | JobDesc #1 |------|
* | *(buffer 1) |
* ---------------
*
* A job desc looks like this:
*
* ---------------------
* | Header |
* | ShareDesc Pointer |
* | SEQ_OUT_PTR |
* | (output buffer) |
* ---------------------
*
* The SharedDesc never changes, and each job descriptor points to one of two
* buffers for each device, from which the data will be copied into the
* requested destination
*/
#include <linux/hw_random.h>
#include <linux/completion.h>
#include <linux/atomic.h>
#include "compat.h"
#include "regs.h"
#include "intern.h"
#include "desc_constr.h"
#include "jr.h"
#include "error.h"
/*
* Maximum buffer size: maximum number of random, cache-aligned bytes that
* will be generated and moved to seq out ptr (extlen not allowed)
*/
#define RN_BUF_SIZE (0xffff / L1_CACHE_BYTES * \
L1_CACHE_BYTES)
/*
 * length of descriptors
 *
 * NOTE(review): both values are built from what look like byte sizes
 * (CAAM_CMD_SZ, CAAM_PTR_SZ) yet are used below as the element count of
 * u32 arrays (hw_desc[], sh_desc[]) -- confirm the intended unit.
 */
#define DESC_JOB_O_LEN (CAAM_CMD_SZ * 2 + CAAM_PTR_SZ * 2)
#define DESC_RNG_LEN (10 * CAAM_CMD_SZ)
/*
 * One random-data buffer together with everything needed to refill it:
 * its DMA address, a completion signalled when a refill job finishes, the
 * job descriptor itself and a fill-state flag.
 */
struct buf_data {
/* random bytes; mapped DMA_FROM_DEVICE and copied out by caam_read() */
u8 buf[RN_BUF_SIZE];
/* DMA address of buf; zero is treated as "not mapped" on unmap */
dma_addr_t addr;
/* completed by rng_done(), or by submit_job() when enqueueing fails */
struct completion filled;
/* job descriptor; rng_done() recovers the buf_data from its address */
u32 hw_desc[DESC_JOB_O_LEN];
/* values held by 'empty' below */
#define BUF_NOT_EMPTY 0
#define BUF_EMPTY 1
#define BUF_PENDING 2 /* Empty, but with job pending --don't submit another */
atomic_t empty;
};
/* rng per-device context */
struct caam_rng_ctx {
/* device used for enqueueing jobs and for DMA (un)mapping */
struct device *jrdev;
/* DMA address of sh_desc, set by rng_create_sh_desc() */
dma_addr_t sh_desc_dma;
/* shared descriptor referenced by both job descriptors */
u32 sh_desc[DESC_RNG_LEN];
/* offset of the next unread byte within the current buffer */
unsigned int cur_buf_idx;
/* index (0 or 1) into bufs[] of the buffer currently being read */
int current_buf;
struct buf_data bufs[2];
};
/* the single context instance used by all callbacks in this file */
static struct caam_rng_ctx rng_ctx;
/*
 * Drop the DMA mapping of one random-data buffer. A buffer whose address
 * is zero is considered unmapped and left alone.
 */
static inline void rng_unmap_buf(struct device *jrdev, struct buf_data *bd)
{
	if (!bd->addr)
		return;

	dma_unmap_single(jrdev, bd->addr, RN_BUF_SIZE, DMA_FROM_DEVICE);
}
/*
 * Release every DMA mapping held by the RNG context: the shared descriptor
 * (when mapped) and both random-data buffers.
 */
static inline void rng_unmap_ctx(struct caam_rng_ctx *ctx)
{
	struct device *jrdev = ctx->jrdev;

	/*
	 * The unmap length must equal the length that was mapped.
	 * rng_create_sh_desc() maps desc_bytes() of the built descriptor,
	 * not the DESC_RNG_LEN capacity of the array, so use the same here.
	 */
	if (ctx->sh_desc_dma)
		dma_unmap_single(jrdev, ctx->sh_desc_dma,
				 desc_bytes(ctx->sh_desc), DMA_TO_DEVICE);

	rng_unmap_buf(jrdev, &ctx->bufs[0]);
	rng_unmap_buf(jrdev, &ctx->bufs[1]);
}
/*
 * Job completion callback for a buffer refill.
 *
 * @desc is the hw_desc member of a struct buf_data, so the owning buffer is
 * found by stepping back by that member's offset. An error status is only
 * logged; the buffer is marked BUF_NOT_EMPTY and its completion is
 * signalled in either case.
 */
static void rng_done(struct device *jrdev, u32 *desc, u32 err, void *context)
{
	char *base = (char *)desc - offsetof(struct buf_data, hw_desc);
	struct buf_data *bd = (struct buf_data *)base;

	if (err) {
		char tmp[CAAM_ERROR_STR_MAX];

		dev_err(jrdev, "%08x: %s\n", err, caam_jr_strstatus(tmp, err));
	}

	atomic_set(&bd->empty, BUF_NOT_EMPTY);
	complete(&bd->filled);

#ifdef DEBUG
	print_hex_dump(KERN_ERR, "rng refreshed buf@: ",
		       DUMP_PREFIX_ADDRESS, 16, 4, bd->buf, RN_BUF_SIZE, 1);
#endif
}
/*
 * Enqueue the refill job of one buffer.
 *
 * @to_current: non-zero to refill the buffer currently being read
 *              (ctx->current_buf), zero to refill the other one.
 *
 * Callers submit only for a buffer whose state is BUF_EMPTY. On success the
 * state is BUF_PENDING until rng_done() runs. On failure the state is left
 * at BUF_EMPTY and the completion is signalled so nobody blocks on a job
 * that was never queued.
 *
 * Returns the result of caam_jr_enqueue().
 */
static inline int submit_job(struct caam_rng_ctx *ctx, int to_current)
{
	int idx = !(to_current ^ ctx->current_buf);
	struct buf_data *bd = &ctx->bufs[idx];
	struct device *jrdev = ctx->jrdev;
	u32 *desc = bd->hw_desc;
	int err;

	dev_dbg(jrdev, "submitting job %d\n", idx);
	init_completion(&bd->filled);

	/*
	 * Note the pending job BEFORE handing it to the job ring. rng_done()
	 * is a completion callback and presumably may run before
	 * caam_jr_enqueue() returns; incrementing afterwards would then turn
	 * the BUF_NOT_EMPTY it stored into BUF_EMPTY and discard a freshly
	 * filled buffer.
	 */
	atomic_inc(&bd->empty);

	err = caam_jr_enqueue(jrdev, desc, rng_done, ctx);
	if (err) {
		atomic_dec(&bd->empty);	/* nothing is pending after all */
		complete(&bd->filled);	/* don't wait on failed job */
	}

	return err;
}
/*
 * hwrng .read callback: copy up to @max random bytes into @data.
 *
 * Bytes are served from the current buffer starting at ctx->cur_buf_idx.
 * Once that buffer is used up, a refill job is submitted for it, the other
 * buffer becomes the current one, and the rest of the request is served
 * from there through a recursive call that never waits.
 *
 * Returns the number of bytes copied. Returns 0 when the current buffer
 * holds no data and either @wait is false or no refill job could be
 * submitted.
 */
static int caam_read(struct hwrng *rng, void *data, size_t max, bool wait)
{
struct caam_rng_ctx *ctx = &rng_ctx;
struct buf_data *bd = &ctx->bufs[ctx->current_buf];
int next_buf_idx, copied_idx;
int err;
/* any non-zero state (BUF_EMPTY or BUF_PENDING) means no data yet */
if (atomic_read(&bd->empty)) {
/* try to submit job if there wasn't one */
if (atomic_read(&bd->empty) == BUF_EMPTY) {
err = submit_job(ctx, 1);
/* if can't submit job, can't even wait */
if (err)
return 0;
}
/* no immediate data, so exit if not waiting */
if (!wait)
return 0;
/* waiting for pending job */
if (atomic_read(&bd->empty))
wait_for_completion(&bd->filled);
}
next_buf_idx = ctx->cur_buf_idx + max;
dev_dbg(ctx->jrdev, "%s: start reading at buffer %d, idx %d\n",
__func__, ctx->current_buf, ctx->cur_buf_idx);
/*
 * if enough data in current buffer
 * (strict '<': a request ending exactly at the buffer end takes the
 * refill-and-switch path below instead)
 */
if (next_buf_idx < RN_BUF_SIZE) {
memcpy(data, bd->buf + ctx->cur_buf_idx, max);
ctx->cur_buf_idx = next_buf_idx;
return max;
}
/* else, copy what's left... */
copied_idx = RN_BUF_SIZE - ctx->cur_buf_idx;
memcpy(data, bd->buf + ctx->cur_buf_idx, copied_idx);
ctx->cur_buf_idx = 0;
atomic_set(&bd->empty, BUF_EMPTY);
/* ...refill... (must happen before current_buf is flipped below) */
submit_job(ctx, 1);
/* and use next buffer */
ctx->current_buf = !ctx->current_buf;
dev_dbg(ctx->jrdev, "switched to buffer %d\n", ctx->current_buf);
/* since there already is some data read, don't wait */
return copied_idx + caam_read(rng, data + copied_idx,
max - copied_idx, false);
}
/*
 * Build the shared descriptor in ctx->sh_desc (load command, RNG class 1
 * operation, sequence FIFO store of RN_BUF_SIZE bytes) and map it for the
 * device, recording the DMA address in ctx->sh_desc_dma.
 */
static inline void rng_create_sh_desc(struct caam_rng_ctx *ctx)
{
	u32 *sh = ctx->sh_desc;

	init_sh_desc(sh, HDR_SHARE_WAIT);

	/* Propagate errors from shared to job descriptor */
	append_cmd(sh, SET_OK_NO_PROP_ERRORS | CMD_LOAD);

	/* Generate random bytes */
	append_operation(sh, OP_ALG_ALGSEL_RNG | OP_TYPE_CLASS1_ALG);

	/* Store bytes */
	append_seq_fifo_store(sh, RN_BUF_SIZE, FIFOST_TYPE_RNGSTORE);

	ctx->sh_desc_dma = dma_map_single(ctx->jrdev, sh, desc_bytes(sh),
					  DMA_TO_DEVICE);

#ifdef DEBUG
	print_hex_dump(KERN_ERR, "rng shdesc@: ", DUMP_PREFIX_ADDRESS, 16, 4,
		       sh, desc_bytes(sh), 1);
#endif
}
/*
 * Build the job descriptor of buffer @buf_id: it references the shared
 * descriptor and names the buffer, mapped here for device writes, as the
 * RN_BUF_SIZE-byte sequence output.
 */
static inline void rng_create_job_desc(struct caam_rng_ctx *ctx, int buf_id)
{
	struct buf_data *bd = ctx->bufs + buf_id;
	u32 *job = bd->hw_desc;
	int shared_len = desc_len(ctx->sh_desc);

	init_job_desc_shared(job, ctx->sh_desc_dma, shared_len,
			     HDR_SHARE_DEFER | HDR_REVERSE);

	bd->addr = dma_map_single(ctx->jrdev, bd->buf, RN_BUF_SIZE,
				  DMA_FROM_DEVICE);
	append_seq_out_ptr_intlen(job, bd->addr, RN_BUF_SIZE, 0);

#ifdef DEBUG
	print_hex_dump(KERN_ERR, "rng job desc@: ", DUMP_PREFIX_ADDRESS, 16, 4,
		       job, desc_bytes(job), 1);
#endif
}
static void caam_cleanup(struct hwrng *rng)
{
int i;
struct buf_data *bd;
for (i = 0; i < 2; i++) {
bd = &rng_ctx.bufs[i];
if (atomic_read(&bd->empty) == BUF_PENDING)
wait_for_completion(&bd->filled);
}
rng_unmap_ctx(&rng_ctx);
}
/*
 * Prepare buffer @buf_id: build its job descriptor, mark it empty, submit
 * the first fill job and block until that job's completion is signalled.
 */
static void caam_init_buf(struct caam_rng_ctx *ctx, int buf_id)
{
	struct buf_data *bd = ctx->bufs + buf_id;
	int is_current = (buf_id == ctx->current_buf);

	rng_create_job_desc(ctx, buf_id);
	atomic_set(&bd->empty, BUF_EMPTY);
	submit_job(ctx, is_current);
	wait_for_completion(&bd->filled);
}
/*
 * Initialise the RNG context for job ring device @jrdev: reset the read
 * position to the start of buffer 0, build the shared descriptor, then
 * set up and fill both buffers.
 */
static void caam_init_rng(struct caam_rng_ctx *ctx, struct device *jrdev)
{
	int buf_id;

	ctx->jrdev = jrdev;
	ctx->current_buf = 0;
	ctx->cur_buf_idx = 0;

	/* the job descriptors built below refer to the shared descriptor */
	rng_create_sh_desc(ctx);

	for (buf_id = 0; buf_id < 2; buf_id++)
		caam_init_buf(ctx, buf_id);
}
/* hwrng descriptor registered in caam_rng_init(); no .init hook is set */
static struct hwrng caam_rng = {
.name = "rng-caam",
.cleanup = caam_cleanup,
.read = caam_read,
};
/* Module exit: unregister the hwrng device registered at init time. */
static void __exit caam_rng_exit(void)
{
hwrng_unregister(&caam_rng);
}
/*
 * Module init: locate the "fsl,sec-v4.0" controller, set up the RNG context
 * on its first job ring and register the hwrng device.
 *
 * Returns 0 on success, -ENODEV when the controller node, its platform
 * device or its driver data cannot be found, or the error returned by
 * hwrng_register().
 */
static int __init caam_rng_init(void)
{
	struct device_node *dev_node;
	struct platform_device *pdev;
	struct device *ctrldev;
	struct caam_drv_private *priv;
	int err;

	dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec-v4.0");
	if (!dev_node)
		return -ENODEV;

	pdev = of_find_device_by_node(dev_node);
	/*
	 * The node is only needed for the lookup above; drop the reference
	 * here so it is not leaked on the error path either.
	 */
	of_node_put(dev_node);
	if (!pdev)
		return -ENODEV;

	ctrldev = &pdev->dev;
	priv = dev_get_drvdata(ctrldev);
	/* no driver data: the controller was not set up, nothing to use */
	if (!priv)
		return -ENODEV;

	caam_init_rng(&rng_ctx, priv->jrdev[0]);

	dev_info(priv->jrdev[0], "registering rng-caam\n");
	err = hwrng_register(&caam_rng);
	/*
	 * caam_init_rng() waited for both fill jobs, so nothing is pending;
	 * release the DMA mappings if the device never got registered.
	 */
	if (err)
		rng_unmap_ctx(&rng_ctx);

	return err;
}
module_init(caam_rng_init);
module_exit(caam_rng_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("FSL CAAM support for hw_random API");
MODULE_AUTHOR("Freescale Semiconductor - NMG");

View File

@ -11,6 +11,7 @@
#include <linux/device.h>
#include <linux/interrupt.h>
#include <linux/crypto.h>
#include <linux/hash.h>
#include <linux/hw_random.h>
#include <linux/of_platform.h>
#include <linux/dma-mapping.h>
@ -33,5 +34,6 @@
#include <crypto/authenc.h>
#include <crypto/scatterwalk.h>
#include <crypto/internal/skcipher.h>
#include <crypto/internal/hash.h>
#endif /* !defined(CAAM_COMPAT_H) */

View File

@ -2,13 +2,16 @@
* CAAM control-plane driver backend
* Controller-level driver, kernel property detection, initialization
*
* Copyright 2008-2011 Freescale Semiconductor, Inc.
* Copyright 2008-2012 Freescale Semiconductor, Inc.
*/
#include "compat.h"
#include "regs.h"
#include "intern.h"
#include "jr.h"
#include "desc_constr.h"
#include "error.h"
#include "ctrl.h"
static int caam_remove(struct platform_device *pdev)
{
@ -43,10 +46,154 @@ static int caam_remove(struct platform_device *pdev)
return ret;
}
/*
 * Descriptor to instantiate RNG State Handle 0 in normal mode and
 * load the JDKEK, TDKEK and TDSK registers
 *
 * @desc must have room for the commands appended below; the only caller
 * in this file allocates CAAM_CMD_SZ * 6 bytes for it.
 */
static void build_instantiation_desc(u32 *desc)
{
u32 *jump_cmd;
init_job_desc(desc, 0);
/* INIT RNG in non-test mode */
append_operation(desc, OP_TYPE_CLASS1_ALG | OP_ALG_ALGSEL_RNG |
OP_ALG_AS_INIT);
/* wait for done */
jump_cmd = append_jump(desc, JUMP_CLASS_CLASS1);
set_jump_tgt_here(desc, jump_cmd);
/*
 * load 1 to clear written reg:
 * resets the done interrupt and returns the RNG to idle.
 */
append_load_imm_u32(desc, 1, LDST_SRCDST_WORD_CLRW);
/* generate secure keys (non-test) */
append_operation(desc, OP_TYPE_CLASS1_ALG | OP_ALG_ALGSEL_RNG |
OP_ALG_RNG4_SK);
}
/*
 * Hand-over between instantiate_rng() and its job callback
 * rng4_init_done(): the callback stores the job status in err and then
 * signals completion.
 */
struct instantiate_result {
struct completion completion;
int err;
};
/*
 * Job callback for the RNG instantiation descriptor: log a non-zero
 * status, hand the status to the waiter through @context (a struct
 * instantiate_result) and signal its completion.
 */
static void rng4_init_done(struct device *dev, u32 *desc, u32 err,
			   void *context)
{
	struct instantiate_result *res = context;

	/* publish the status before the waiter is woken up */
	res->err = err;

	if (err) {
		char tmp[CAAM_ERROR_STR_MAX];

		dev_err(dev, "%08x: %s\n", err, caam_jr_strstatus(tmp, err));
	}

	complete(&res->completion);
}
/*
 * Build the RNG instantiation descriptor, run it on job ring @jrdev and
 * wait for the result.
 *
 * Returns 0 on success, -ENOMEM if the descriptor cannot be allocated, the
 * error from caam_jr_enqueue() if the job cannot be queued, or the non-zero
 * job status reported to rng4_init_done().
 */
static int instantiate_rng(struct device *jrdev)
{
	struct instantiate_result instantiation;
	dma_addr_t desc_dma;
	u32 *desc;
	int ret;

	desc = kmalloc(CAAM_CMD_SZ * 6, GFP_KERNEL | GFP_DMA);
	if (!desc) {
		dev_err(jrdev, "cannot allocate RNG init descriptor memory\n");
		return -ENOMEM;
	}

	build_instantiation_desc(desc);
	desc_dma = dma_map_single(jrdev, desc, desc_bytes(desc), DMA_TO_DEVICE);
	init_completion(&instantiation.completion);

	ret = caam_jr_enqueue(jrdev, desc, rng4_init_done, &instantiation);
	if (!ret) {
		/*
		 * The wait must not be interruptible: 'instantiation' lives
		 * on this stack frame and 'desc' is freed below, so returning
		 * before rng4_init_done() has run would leave the queued job
		 * pointing at released memory.
		 */
		wait_for_completion(&instantiation.completion);
		ret = instantiation.err;
		if (ret)
			dev_err(jrdev, "unable to instantiate RNG\n");
	}

	dma_unmap_single(jrdev, desc_dma, desc_bytes(desc), DMA_TO_DEVICE);
	kfree(desc);

	return ret;
}
/*
 * By default, the TRNG runs for 200 clocks per sample;
 * 800 clocks per sample generates better entropy.
 *
 * Reprogram the first RNG4 test register block of the controller behind
 * @pdev: enter program mode, set the entropy delay to 800 and the
 * frequency count limits to 400/6400, then return to run mode.
 */
static void kick_trng(struct platform_device *pdev)
{
	struct caam_drv_private *ctrlpriv = dev_get_drvdata(&pdev->dev);
	struct caam_full __iomem *topregs =
		(struct caam_full __iomem *)ctrlpriv->ctrl;
	struct rng4tst __iomem *r4tst = &topregs->ctrl.r4tst[0];
	u32 sdctl;

	/* put RNG4 into program mode */
	setbits32(&r4tst->rtmctl, RTMCTL_PRGM);

	/* 800 clocks per sample */
	sdctl = rd_reg32(&r4tst->rtsdctl);
	sdctl &= ~RTSDCTL_ENT_DLY_MASK;
	sdctl |= 800 << RTSDCTL_ENT_DLY_SHIFT;
	wr_reg32(&r4tst->rtsdctl, sdctl);

	/* min. freq. count */
	wr_reg32(&r4tst->rtfrqmin, 400);

	/* max. freq. count */
	wr_reg32(&r4tst->rtfrqmax, 6400);

	/* put RNG4 into run mode */
	clrbits32(&r4tst->rtmctl, RTMCTL_PRGM);
}
/**
 * caam_get_era() - Return the ERA of the SEC on SoC, based
 *		    on the SEC_VID register.
 * @caam_id: the value of the SEC_VID register
 *
 * The ip_id and maj_rev fields of @caam_id are looked up in a fixed table.
 *
 * Returns the ERA number (1..4) or -ENOTSUPP if the ERA is unknown.
 **/
int caam_get_era(u64 caam_id)
{
	static const struct {
		u16 ip_id;
		u8 maj_rev;
		u8 era;
	} caam_eras[] = {
		{0x0A10, 1, 1},
		{0x0A10, 2, 2},
		{0x0A12, 1, 3},
		{0x0A14, 1, 3},
		{0x0A14, 2, 4},
		{0x0A16, 1, 4},
		{0x0A11, 1, 4}
	};
	/* view the raw register value through the sec_vid field layout */
	struct sec_vid *sec_vid = (struct sec_vid *)&caam_id;
	int i;

	for (i = 0; i < ARRAY_SIZE(caam_eras); i++) {
		if (caam_eras[i].ip_id != sec_vid->ip_id)
			continue;
		if (caam_eras[i].maj_rev != sec_vid->maj_rev)
			continue;
		return caam_eras[i].era;
	}

	return -ENOTSUPP;
}
EXPORT_SYMBOL(caam_get_era);
/* Probe routine for CAAM top (controller) level */
static int caam_probe(struct platform_device *pdev)
{
int ring, rspec;
int ret, ring, rspec;
u64 caam_id;
struct device *dev;
struct device_node *nprop, *np;
struct caam_ctrl __iomem *ctrl;
@ -82,13 +229,18 @@ static int caam_probe(struct platform_device *pdev)
/*
* Enable DECO watchdogs and, if this is a PHYS_ADDR_T_64BIT kernel,
* 36-bit pointers in master configuration register
* long pointers in master configuration register
*/
setbits32(&topregs->ctrl.mcr, MCFGR_WDENABLE |
(sizeof(dma_addr_t) == sizeof(u64) ? MCFGR_LONG_PTR : 0));
if (sizeof(dma_addr_t) == sizeof(u64))
dma_set_mask(dev, DMA_BIT_MASK(36));
if (of_device_is_compatible(nprop, "fsl,sec-v5.0"))
dma_set_mask(dev, DMA_BIT_MASK(40));
else
dma_set_mask(dev, DMA_BIT_MASK(36));
else
dma_set_mask(dev, DMA_BIT_MASK(32));
/*
* Detect and enable JobRs
@ -141,14 +293,29 @@ static int caam_probe(struct platform_device *pdev)
return -ENOMEM;
}
/*
* RNG4 based SECs (v5+) need special initialization prior
* to executing any descriptors
*/
if (of_device_is_compatible(nprop, "fsl,sec-v5.0")) {
kick_trng(pdev);
ret = instantiate_rng(ctrlpriv->jrdev[0]);
if (ret) {
caam_remove(pdev);
return ret;
}
}
/* NOTE: RTIC detection ought to go here, around Si time */
/* Initialize queue allocator lock */
spin_lock_init(&ctrlpriv->jr_alloc_lock);
caam_id = rd_reg64(&topregs->ctrl.perfmon.caam_id);
/* Report "alive" for developer to see */
dev_info(dev, "device ID = 0x%016llx\n",
rd_reg64(&topregs->ctrl.perfmon.caam_id));
dev_info(dev, "device ID = 0x%016llx (Era %d)\n", caam_id,
caam_get_era(caam_id));
dev_info(dev, "job rings = %d, qi = %d\n",
ctrlpriv->total_jobrs, ctrlpriv->qi_present);

View File

@ -0,0 +1,13 @@
/*
* CAAM control-plane driver backend public-level include definitions
*
* Copyright 2012 Freescale Semiconductor, Inc.
*/
#ifndef CTRL_H
#define CTRL_H
/* Prototypes for backend-level services exposed to APIs */
int caam_get_era(u64 caam_id);
#endif /* CTRL_H */

View File

@ -8,6 +8,16 @@
#ifndef DESC_H
#define DESC_H
/*
 * One entry of a SEC4 hardware scatter/gather (link) table, as filled in by
 * dma_to_sec4_sg_one().
 */
struct sec4_sg_entry {
u64 ptr; /* DMA address of the segment */
/* OR-ed into @len of the final table entry, see sg_to_sec4_sg_last() */
#define SEC4_SG_LEN_FIN 0x40000000
/* NOTE(review): presumably flags an extension entry - confirm in the SEC manual */
#define SEC4_SG_LEN_EXT 0x80000000
u32 len; /* segment length, plus the SEC4_SG_LEN_* flag bits above */
u8 reserved; /* written as 0 by dma_to_sec4_sg_one() */
u8 buf_pool_id; /* written as 0 by dma_to_sec4_sg_one() */
u16 offset; /* offset value supplied by the caller of dma_to_sec4_sg_one() */
};
/* Max size of any CAAM descriptor in 32-bit words, inclusive of header */
#define MAX_CAAM_DESCSIZE 64
@ -1162,6 +1172,11 @@
#define OP_ALG_AAI_GSM (0x10 << OP_ALG_AAI_SHIFT)
#define OP_ALG_AAI_EDGE (0x20 << OP_ALG_AAI_SHIFT)
/* RNG4 set */
#define OP_ALG_RNG4_SHIFT 4
#define OP_ALG_RNG4_MASK (0x1f3 << OP_ALG_RNG4_SHIFT)
#define OP_ALG_RNG4_SK (0x100 << OP_ALG_RNG4_SHIFT)
#define OP_ALG_AS_SHIFT 2
#define OP_ALG_AS_MASK (0x3 << OP_ALG_AS_SHIFT)
@ -1585,20 +1600,4 @@
#define NFIFOENTRY_PLEN_SHIFT 0
#define NFIFOENTRY_PLEN_MASK (0xFF << NFIFOENTRY_PLEN_SHIFT)
/*
* PDB internal definitions
*/
/* IPSec ESP CBC Encap/Decap Options */
#define PDBOPTS_ESPCBC_ARSNONE 0x00 /* no antireplay window */
#define PDBOPTS_ESPCBC_ARS32 0x40 /* 32-entry antireplay window */
#define PDBOPTS_ESPCBC_ARS64 0xc0 /* 64-entry antireplay window */
#define PDBOPTS_ESPCBC_IVSRC 0x20 /* IV comes from internal random gen */
#define PDBOPTS_ESPCBC_ESN 0x10 /* extended sequence included */
#define PDBOPTS_ESPCBC_OUTFMT 0x08 /* output only decapsulation (decap) */
#define PDBOPTS_ESPCBC_IPHDRSRC 0x08 /* IP header comes from PDB (encap) */
#define PDBOPTS_ESPCBC_INCIPHDR 0x04 /* Prepend IP header to output frame */
#define PDBOPTS_ESPCBC_IPVSN 0x02 /* process IPv6 header */
#define PDBOPTS_ESPCBC_TUNNEL 0x01 /* tunnel mode next-header byte */
#endif /* DESC_H */

View File

@ -1,7 +1,7 @@
/*
* caam descriptor construction helper functions
*
* Copyright 2008-2011 Freescale Semiconductor, Inc.
* Copyright 2008-2012 Freescale Semiconductor, Inc.
*/
#include "desc.h"
@ -51,7 +51,7 @@ static inline void *sh_desc_pdb(u32 *desc)
static inline void init_desc(u32 *desc, u32 options)
{
*desc = options | HDR_ONE | 1;
*desc = (options | HDR_ONE) + 1;
}
static inline void init_sh_desc(u32 *desc, u32 options)
@ -62,9 +62,9 @@ static inline void init_sh_desc(u32 *desc, u32 options)
static inline void init_sh_desc_pdb(u32 *desc, u32 options, size_t pdb_bytes)
{
u32 pdb_len = pdb_bytes / CAAM_CMD_SZ + 1;
u32 pdb_len = (pdb_bytes + CAAM_CMD_SZ - 1) / CAAM_CMD_SZ;
init_sh_desc(desc, ((pdb_len << HDR_START_IDX_SHIFT) + pdb_len) |
init_sh_desc(desc, (((pdb_len + 1) << HDR_START_IDX_SHIFT) + pdb_len) |
options);
}
@ -117,6 +117,15 @@ static inline void append_cmd_ptr(u32 *desc, dma_addr_t ptr, int len,
append_ptr(desc, ptr);
}
/*
 * Write length after pointer, rather than inside command.
 *
 * Appends three items to the descriptor, in this order: the command word
 * (@command, without any length folded in), the pointer @ptr, and finally
 * @len as a separate word.
 */
static inline void append_cmd_ptr_extlen(u32 *desc, dma_addr_t ptr,
unsigned int len, u32 command)
{
append_cmd(desc, command);
append_ptr(desc, ptr);
append_cmd(desc, len);
}
static inline void append_cmd_data(u32 *desc, void *data, int len,
u32 command)
{
@ -166,13 +175,22 @@ static inline void append_##cmd(u32 *desc, dma_addr_t ptr, unsigned int len, \
append_cmd_ptr(desc, ptr, len, CMD_##op | options); \
}
APPEND_CMD_PTR(key, KEY)
APPEND_CMD_PTR(seq_in_ptr, SEQ_IN_PTR)
APPEND_CMD_PTR(seq_out_ptr, SEQ_OUT_PTR)
APPEND_CMD_PTR(load, LOAD)
APPEND_CMD_PTR(store, STORE)
APPEND_CMD_PTR(fifo_load, FIFO_LOAD)
APPEND_CMD_PTR(fifo_store, FIFO_STORE)
/*
 * Generate append_seq_<cmd>_ptr_intlen(): appends a CMD_SEQ_<op>_PTR command
 * through append_cmd_ptr(), i.e. with the length passed along with the
 * command rather than as a separate trailing word.
 * Instantiated below for the "in" and "out" sequence pointers.
 */
#define APPEND_SEQ_PTR_INTLEN(cmd, op) \
static inline void append_seq_##cmd##_ptr_intlen(u32 *desc, dma_addr_t ptr, \
unsigned int len, \
u32 options) \
{ \
PRINT_POS; \
append_cmd_ptr(desc, ptr, len, CMD_SEQ_##op##_PTR | options); \
}
APPEND_SEQ_PTR_INTLEN(in, IN)
APPEND_SEQ_PTR_INTLEN(out, OUT)
#define APPEND_CMD_PTR_TO_IMM(cmd, op) \
static inline void append_##cmd##_as_imm(u32 *desc, void *data, \
unsigned int len, u32 options) \
@ -183,6 +201,33 @@ static inline void append_##cmd##_as_imm(u32 *desc, void *data, \
APPEND_CMD_PTR_TO_IMM(load, LOAD);
APPEND_CMD_PTR_TO_IMM(fifo_load, FIFO_LOAD);
/*
 * Generate append_<cmd>_extlen(): appends CMD_<op> with the SQIN_EXT flag
 * set and the length written as a separate word after the pointer
 * (see append_cmd_ptr_extlen()).
 * NOTE(review): SQIN_EXT is also used for the SEQ_OUT_PTR instantiation -
 * presumably the "extended length" bit has the same encoding for both
 * commands; confirm against desc.h.
 */
#define APPEND_CMD_PTR_EXTLEN(cmd, op) \
static inline void append_##cmd##_extlen(u32 *desc, dma_addr_t ptr, \
unsigned int len, u32 options) \
{ \
PRINT_POS; \
append_cmd_ptr_extlen(desc, ptr, len, CMD_##op | SQIN_EXT | options); \
}
APPEND_CMD_PTR_EXTLEN(seq_in_ptr, SEQ_IN_PTR)
APPEND_CMD_PTR_EXTLEN(seq_out_ptr, SEQ_OUT_PTR)
/*
 * Determine whether to store length internally or externally depending on
 * the size of its type
 *
 * Generates append_<cmd>(), which dispatches on sizeof(type): a length type
 * wider than u16 uses the _extlen variant, anything else the _intlen
 * variant. The comparison is on compile-time constants, so only one branch
 * can ever be taken for a given instantiation. Both instantiations below
 * use u32 and therefore always take the _extlen path.
 */
#define APPEND_CMD_PTR_LEN(cmd, op, type) \
static inline void append_##cmd(u32 *desc, dma_addr_t ptr, \
type len, u32 options) \
{ \
PRINT_POS; \
if (sizeof(type) > sizeof(u16)) \
append_##cmd##_extlen(desc, ptr, len, options); \
else \
append_##cmd##_intlen(desc, ptr, len, options); \
}
APPEND_CMD_PTR_LEN(seq_in_ptr, SEQ_IN_PTR, u32)
APPEND_CMD_PTR_LEN(seq_out_ptr, SEQ_OUT_PTR, u32)
/*
* 2nd variant for commands whose specified immediate length differs
* from length of immediate data provided, e.g., split keys

View File

@ -39,18 +39,20 @@ static void report_ccb_status(u32 status, char *outstr)
char *cha_id_list[] = {
"",
"AES",
"DES, 3DES",
"DES",
"ARC4",
"MD5, SHA-1, SH-224, SHA-256, SHA-384, SHA-512",
"MDHA",
"RNG",
"SNOW f8",
"Kasumi f8, f9",
"All Public Key Algorithms",
"CRC",
"Kasumi f8/9",
"PKHA",
"CRCA",
"SNOW f9",
"ZUCE",
"ZUCA",
};
char *err_id_list[] = {
"None. No error.",
"No error.",
"Mode error.",
"Data size error.",
"Key size error.",
@ -67,6 +69,20 @@ static void report_ccb_status(u32 status, char *outstr)
"Invalid CHA combination was selected",
"Invalid CHA selected.",
};
char *rng_err_id_list[] = {
"",
"",
"",
"Instantiate",
"Not instantiated",
"Test instantiate",
"Prediction resistance",
"",
"Prediction resistance and test request",
"Uninstantiate",
"",
"Secure key generation",
};
u8 cha_id = (status & JRSTA_CCBERR_CHAID_MASK) >>
JRSTA_CCBERR_CHAID_SHIFT;
u8 err_id = status & JRSTA_CCBERR_ERRID_MASK;
@ -81,7 +97,13 @@ static void report_ccb_status(u32 status, char *outstr)
cha_id, sizeof("ff"));
}
if (err_id < ARRAY_SIZE(err_id_list)) {
if ((cha_id << JRSTA_CCBERR_CHAID_SHIFT) == JRSTA_CCBERR_CHAID_RNG &&
err_id < ARRAY_SIZE(rng_err_id_list) &&
strlen(rng_err_id_list[err_id])) {
/* RNG-only error */
SPRINTFCAT(outstr, "%s", rng_err_id_list[err_id],
strlen(rng_err_id_list[err_id]));
} else if (err_id < ARRAY_SIZE(err_id_list)) {
SPRINTFCAT(outstr, "%s", err_id_list[err_id],
strlen(err_id_list[err_id]));
} else {
@ -101,10 +123,10 @@ static void report_deco_status(u32 status, char *outstr)
u8 value;
char *error_text;
} desc_error_list[] = {
{ 0x00, "None. No error." },
{ 0x00, "No error." },
{ 0x01, "SGT Length Error. The descriptor is trying to read "
"more data than is contained in the SGT table." },
{ 0x02, "Reserved." },
{ 0x02, "SGT Null Entry Error." },
{ 0x03, "Job Ring Control Error. There is a bad value in the "
"Job Ring Control register." },
{ 0x04, "Invalid Descriptor Command. The Descriptor Command "
@ -116,7 +138,7 @@ static void report_deco_status(u32 status, char *outstr)
{ 0x09, "Invalid OPERATION Command" },
{ 0x0A, "Invalid FIFO LOAD Command" },
{ 0x0B, "Invalid FIFO STORE Command" },
{ 0x0C, "Invalid MOVE Command" },
{ 0x0C, "Invalid MOVE/MOVE_LEN Command" },
{ 0x0D, "Invalid JUMP Command. A nonlocal JUMP Command is "
"invalid because the target is not a Job Header "
"Command, or the jump is from a Trusted Descriptor to "
@ -166,6 +188,8 @@ static void report_deco_status(u32 status, char *outstr)
"(input frame; block ciphers) and IPsec decap (output "
"frame, when doing the next header byte update) and "
"DCRC (output frame)." },
{ 0x23, "Read Input Frame error" },
{ 0x24, "JDKEK, TDKEK or TDSK not loaded error" },
{ 0x80, "DNR (do not run) error" },
{ 0x81, "undefined protocol command" },
{ 0x82, "invalid setting in PDB" },

View File

@ -43,7 +43,7 @@ struct caam_drv_private_jr {
struct device *parentdev; /* points back to controller dev */
int ridx;
struct caam_job_ring __iomem *rregs; /* JobR's register space */
struct tasklet_struct irqtask[NR_CPUS];
struct tasklet_struct irqtask;
int irq; /* One per queue */
int assign; /* busy/free */
@ -86,10 +86,10 @@ struct caam_drv_private {
/* which jr allocated to scatterlist crypto */
atomic_t tfm_count ____cacheline_aligned;
int num_jrs_for_algapi;
struct device **algapi_jr;
/* list of registered crypto algorithms (mk generic context handle?) */
struct list_head alg_list;
/* list of registered hash algorithms (mk generic context handle?) */
struct list_head hash_list;
/*
* debugfs entries for developer view into driver/device

View File

@ -2,7 +2,7 @@
* CAAM/SEC 4.x transport/backend driver
* JobR backend functionality
*
* Copyright 2008-2011 Freescale Semiconductor, Inc.
* Copyright 2008-2012 Freescale Semiconductor, Inc.
*/
#include "compat.h"
@ -43,7 +43,7 @@ static irqreturn_t caam_jr_interrupt(int irq, void *st_dev)
wr_reg32(&jrp->rregs->jrintstatus, irqstate);
preempt_disable();
tasklet_schedule(&jrp->irqtask[smp_processor_id()]);
tasklet_schedule(&jrp->irqtask);
preempt_enable();
return IRQ_HANDLED;
@ -58,17 +58,16 @@ static void caam_jr_dequeue(unsigned long devarg)
void (*usercall)(struct device *dev, u32 *desc, u32 status, void *arg);
u32 *userdesc, userstatus;
void *userarg;
unsigned long flags;
spin_lock_irqsave(&jrp->outlock, flags);
while (rd_reg32(&jrp->rregs->outring_used)) {
head = ACCESS_ONCE(jrp->head);
sw_idx = tail = jrp->tail;
head = ACCESS_ONCE(jrp->head);
while (CIRC_CNT(head, tail, JOBR_DEPTH) >= 1 &&
rd_reg32(&jrp->rregs->outring_used)) {
spin_lock_bh(&jrp->outlock);
sw_idx = tail = jrp->tail;
hw_idx = jrp->out_ring_read_index;
for (i = 0; CIRC_CNT(head, tail + i, JOBR_DEPTH) >= 1; i++) {
sw_idx = (tail + i) & (JOBR_DEPTH - 1);
@ -95,7 +94,8 @@ static void caam_jr_dequeue(unsigned long devarg)
userdesc = jrp->entinfo[sw_idx].desc_addr_virt;
userstatus = jrp->outring[hw_idx].jrstatus;
smp_mb();
/* set done */
wr_reg32(&jrp->rregs->outring_rmvd, 1);
jrp->out_ring_read_index = (jrp->out_ring_read_index + 1) &
(JOBR_DEPTH - 1);
@ -115,22 +115,12 @@ static void caam_jr_dequeue(unsigned long devarg)
jrp->tail = tail;
}
/* set done */
wr_reg32(&jrp->rregs->outring_rmvd, 1);
spin_unlock_irqrestore(&jrp->outlock, flags);
spin_unlock_bh(&jrp->outlock);
/* Finally, execute user's callback */
usercall(dev, userdesc, userstatus, userarg);
spin_lock_irqsave(&jrp->outlock, flags);
head = ACCESS_ONCE(jrp->head);
sw_idx = tail = jrp->tail;
}
spin_unlock_irqrestore(&jrp->outlock, flags);
/* reenable / unmask IRQs */
clrbits32(&jrp->rregs->rconfig_lo, JRCFG_IMSK);
}
@ -148,23 +138,22 @@ int caam_jr_register(struct device *ctrldev, struct device **rdev)
{
struct caam_drv_private *ctrlpriv = dev_get_drvdata(ctrldev);
struct caam_drv_private_jr *jrpriv = NULL;
unsigned long flags;
int ring;
/* Lock, if free ring - assign, unlock */
spin_lock_irqsave(&ctrlpriv->jr_alloc_lock, flags);
spin_lock(&ctrlpriv->jr_alloc_lock);
for (ring = 0; ring < ctrlpriv->total_jobrs; ring++) {
jrpriv = dev_get_drvdata(ctrlpriv->jrdev[ring]);
if (jrpriv->assign == JOBR_UNASSIGNED) {
jrpriv->assign = JOBR_ASSIGNED;
*rdev = ctrlpriv->jrdev[ring];
spin_unlock_irqrestore(&ctrlpriv->jr_alloc_lock, flags);
spin_unlock(&ctrlpriv->jr_alloc_lock);
return ring;
}
}
/* If assigned, write dev where caller needs it */
spin_unlock_irqrestore(&ctrlpriv->jr_alloc_lock, flags);
spin_unlock(&ctrlpriv->jr_alloc_lock);
*rdev = NULL;
return -ENODEV;
@ -182,7 +171,6 @@ int caam_jr_deregister(struct device *rdev)
{
struct caam_drv_private_jr *jrpriv = dev_get_drvdata(rdev);
struct caam_drv_private *ctrlpriv;
unsigned long flags;
/* Get the owning controller's private space */
ctrlpriv = dev_get_drvdata(jrpriv->parentdev);
@ -195,9 +183,9 @@ int caam_jr_deregister(struct device *rdev)
return -EBUSY;
/* Release ring */
spin_lock_irqsave(&ctrlpriv->jr_alloc_lock, flags);
spin_lock(&ctrlpriv->jr_alloc_lock);
jrpriv->assign = JOBR_UNASSIGNED;
spin_unlock_irqrestore(&ctrlpriv->jr_alloc_lock, flags);
spin_unlock(&ctrlpriv->jr_alloc_lock);
return 0;
}
@ -238,7 +226,6 @@ int caam_jr_enqueue(struct device *dev, u32 *desc,
{
struct caam_drv_private_jr *jrp = dev_get_drvdata(dev);
struct caam_jrentry_info *head_entry;
unsigned long flags;
int head, tail, desc_size;
dma_addr_t desc_dma;
@ -249,14 +236,14 @@ int caam_jr_enqueue(struct device *dev, u32 *desc,
return -EIO;
}
spin_lock_irqsave(&jrp->inplock, flags);
spin_lock(&jrp->inplock);
head = jrp->head;
tail = ACCESS_ONCE(jrp->tail);
if (!rd_reg32(&jrp->rregs->inpring_avail) ||
CIRC_SPACE(head, tail, JOBR_DEPTH) <= 0) {
spin_unlock_irqrestore(&jrp->inplock, flags);
spin_unlock(&jrp->inplock);
dma_unmap_single(dev, desc_dma, desc_size, DMA_TO_DEVICE);
return -EBUSY;
}
@ -276,11 +263,9 @@ int caam_jr_enqueue(struct device *dev, u32 *desc,
(JOBR_DEPTH - 1);
jrp->head = (head + 1) & (JOBR_DEPTH - 1);
wmb();
wr_reg32(&jrp->rregs->inpring_jobadd, 1);
spin_unlock_irqrestore(&jrp->inplock, flags);
spin_unlock(&jrp->inplock);
return 0;
}
@ -337,11 +322,9 @@ static int caam_jr_init(struct device *dev)
jrp = dev_get_drvdata(dev);
/* Connect job ring interrupt handler. */
for_each_possible_cpu(i)
tasklet_init(&jrp->irqtask[i], caam_jr_dequeue,
(unsigned long)dev);
tasklet_init(&jrp->irqtask, caam_jr_dequeue, (unsigned long)dev);
/* Connect job ring interrupt handler. */
error = request_irq(jrp->irq, caam_jr_interrupt, IRQF_SHARED,
"caam-jobr", dev);
if (error) {
@ -356,10 +339,11 @@ static int caam_jr_init(struct device *dev)
if (error)
return error;
jrp->inpring = kzalloc(sizeof(dma_addr_t) * JOBR_DEPTH,
GFP_KERNEL | GFP_DMA);
jrp->outring = kzalloc(sizeof(struct jr_outentry) *
JOBR_DEPTH, GFP_KERNEL | GFP_DMA);
jrp->inpring = dma_alloc_coherent(dev, sizeof(dma_addr_t) * JOBR_DEPTH,
&inpbusaddr, GFP_KERNEL);
jrp->outring = dma_alloc_coherent(dev, sizeof(struct jr_outentry) *
JOBR_DEPTH, &outbusaddr, GFP_KERNEL);
jrp->entinfo = kzalloc(sizeof(struct caam_jrentry_info) * JOBR_DEPTH,
GFP_KERNEL);
@ -375,31 +359,6 @@ static int caam_jr_init(struct device *dev)
jrp->entinfo[i].desc_addr_dma = !0;
/* Setup rings */
inpbusaddr = dma_map_single(dev, jrp->inpring,
sizeof(u32 *) * JOBR_DEPTH,
DMA_BIDIRECTIONAL);
if (dma_mapping_error(dev, inpbusaddr)) {
dev_err(dev, "caam_jr_init(): can't map input ring\n");
kfree(jrp->inpring);
kfree(jrp->outring);
kfree(jrp->entinfo);
return -EIO;
}
outbusaddr = dma_map_single(dev, jrp->outring,
sizeof(struct jr_outentry) * JOBR_DEPTH,
DMA_BIDIRECTIONAL);
if (dma_mapping_error(dev, outbusaddr)) {
dev_err(dev, "caam_jr_init(): can't map output ring\n");
dma_unmap_single(dev, inpbusaddr,
sizeof(u32 *) * JOBR_DEPTH,
DMA_BIDIRECTIONAL);
kfree(jrp->inpring);
kfree(jrp->outring);
kfree(jrp->entinfo);
return -EIO;
}
jrp->inp_ring_write_index = 0;
jrp->out_ring_read_index = 0;
jrp->head = 0;
@ -431,12 +390,11 @@ int caam_jr_shutdown(struct device *dev)
{
struct caam_drv_private_jr *jrp = dev_get_drvdata(dev);
dma_addr_t inpbusaddr, outbusaddr;
int ret, i;
int ret;
ret = caam_reset_hw_jr(dev);
for_each_possible_cpu(i)
tasklet_kill(&jrp->irqtask[i]);
tasklet_kill(&jrp->irqtask);
/* Release interrupt */
free_irq(jrp->irq, dev);
@ -444,13 +402,10 @@ int caam_jr_shutdown(struct device *dev)
/* Free rings */
inpbusaddr = rd_reg64(&jrp->rregs->inpring_base);
outbusaddr = rd_reg64(&jrp->rregs->outring_base);
dma_unmap_single(dev, outbusaddr,
sizeof(struct jr_outentry) * JOBR_DEPTH,
DMA_BIDIRECTIONAL);
dma_unmap_single(dev, inpbusaddr, sizeof(u32 *) * JOBR_DEPTH,
DMA_BIDIRECTIONAL);
kfree(jrp->outring);
kfree(jrp->inpring);
dma_free_coherent(dev, sizeof(dma_addr_t) * JOBR_DEPTH,
jrp->inpring, inpbusaddr);
dma_free_coherent(dev, sizeof(struct jr_outentry) * JOBR_DEPTH,
jrp->outring, outbusaddr);
kfree(jrp->entinfo);
return ret;
@ -503,6 +458,14 @@ int caam_jr_probe(struct platform_device *pdev, struct device_node *np,
dev_set_drvdata(jrdev, jrpriv);
ctrlpriv->jrdev[ring] = jrdev;
if (sizeof(dma_addr_t) == sizeof(u64))
if (of_device_is_compatible(np, "fsl,sec-v5.0-job-ring"))
dma_set_mask(jrdev, DMA_BIT_MASK(40));
else
dma_set_mask(jrdev, DMA_BIT_MASK(36));
else
dma_set_mask(jrdev, DMA_BIT_MASK(32));
/* Identify the interrupt */
jrpriv->irq = of_irq_to_resource(np, 0, NULL);

View File

@ -0,0 +1,122 @@
/*
* CAAM/SEC 4.x functions for handling key-generation jobs
*
* Copyright 2008-2011 Freescale Semiconductor, Inc.
*
*/
#include "compat.h"
#include "jr.h"
#include "error.h"
#include "desc_constr.h"
#include "key_gen.h"
/*
 * Job completion callback for split key generation.
 *
 * @context is the struct split_key_result owned by the submitter: the job
 * status is stored in it and its completion is signalled. A non-zero status
 * is additionally logged in decoded form.
 */
void split_key_done(struct device *dev, u32 *desc, u32 err,
		    void *context)
{
	struct split_key_result *result = context;
	char errstr[CAAM_ERROR_STR_MAX];

#ifdef DEBUG
	dev_err(dev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
#endif

	/* Hand the status over first; it is only consumed after complete() */
	result->err = err;

	if (err != 0) {
		dev_err(dev, "%08x: %s\n", err, caam_jr_strstatus(errstr, err));
	}

	complete(&result->completion);
}
EXPORT_SYMBOL(split_key_done);
/*
get a split ipad/opad key

Split key generation-----------------------------------------------

[00] 0xb0810008    jobdesc: stidx=1 share=never len=8
[01] 0x04000014        key: class2->keyreg len=20
			@0xffe01000
[03] 0x84410014  operation: cls2-op sha1 hmac init dec
[04] 0x24940000     fifold: class2 msgdata-last2 len=0 imm
[05] 0xa4000001       jump: class2 local all ->1 [06]
[06] 0x64260028    fifostr: class2 mdsplit-jdk len=40
			@0xffe04000
*/
/*
 * gen_split_key() - build and run a job that derives a split key
 * @jrdev:		job ring device the job is enqueued on
 * @key_out:		buffer receiving the split key
 * @split_key_len:	length used for the FIFO STORE of the split key
 * @split_key_pad_len:	number of bytes of @key_out that are DMA mapped
 * @key_in:		input key
 * @keylen:		length of @key_in in bytes
 * @alg_op:		class 2 OPERATION bits; OP_ALG_DECRYPT and
 *			OP_ALG_AS_INIT are OR-ed in here
 *
 * Sleeps until the job has completed, so it must be called from process
 * context.
 *
 * Returns 0 on success, the job status reported to split_key_done(), the
 * error from caam_jr_enqueue(), or -ENOMEM if the descriptor cannot be
 * allocated or a buffer cannot be DMA mapped. Note that the return type is
 * u32, so negative errno values reach the caller converted to unsigned.
 */
u32 gen_split_key(struct device *jrdev, u8 *key_out, int split_key_len,
		  int split_key_pad_len, const u8 *key_in, u32 keylen,
		  u32 alg_op)
{
	u32 *desc;
	struct split_key_result result;
	dma_addr_t dma_addr_in, dma_addr_out;
	int ret;

	desc = kmalloc(CAAM_CMD_SZ * 6 + CAAM_PTR_SZ * 2, GFP_KERNEL | GFP_DMA);
	if (!desc) {
		dev_err(jrdev, "unable to allocate split key descriptor\n");
		return -ENOMEM;
	}

	init_job_desc(desc, 0);

	dma_addr_in = dma_map_single(jrdev, (void *)key_in, keylen,
				     DMA_TO_DEVICE);
	if (dma_mapping_error(jrdev, dma_addr_in)) {
		dev_err(jrdev, "unable to map key input memory\n");
		ret = -ENOMEM;
		goto out_free;
	}
	append_key(desc, dma_addr_in, keylen, CLASS_2 | KEY_DEST_CLASS_REG);

	/* Sets MDHA up into an HMAC-INIT */
	append_operation(desc, alg_op | OP_ALG_DECRYPT | OP_ALG_AS_INIT);

	/*
	 * do a FIFO_LOAD of zero, this will trigger the internal key expansion
	 * into both pads inside MDHA
	 */
	append_fifo_load_as_imm(desc, NULL, 0, LDST_CLASS_2_CCB |
				FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST2);

	/*
	 * FIFO_STORE with the explicit split-key content store
	 * (0x26 output type)
	 */
	dma_addr_out = dma_map_single(jrdev, key_out, split_key_pad_len,
				      DMA_FROM_DEVICE);
	if (dma_mapping_error(jrdev, dma_addr_out)) {
		dev_err(jrdev, "unable to map key output memory\n");
		ret = -ENOMEM;
		/* the input key is already mapped and must be released */
		goto out_unmap_in;
	}
	append_fifo_store(desc, dma_addr_out, split_key_len,
			  LDST_CLASS_2_CCB | FIFOST_TYPE_SPLIT_KEK);

#ifdef DEBUG
	print_hex_dump(KERN_ERR, "ctx.key@"xstr(__LINE__)": ",
		       DUMP_PREFIX_ADDRESS, 16, 4, key_in, keylen, 1);
	print_hex_dump(KERN_ERR, "jobdesc@"xstr(__LINE__)": ",
		       DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
#endif

	result.err = 0;
	init_completion(&result.completion);

	ret = caam_jr_enqueue(jrdev, desc, split_key_done, &result);
	if (!ret) {
		/*
		 * in progress
		 *
		 * The wait must not be interruptible: "result" lives on this
		 * stack frame and the buffers stay mapped for the device, so
		 * returning before split_key_done() has run would leave the
		 * job pointing at freed/unmapped memory.
		 */
		wait_for_completion(&result.completion);
		ret = result.err;
#ifdef DEBUG
		print_hex_dump(KERN_ERR, "ctx.key@"xstr(__LINE__)": ",
			       DUMP_PREFIX_ADDRESS, 16, 4, key_out,
			       split_key_pad_len, 1);
#endif
	}

	dma_unmap_single(jrdev, dma_addr_out, split_key_pad_len,
			 DMA_FROM_DEVICE);
out_unmap_in:
	dma_unmap_single(jrdev, dma_addr_in, keylen, DMA_TO_DEVICE);
out_free:
	kfree(desc);

	return ret;
}

View File

@ -0,0 +1,17 @@
/*
* CAAM/SEC 4.x definitions for handling key-generation jobs
*
* Copyright 2008-2011 Freescale Semiconductor, Inc.
*
*/
/*
 * Shared between the submitter of a split key job and its completion
 * callback: split_key_done() stores the job status in @err and then
 * signals @completion.
 */
struct split_key_result {
struct completion completion; /* signalled by split_key_done() */
int err; /* job status as passed to split_key_done() */
};
void split_key_done(struct device *dev, u32 *desc, u32 err, void *context);
u32 gen_split_key(struct device *jrdev, u8 *key_out, int split_key_len,
int split_key_pad_len, const u8 *key_in, u32 keylen,
u32 alg_op);

401
drivers/crypto/caam/pdb.h Normal file
View File

@ -0,0 +1,401 @@
/*
* CAAM Protocol Data Block (PDB) definition header file
*
* Copyright 2008-2012 Freescale Semiconductor, Inc.
*
*/
#ifndef CAAM_PDB_H
#define CAAM_PDB_H
/*
* PDB- IPSec ESP Header Modification Options
*/
#define PDBHMO_ESP_DECAP_SHIFT 12
#define PDBHMO_ESP_ENCAP_SHIFT 4
/*
* Encap and Decap - Decrement TTL (Hop Limit) - Based on the value of the
* Options Byte IP version (IPvsn) field:
* if IPv4, decrement the inner IP header TTL field (byte 8);
* if IPv6 decrement the inner IP header Hop Limit field (byte 7).
*/
#define PDBHMO_ESP_DECAP_DEC_TTL (0x02 << PDBHMO_ESP_DECAP_SHIFT)
#define PDBHMO_ESP_ENCAP_DEC_TTL (0x02 << PDBHMO_ESP_ENCAP_SHIFT)
/*
* Decap - DiffServ Copy - Copy the IPv4 TOS or IPv6 Traffic Class byte
* from the outer IP header to the inner IP header.
*/
#define PDBHMO_ESP_DIFFSERV (0x01 << PDBHMO_ESP_DECAP_SHIFT)
/*
* Encap- Copy DF bit -if an IPv4 tunnel mode outer IP header is coming from
* the PDB, copy the DF bit from the inner IP header to the outer IP header.
*/
#define PDBHMO_ESP_DFBIT (0x04 << PDBHMO_ESP_ENCAP_SHIFT)
/*
* PDB - IPSec ESP Encap/Decap Options
*/
#define PDBOPTS_ESP_ARSNONE 0x00 /* no antireplay window */
#define PDBOPTS_ESP_ARS32 0x40 /* 32-entry antireplay window */
#define PDBOPTS_ESP_ARS64 0xc0 /* 64-entry antireplay window */
#define PDBOPTS_ESP_IVSRC 0x20 /* IV comes from internal random gen */
#define PDBOPTS_ESP_ESN 0x10 /* extended sequence included */
#define PDBOPTS_ESP_OUTFMT 0x08 /* output only decapsulation (decap) */
#define PDBOPTS_ESP_IPHDRSRC 0x08 /* IP header comes from PDB (encap) */
#define PDBOPTS_ESP_INCIPHDR 0x04 /* Prepend IP header to output frame */
#define PDBOPTS_ESP_IPVSN 0x02 /* process IPv6 header */
#define PDBOPTS_ESP_TUNNEL 0x01 /* tunnel mode next-header byte */
#define PDBOPTS_ESP_IPV6 0x02 /* ip header version is V6 */
#define PDBOPTS_ESP_DIFFSERV 0x40 /* copy TOS/TC from inner iphdr */
#define PDBOPTS_ESP_UPDATE_CSUM 0x80 /* encap-update ip header checksum */
#define PDBOPTS_ESP_VERIFY_CSUM 0x20 /* decap-validate ip header checksum */
/*
* General IPSec encap/decap PDB definitions
*/
/*
 * Cipher-mode specific areas of the IPsec encapsulation PDB. Each is
 * 16 bytes and they are overlaid in the anonymous union of
 * struct ipsec_encap_pdb.
 */
/* CBC */
struct ipsec_encap_cbc {
u32 iv[4];
};
/* CTR */
struct ipsec_encap_ctr {
u32 ctr_nonce;
u32 ctr_initial;
u32 iv[2];
};
/* CCM */
struct ipsec_encap_ccm {
u32 salt; /* lower 24 bits */
u8 b0_flags;
u8 ctr_flags;
u16 ctr_initial;
u32 iv[2];
};
/* GCM */
struct ipsec_encap_gcm {
u32 salt; /* lower 24 bits */
u32 rsvd1;
u32 iv[2];
};
/*
 * IPsec ESP encapsulation PDB: a fixed header, one cipher-mode specific
 * area (anonymous union), the SPI and an optional trailing IP header.
 */
struct ipsec_encap_pdb {
u8 hmo_rsvd; /* presumably PDBHMO_ESP_* header modification bits - confirm */
u8 ip_nh;
u8 ip_nh_offset;
u8 options; /* presumably a combination of PDBOPTS_ESP_* - confirm */
u32 seq_num_ext_hi;
u32 seq_num;
/* exactly one member applies, depending on the cipher mode in use */
union {
struct ipsec_encap_cbc cbc;
struct ipsec_encap_ctr ctr;
struct ipsec_encap_ccm ccm;
struct ipsec_encap_gcm gcm;
};
u32 spi;
u16 rsvd1;
u16 ip_hdr_len; /* NOTE(review): presumably describes ip_hdr[] - confirm units */
u32 ip_hdr[0]; /* optional IP Header content */
};
/*
 * Cipher-mode specific areas of the IPsec decapsulation PDB. Each is
 * 8 bytes and they are overlaid in the anonymous union of
 * struct ipsec_decap_pdb.
 */
/* CBC: no mode specific data, only reserved words */
struct ipsec_decap_cbc {
u32 rsvd[2];
};
/* CTR */
struct ipsec_decap_ctr {
u32 salt;
u32 ctr_initial;
};
/* CCM */
struct ipsec_decap_ccm {
u32 salt;
u8 iv_flags;
u8 ctr_flags;
u16 ctr_initial;
};
/* GCM */
struct ipsec_decap_gcm {
u32 salt;
u32 resvd;
};
/*
 * IPsec ESP decapsulation PDB: a fixed header, one cipher-mode specific
 * area (anonymous union), sequence number state and the anti-replay words.
 */
struct ipsec_decap_pdb {
u16 hmo_ip_hdr_len; /* presumably PDBHMO_ESP_* bits plus IP header length - confirm */
u8 ip_nh_offset;
u8 options; /* presumably a combination of PDBOPTS_ESP_* - confirm */
/* exactly one member applies, depending on the cipher mode in use */
union {
struct ipsec_decap_cbc cbc;
struct ipsec_decap_ctr ctr;
struct ipsec_decap_ccm ccm;
struct ipsec_decap_gcm gcm;
};
u32 seq_num_ext_hi;
u32 seq_num;
u32 anti_replay[2];
u32 end_index[0]; /* zero-length marker for the end of the structure */
};
/*
 * IPSec ESP Datapath Protocol Override Register (DPOVRD)
 *
 * Four byte-wide fields, 32 bits in total.
 */
struct ipsec_deco_dpovrd {
/* NOTE(review): presumably a flag within ovrd_ecn enabling the override - confirm */
#define IPSEC_ENCAP_DECO_DPOVRD_USE 0x80
u8 ovrd_ecn;
u8 ip_hdr_len;
u8 nh_offset;
u8 next_header; /* reserved if decap */
};
/*
* IEEE 802.11i WiFi Protocol Data Block
*/
#define WIFI_PDBOPTS_FCS 0x01
#define WIFI_PDBOPTS_AR 0x40
/* IEEE 802.11i (WiFi) encapsulation PDB */
struct wifi_encap_pdb {
u16 mac_hdr_len;
u8 rsvd;
u8 options; /* presumably WIFI_PDBOPTS_* - confirm */
u8 iv_flags;
u8 pri;
u16 pn1;
u32 pn2;
u16 frm_ctrl_mask;
u16 seq_ctrl_mask;
u8 rsvd1[2];
u8 cnst;
u8 key_id;
u8 ctr_flags;
u8 rsvd2;
u16 ctr_init;
};
/*
 * IEEE 802.11i (WiFi) decapsulation PDB. Same layout as the encapsulation
 * PDB except that the cnst/key_id bytes are replaced by reserved bytes.
 */
struct wifi_decap_pdb {
u16 mac_hdr_len;
u8 rsvd;
u8 options; /* presumably WIFI_PDBOPTS_* - confirm */
u8 iv_flags;
u8 pri;
u16 pn1;
u32 pn2;
u16 frm_ctrl_mask;
u16 seq_ctrl_mask;
u8 rsvd1[4];
u8 ctr_flags;
u8 rsvd2;
u16 ctr_init;
};
/*
* IEEE 802.16 WiMAX Protocol Data Block
*/
#define WIMAX_PDBOPTS_FCS 0x01
#define WIMAX_PDBOPTS_AR 0x40 /* decap only */
/* IEEE 802.16 (WiMAX) encapsulation PDB */
struct wimax_encap_pdb {
u8 rsvd[3];
u8 options; /* presumably WIMAX_PDBOPTS_* - confirm */
u32 nonce;
u8 b0_flags;
u8 ctr_flags;
u16 ctr_init;
/* begin DECO writeback region */
u32 pn;
/* end DECO writeback region */
};
/*
 * IEEE 802.16 (WiMAX) decapsulation PDB.
 * NOTE(review): antireplay_scorecard is a u64 preceded by 20 bytes of
 * members; on ABIs that align u64 to 8 bytes the compiler inserts 4 bytes of
 * padding in front of it - confirm this matches the hardware layout.
 */
struct wimax_decap_pdb {
u8 rsvd[3];
u8 options; /* presumably WIMAX_PDBOPTS_* - confirm */
u32 nonce;
u8 iv_flags;
u8 ctr_flags;
u16 ctr_init;
/* begin DECO writeback region */
u32 pn;
u8 rsvd1[2];
u16 antireplay_len;
u64 antireplay_scorecard;
/* end DECO writeback region */
};
/*
* IEEE 802.1AE MACsec Protocol Data Block
*/
#define MACSEC_PDBOPTS_FCS 0x01
#define MACSEC_PDBOPTS_AR 0x40 /* used in decap only */
/*
 * MACsec encapsulation PDB.
 * NOTE(review): sci is a u64 preceded by only 4 bytes of members; on ABIs
 * that align u64 to 8 bytes the compiler inserts 4 bytes of padding in front
 * of it - confirm this matches the hardware layout.
 */
struct macsec_encap_pdb {
u16 aad_len;
u8 rsvd;
u8 options; /* presumably MACSEC_PDBOPTS_* - confirm */
u64 sci;
u16 ethertype;
u8 tci_an;
u8 rsvd1;
/* begin DECO writeback region */
u32 pn;
/* end DECO writeback region */
};
/*
 * MACsec decapsulation PDB.
 * NOTE(review): sci is a u64 preceded by only 4 bytes of members; on ABIs
 * that align u64 to 8 bytes the compiler inserts 4 bytes of padding in front
 * of it - confirm this matches the hardware layout.
 */
struct macsec_decap_pdb {
u16 aad_len;
u8 rsvd;
u8 options; /* presumably MACSEC_PDBOPTS_* - confirm */
u64 sci;
u8 rsvd1[3];
/* begin DECO writeback region */
u8 antireplay_len;
u32 pn;
u64 antireplay_scorecard;
/* end DECO writeback region */
};
/*
* SSL/TLS/DTLS Protocol Data Blocks
*/
#define TLS_PDBOPTS_ARS32 0x40
#define TLS_PDBOPTS_ARS64 0xc0
#define TLS_PDBOPTS_OUTFMT 0x08
#define TLS_PDBOPTS_IV_WRTBK 0x02 /* 1.1/1.2/DTLS only */
#define TLS_PDBOPTS_EXP_RND_IV 0x01 /* 1.1/1.2/DTLS only */
/*
 * SSL/TLS block cipher encapsulation PDB.
 * NOTE(review): seq_num is a u64 preceded by only 4 bytes of members; on
 * ABIs that align u64 to 8 bytes the compiler inserts padding in front of
 * it - confirm this matches the hardware layout.
 */
struct tls_block_encap_pdb {
u8 type;
u8 version[2];
u8 options; /* presumably TLS_PDBOPTS_* - confirm */
u64 seq_num;
u32 iv[4];
};
/*
 * SSL/TLS stream cipher encapsulation PDB.
 * NOTE(review): seq_num is a u64 preceded by only 4 bytes of members; on
 * ABIs that align u64 to 8 bytes the compiler inserts padding in front of
 * it - confirm this matches the hardware layout.
 */
struct tls_stream_encap_pdb {
u8 type;
u8 version[2];
u8 options; /* presumably TLS_PDBOPTS_* - confirm */
u64 seq_num;
u8 i; /* NOTE(review): i/j look like stream cipher state indices - confirm */
u8 j;
u8 rsvd1[2];
};
/*
 * DTLS block cipher encapsulation PDB. Unlike the TLS variant the sequence
 * information is split into a 16-bit epoch and three 16-bit words.
 */
struct dtls_block_encap_pdb {
u8 type;
u8 version[2];
u8 options; /* presumably TLS_PDBOPTS_* - confirm */
u16 epoch;
u16 seq_num[3];
u32 iv[4];
};
/*
 * SSL/TLS block cipher decapsulation PDB.
 * NOTE(review): seq_num is a u64 preceded by only 4 bytes of members; on
 * ABIs that align u64 to 8 bytes the compiler inserts padding in front of
 * it - confirm this matches the hardware layout.
 */
struct tls_block_decap_pdb {
u8 rsvd[3];
u8 options; /* presumably TLS_PDBOPTS_* - confirm */
u64 seq_num;
u32 iv[4];
};
/*
 * SSL/TLS stream cipher decapsulation PDB.
 * NOTE(review): seq_num is a u64 preceded by only 4 bytes of members; on
 * ABIs that align u64 to 8 bytes the compiler inserts padding in front of
 * it - confirm this matches the hardware layout.
 */
struct tls_stream_decap_pdb {
u8 rsvd[3];
u8 options; /* presumably TLS_PDBOPTS_* - confirm */
u64 seq_num;
u8 i; /* NOTE(review): i/j look like stream cipher state indices - confirm */
u8 j;
u8 rsvd1[2];
};
/*
 * DTLS block cipher decapsulation PDB.
 * NOTE(review): antireplay_scorecard is a u64 preceded by 28 bytes of
 * members; on ABIs that align u64 to 8 bytes the compiler inserts 4 bytes of
 * padding in front of it - confirm this matches the hardware layout.
 */
struct dtls_block_decap_pdb {
u8 rsvd[3];
u8 options; /* presumably TLS_PDBOPTS_* - confirm */
u16 epoch;
u16 seq_num[3];
u32 iv[4];
u64 antireplay_scorecard;
};
/*
* SRTP Protocol Data Blocks
*/
#define SRTP_PDBOPTS_MKI 0x08
#define SRTP_PDBOPTS_AR 0x40
/* SRTP encapsulation PDB */
struct srtp_encap_pdb {
u8 x_len;
u8 mki_len;
u8 n_tag;
u8 options; /* presumably SRTP_PDBOPTS_* - confirm */
u32 cnst0;
u8 rsvd[2];
u16 cnst1;
u16 salt[7];
u16 cnst2;
u32 rsvd1;
u32 roc;
u32 opt_mki;
};
/*
 * SRTP decapsulation PDB.
 * NOTE(review): antireplay_scorecard is a u64 preceded by 36 bytes of
 * members; on ABIs that align u64 to 8 bytes the compiler inserts 4 bytes of
 * padding in front of it - confirm this matches the hardware layout.
 */
struct srtp_decap_pdb {
u8 x_len;
u8 mki_len;
u8 n_tag;
u8 options; /* presumably SRTP_PDBOPTS_* - confirm */
u32 cnst0;
u8 rsvd[2];
u16 cnst1;
u16 salt[7];
u16 cnst2;
u16 rsvd1;
u16 seq_num;
u32 roc;
u64 antireplay_scorecard;
};
/*
* DSA/ECDSA Protocol Data Blocks
* Two of these exist: DSA-SIGN, and DSA-VERIFY. They are similar
* except for the treatment of "w" for verify, "s" for sign,
* and the placement of "a,b".
*/
#define DSA_PDB_SGF_SHIFT 24
#define DSA_PDB_SGF_MASK (0xff << DSA_PDB_SGF_SHIFT)
#define DSA_PDB_SGF_Q (0x80 << DSA_PDB_SGF_SHIFT)
#define DSA_PDB_SGF_R (0x40 << DSA_PDB_SGF_SHIFT)
#define DSA_PDB_SGF_G (0x20 << DSA_PDB_SGF_SHIFT)
#define DSA_PDB_SGF_W (0x10 << DSA_PDB_SGF_SHIFT)
#define DSA_PDB_SGF_S (0x10 << DSA_PDB_SGF_SHIFT)
#define DSA_PDB_SGF_F (0x08 << DSA_PDB_SGF_SHIFT)
#define DSA_PDB_SGF_C (0x04 << DSA_PDB_SGF_SHIFT)
#define DSA_PDB_SGF_D (0x02 << DSA_PDB_SGF_SHIFT)
#define DSA_PDB_SGF_AB_SIGN (0x02 << DSA_PDB_SGF_SHIFT)
#define DSA_PDB_SGF_AB_VERIFY (0x01 << DSA_PDB_SGF_SHIFT)
#define DSA_PDB_L_SHIFT 7
#define DSA_PDB_L_MASK (0x3ff << DSA_PDB_L_SHIFT)
#define DSA_PDB_N_MASK 0x7f
/*
 * DSA/ECDSA sign PDB.
 * NOTE(review): the operand members are plain CPU pointers, so their size
 * follows the kernel's pointer width; if the block is consumed by the
 * device they would presumably have to carry bus addresses - confirm.
 */
struct dsa_sign_pdb {
u32 sgf_ln; /* Use DSA_PDB_ definitions per above */
u8 *q;
u8 *r;
u8 *g; /* or Gx,y */
u8 *s;
u8 *f;
u8 *c;
u8 *d;
u8 *ab; /* ECC only */
u8 *u;
};
/*
 * DSA/ECDSA verify PDB. Differs from the sign PDB in carrying "w" instead
 * of "s", a temporary data block, and "ab" as the last member.
 * NOTE(review): the operand members are plain CPU pointers, so their size
 * follows the kernel's pointer width; if the block is consumed by the
 * device they would presumably have to carry bus addresses - confirm.
 */
struct dsa_verify_pdb {
u32 sgf_ln; /* uses the DSA_PDB_* definitions, like dsa_sign_pdb */
u8 *q;
u8 *r;
u8 *g; /* or Gx,y */
u8 *w; /* or Wx,y */
u8 *f;
u8 *c;
u8 *d;
u8 *tmp; /* temporary data block */
u8 *ab; /* only used if ECC processing */
};
#endif

View File

@ -117,6 +117,12 @@ struct jr_outentry {
#define CHA_NUM_DECONUM_SHIFT 56
#define CHA_NUM_DECONUM_MASK (0xfull << CHA_NUM_DECONUM_SHIFT)
/*
 * SEC version ID fields. caam_get_era() lays this structure over the value
 * of the SEC_VID register it is given and matches ip_id and maj_rev against
 * its era table.
 */
struct sec_vid {
u16 ip_id; /* IP block identifier */
u8 maj_rev; /* major revision */
u8 min_rev; /* minor revision */
};
struct caam_perfmon {
/* Performance Monitor Registers f00-f9f */
u64 req_dequeued; /* PC_REQ_DEQ - Dequeued Requests */
@ -167,7 +173,7 @@ struct partid {
u32 pidr; /* partition ID, DECO */
};
/* RNG test mode (replicated twice in some configurations) */
/* RNGB test mode (replicated twice in some configurations) */
/* Padded out to 0x100 */
struct rngtst {
u32 mode; /* RTSTMODEx - Test mode */
@ -200,6 +206,31 @@ struct rngtst {
u32 rsvd14[15];
};
/*
 * RNG4 TRNG test registers
 *
 * Shares its place in struct caam_ctrl with struct rngtst through a union.
 * Eight 32-bit register slots plus rsvd1[56] make one instance 0x100 bytes.
 * Three of the slots hold a different register depending on RTMCTL_PRGM
 * (program mode vs. run mode); those are modelled as anonymous unions.
 */
struct rng4tst {
#define RTMCTL_PRGM 0x00010000 /* 1 -> program mode, 0 -> run mode */
u32 rtmctl; /* misc. control register */
u32 rtscmisc; /* statistical check misc. register */
u32 rtpkrrng; /* poker range register */
union {
u32 rtpkrmax; /* PRGM=1: poker max. limit register */
u32 rtpkrsq; /* PRGM=0: poker square calc. result register */
};
/* entropy delay field of rtsdctl: upper 16 bits */
#define RTSDCTL_ENT_DLY_SHIFT 16
#define RTSDCTL_ENT_DLY_MASK (0xffff << RTSDCTL_ENT_DLY_SHIFT)
u32 rtsdctl; /* seed control register */
union {
u32 rtsblim; /* PRGM=1: sparse bit limit register */
u32 rttotsam; /* PRGM=0: total samples register */
};
u32 rtfrqmin; /* frequency count min. limit register */
union {
u32 rtfrqmax; /* PRGM=1: freq. count max. limit register */
u32 rtfrqcnt; /* PRGM=0: freq. count register */
};
u32 rsvd1[56]; /* pads the structure out to 0x100 bytes */
};
/*
* caam_ctrl - basic core configuration
* starts base + 0x0000 padded out to 0x1000
@ -249,7 +280,10 @@ struct caam_ctrl {
/* RNG Test/Verification/Debug Access 600-7ff */
/* (Useful in Test/Debug modes only...) */
struct rngtst rtst[2];
union {
struct rngtst rtst[2];
struct rng4tst r4tst[2];
};
u32 rsvd9[448];

View File

@ -0,0 +1,156 @@
/*
* CAAM/SEC 4.x functions for using scatterlists in caam driver
*
* Copyright 2008-2011 Freescale Semiconductor, Inc.
*
*/
struct sec4_sg_entry;
/*
 * convert single dma address to h/w link table format
 *
 * Fills one link table entry: @dma, @len and @offset are stored as given,
 * the reserved and buffer pool id fields are cleared. With DEBUG defined
 * the finished entry is hex-dumped to the kernel log.
 * Note that @offset is a u32 but is stored into a u16 field, so only its
 * low 16 bits are kept.
 */
static inline void dma_to_sec4_sg_one(struct sec4_sg_entry *sec4_sg_ptr,
dma_addr_t dma, u32 len, u32 offset)
{
sec4_sg_ptr->ptr = dma;
sec4_sg_ptr->len = len;
sec4_sg_ptr->reserved = 0;
sec4_sg_ptr->buf_pool_id = 0;
sec4_sg_ptr->offset = offset;
#ifdef DEBUG
print_hex_dump(KERN_ERR, "sec4_sg_ptr@: ",
DUMP_PREFIX_ADDRESS, 16, 4, sec4_sg_ptr,
sizeof(struct sec4_sg_entry), 1);
#endif
}
/*
 * Translate @sg_count entries of a DMA-mapped scatterlist into consecutive
 * h/w link table entries starting at @sec4_sg_ptr, applying @offset to each.
 * The final bit is NOT set here; the last entry written is returned so the
 * caller can mark it or keep appending after it.
 */
static inline struct sec4_sg_entry *
sg_to_sec4_sg(struct scatterlist *sg, int sg_count,
	      struct sec4_sg_entry *sec4_sg_ptr, u32 offset)
{
	struct sec4_sg_entry *entry = sec4_sg_ptr;
	int remaining;

	for (remaining = sg_count; remaining != 0; remaining--) {
		dma_to_sec4_sg_one(entry, sg_dma_address(sg),
				   sg_dma_len(sg), offset);
		entry++;
		sg = scatterwalk_sg_next(sg);
	}

	/* entry points one past the last slot that was filled in */
	return entry - 1;
}
/*
 * Same conversion as sg_to_sec4_sg(), but additionally flags the last entry
 * written with SEC4_SG_LEN_FIN.
 * The scatterlist must already have been dma mapped.
 */
static inline void sg_to_sec4_sg_last(struct scatterlist *sg, int sg_count,
				      struct sec4_sg_entry *sec4_sg_ptr,
				      u32 offset)
{
	struct sec4_sg_entry *last;

	last = sg_to_sec4_sg(sg, sg_count, sec4_sg_ptr, offset);
	last->len |= SEC4_SG_LEN_FIN;
}
/*
 * Count how many scatterlist entries are needed to cover @nbytes.
 * *@chained is set to true (never cleared) when an entry that is not the
 * last one is followed in memory by an entry of length zero.
 */
static inline int __sg_count(struct scatterlist *sg_list, int nbytes,
			     bool *chained)
{
	struct scatterlist *sg;
	int sg_nents = 0;

	for (sg = sg_list; nbytes > 0; sg = scatterwalk_sg_next(sg)) {
		sg_nents++;
		nbytes -= sg->length;
		if (!sg_is_last(sg) && (sg + 1)->length == 0)
			*chained = true;
	}

	return sg_nents;
}
/*
 * Number of scatterlist entries covering @nbytes, with the single-entry
 * case reported as 0 instead of 1.
 */
static inline int sg_count(struct scatterlist *sg_list, int nbytes,
			   bool *chained)
{
	int nents = __sg_count(sg_list, nbytes, chained);

	return likely(nents == 1) ? 0 : nents;
}
/*
 * DMA-map nents elements of a scatterlist.
 *
 * @dev:     device the mapping is made for
 * @sg:      first scatterlist element
 * @nents:   number of elements to map
 * @dir:     DMA direction
 * @chained: when true the elements are mapped one at a time, following
 *           scatterwalk_sg_next() between them; otherwise a single
 *           dma_map_sg() call covers all nents elements
 *
 * Returns nents on success.  dma_map_sg() reports failure by returning 0;
 * in that case any elements already mapped by this call are unmapped again
 * and 0 is returned, so the caller never ends up with a half-mapped list.
 */
static int dma_map_sg_chained(struct device *dev, struct scatterlist *sg,
			      unsigned int nents, enum dma_data_direction dir,
			      bool chained)
{
	struct scatterlist *cur = sg;
	unsigned int i;

	if (likely(!chained))
		return dma_map_sg(dev, sg, nents, dir) ? nents : 0;

	for (i = 0; i < nents; i++) {
		if (!dma_map_sg(dev, cur, 1, dir))
			goto unwind;
		cur = scatterwalk_sg_next(cur);
	}
	return nents;

unwind:
	/* undo the i elements that were mapped before the failure */
	for (cur = sg; i > 0; i--) {
		dma_unmap_sg(dev, cur, 1, dir);
		cur = scatterwalk_sg_next(cur);
	}
	return 0;
}
/*
 * Undo a dma_map_sg_chained() mapping of nents elements.
 *
 * For a chained list every element is unmapped on its own, advancing with
 * scatterwalk_sg_next(); otherwise one dma_unmap_sg() call covers the whole
 * list.  nents is returned in both cases.
 */
static int dma_unmap_sg_chained(struct device *dev, struct scatterlist *sg,
				unsigned int nents, enum dma_data_direction dir,
				bool chained)
{
	unsigned int remaining;

	if (likely(!chained)) {
		dma_unmap_sg(dev, sg, nents, dir);
		return nents;
	}

	for (remaining = nents; remaining; remaining--) {
		dma_unmap_sg(dev, sg, 1, dir);
		sg = scatterwalk_sg_next(sg);
	}

	return nents;
}
/*
 * Copy the first len bytes described by scatterlist sg into dest.
 *
 * Whole segments are copied while they fit entirely below len; the segment
 * that reaches len contributes only the bytes still missing.
 * NOTE(review): data is read through sg_virt(), so every segment is assumed
 * to have a usable kernel virtual address - confirm against callers.
 */
static inline void sg_copy(u8 *dest, struct scatterlist *sg, unsigned int len)
{
	struct scatterlist *seg = sg;
	int copied = 0;			/* bytes already placed in dest */
	int seg_end = seg->length;	/* byte offset at which seg ends */

	for (; seg_end < len; seg_end += seg->length) {
		memcpy(dest + copied, (u8 *) sg_virt(seg), seg->length);
		seg = scatterwalk_sg_next(seg);
		copied = seg_end;
	}

	/* partial (or exactly fitting) final segment */
	if (copied < len)
		memcpy(dest + copied, (u8 *) sg_virt(seg), len - copied);
}
/*
 * Copy the byte range [to_skip, end) of scatterlist sg into dest.
 *
 * @dest:    destination buffer, at least end - to_skip bytes
 * @sg:      first scatterlist element
 * @to_skip: offset of the first byte to copy
 * @end:     offset one past the last byte to copy
 *
 * An empty or inverted range (to_skip < 0 or to_skip >= end) copies
 * nothing.  If end lies inside the segment that contains to_skip, only the
 * bytes up to end are taken from that segment; previously the copy ran to
 * the end of the segment, overrunning dest, and the follow-up length
 * (end - sg_index) wrapped around as unsigned.
 * NOTE(review): data is read through sg_virt(), so every segment is assumed
 * to have a usable kernel virtual address - confirm against callers.
 */
static inline void sg_copy_part(u8 *dest, struct scatterlist *sg,
				int to_skip, unsigned int end)
{
	struct scatterlist *current_sg = sg;
	unsigned int seg_off;
	int sg_index, cpy_index;

	/* nothing to copy; walking the list would only run past its data */
	if (to_skip < 0 || (unsigned int)to_skip >= end)
		return;

	/* find the segment holding byte to_skip; sg_index is its end offset */
	sg_index = current_sg->length;
	while (sg_index <= to_skip) {
		current_sg = scatterwalk_sg_next(current_sg);
		sg_index += current_sg->length;
	}

	/* bytes of this segment located at or after to_skip */
	cpy_index = sg_index - to_skip;
	seg_off = current_sg->length - cpy_index;

	/* do not read past end when the range stops inside this segment */
	if (end < (unsigned int)sg_index)
		cpy_index = end - to_skip;

	memcpy(dest, (u8 *) sg_virt(current_sg) + seg_off, cpy_index);

	/* remainder, if any, starts at the beginning of the next segment */
	if (end > (unsigned int)sg_index)
		sg_copy(dest + cpy_index, scatterwalk_sg_next(current_sg),
			end - sg_index);
}

View File

@ -24,6 +24,7 @@
#define MV_CESA "MV-CESA:"
#define MAX_HW_HASH_SIZE 0xFFFF
#define MV_CESA_EXPIRE 500 /* msec */
/*
* STM:
@ -87,6 +88,7 @@ struct crypto_priv {
spinlock_t lock;
struct crypto_queue queue;
enum engine_status eng_st;
struct timer_list completion_timer;
struct crypto_async_request *cur_req;
struct req_progress p;
int max_req_size;
@ -138,6 +140,29 @@ struct mv_req_hash_ctx {
int count_add;
};
/*
 * Handler of cpg->completion_timer (installed by mv_setup_timer()).
 *
 * Logs whether the accelerator still has SEC_CMD_EN_SEC_ACCL0 set, writes
 * SEC_CMD_DISABLE_SEC to the command register and spins (logging on every
 * iteration) until that bit reads back clear, then puts the engine into
 * ENGINE_W_DEQUEUE and wakes the queue thread.
 */
static void mv_completion_timer_callback(unsigned long unused)
{
	const char *state_prefix = "in";

	if (readl(cpg->reg + SEC_ACCEL_CMD) & SEC_CMD_EN_SEC_ACCL0)
		state_prefix = "";

	printk(KERN_ERR MV_CESA
	       "completion timer expired (CESA %sactive), cleaning up.\n",
	       state_prefix);

	del_timer(&cpg->completion_timer);

	writel(SEC_CMD_DISABLE_SEC, cpg->reg + SEC_ACCEL_CMD);
	for (;;) {
		if (!(readl(cpg->reg + SEC_ACCEL_CMD) & SEC_CMD_DISABLE_SEC))
			break;
		printk(KERN_INFO MV_CESA "%s: waiting for engine finishing\n", __func__);
	}

	cpg->eng_st = ENGINE_W_DEQUEUE;
	wake_up_process(cpg->queue_th);
}
/*
 * (Re)initialise cpg->completion_timer with mv_completion_timer_callback()
 * as its handler and arm it to fire MV_CESA_EXPIRE milliseconds from now.
 */
static void mv_setup_timer(void)
{
	unsigned long timeout = msecs_to_jiffies(MV_CESA_EXPIRE);

	setup_timer(&cpg->completion_timer, &mv_completion_timer_callback, 0);
	mod_timer(&cpg->completion_timer, jiffies + timeout);
}
static void compute_aes_dec_key(struct mv_ctx *ctx)
{
struct crypto_aes_ctx gen_aes_key;
@ -273,12 +298,8 @@ static void mv_process_current_q(int first_block)
sizeof(struct sec_accel_config));
/* GO */
mv_setup_timer();
writel(SEC_CMD_EN_SEC_ACCL0, cpg->reg + SEC_ACCEL_CMD);
/*
* XXX: add timer if the interrupt does not occur for some mystery
* reason
*/
}
static void mv_crypto_algo_completion(void)
@ -357,12 +378,8 @@ static void mv_process_hash_current(int first_block)
memcpy(cpg->sram + SRAM_CONFIG, &op, sizeof(struct sec_accel_config));
/* GO */
mv_setup_timer();
writel(SEC_CMD_EN_SEC_ACCL0, cpg->reg + SEC_ACCEL_CMD);
/*
* XXX: add timer if the interrupt does not occur for some mystery
* reason
*/
}
static inline int mv_hash_import_sha1_ctx(const struct mv_req_hash_ctx *ctx,
@ -406,6 +423,15 @@ out:
return rc;
}
/*
 * Read the five DIGEST_INITIAL_VAL_{A..E} registers, in that order, into
 * ctx->state[0..4].
 */
static void mv_save_digest_state(struct mv_req_hash_ctx *ctx)
{
	static const unsigned int digest_reg[] = {
		DIGEST_INITIAL_VAL_A,
		DIGEST_INITIAL_VAL_B,
		DIGEST_INITIAL_VAL_C,
		DIGEST_INITIAL_VAL_D,
		DIGEST_INITIAL_VAL_E,
	};
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(digest_reg); i++)
		ctx->state[i] = readl(cpg->reg + digest_reg[i]);
}
static void mv_hash_algo_completion(void)
{
struct ahash_request *req = ahash_request_cast(cpg->cur_req);
@ -420,14 +446,12 @@ static void mv_hash_algo_completion(void)
memcpy(req->result, cpg->sram + SRAM_DIGEST_BUF,
crypto_ahash_digestsize(crypto_ahash_reqtfm
(req)));
} else
} else {
mv_save_digest_state(ctx);
mv_hash_final_fallback(req);
}
} else {
ctx->state[0] = readl(cpg->reg + DIGEST_INITIAL_VAL_A);
ctx->state[1] = readl(cpg->reg + DIGEST_INITIAL_VAL_B);
ctx->state[2] = readl(cpg->reg + DIGEST_INITIAL_VAL_C);
ctx->state[3] = readl(cpg->reg + DIGEST_INITIAL_VAL_D);
ctx->state[4] = readl(cpg->reg + DIGEST_INITIAL_VAL_E);
mv_save_digest_state(ctx);
}
}
@ -888,6 +912,10 @@ irqreturn_t crypto_int(int irq, void *priv)
if (!(val & SEC_INT_ACCEL0_DONE))
return IRQ_NONE;
if (!del_timer(&cpg->completion_timer)) {
printk(KERN_WARNING MV_CESA
"got an interrupt but no pending timer?\n");
}
val &= ~SEC_INT_ACCEL0_DONE;
writel(val, cpg->reg + FPGA_INT_STATUS);
writel(val, cpg->reg + SEC_ACCEL_INT_STATUS);
@ -1061,6 +1089,7 @@ static int mv_probe(struct platform_device *pdev)
if (!IS_ERR(cp->clk))
clk_prepare_enable(cp->clk);
writel(0, cpg->reg + SEC_ACCEL_INT_STATUS);
writel(SEC_INT_ACCEL0_DONE, cpg->reg + SEC_ACCEL_INT_MASK);
writel(SEC_CFG_STOP_DIG_ERR, cpg->reg + SEC_ACCEL_CFG);
writel(SRAM_CONFIG, cpg->reg + SEC_ACCEL_DESC_P0);

View File

@ -53,117 +53,6 @@
#include "talitos.h"
#define TALITOS_TIMEOUT 100000
#define TALITOS_MAX_DATA_LEN 65535
#define DESC_TYPE(desc_hdr) ((be32_to_cpu(desc_hdr) >> 3) & 0x1f)
#define PRIMARY_EU(desc_hdr) ((be32_to_cpu(desc_hdr) >> 28) & 0xf)
#define SECONDARY_EU(desc_hdr) ((be32_to_cpu(desc_hdr) >> 16) & 0xf)
/* descriptor pointer entry */
struct talitos_ptr {
__be16 len; /* length */
u8 j_extent; /* jump to sg link table and/or extent */
u8 eptr; /* extended address */
__be32 ptr; /* address */
};
static const struct talitos_ptr zero_entry = {
.len = 0,
.j_extent = 0,
.eptr = 0,
.ptr = 0
};
/* descriptor */
struct talitos_desc {
__be32 hdr; /* header high bits */
__be32 hdr_lo; /* header low bits */
struct talitos_ptr ptr[7]; /* ptr/len pair array */
};
/**
* talitos_request - descriptor submission request
* @desc: descriptor pointer (kernel virtual)
* @dma_desc: descriptor's physical bus address
* @callback: whom to call when descriptor processing is done
* @context: caller context (optional)
*/
struct talitos_request {
struct talitos_desc *desc;
dma_addr_t dma_desc;
void (*callback) (struct device *dev, struct talitos_desc *desc,
void *context, int error);
void *context;
};
/* per-channel fifo management */
struct talitos_channel {
void __iomem *reg;
/* request fifo */
struct talitos_request *fifo;
/* number of requests pending in channel h/w fifo */
atomic_t submit_count ____cacheline_aligned;
/* request submission (head) lock */
spinlock_t head_lock ____cacheline_aligned;
/* index to next free descriptor request */
int head;
/* request release (tail) lock */
spinlock_t tail_lock ____cacheline_aligned;
/* index to next in-progress/done descriptor request */
int tail;
};
struct talitos_private {
struct device *dev;
struct platform_device *ofdev;
void __iomem *reg;
int irq[2];
/* SEC global registers lock */
spinlock_t reg_lock ____cacheline_aligned;
/* SEC version geometry (from device tree node) */
unsigned int num_channels;
unsigned int chfifo_len;
unsigned int exec_units;
unsigned int desc_types;
/* SEC Compatibility info */
unsigned long features;
/*
* length of the request fifo
* fifo_len is chfifo_len rounded up to next power of 2
* so we can use bitwise ops to wrap
*/
unsigned int fifo_len;
struct talitos_channel *chan;
/* next channel to be assigned next incoming descriptor */
atomic_t last_chan ____cacheline_aligned;
/* request callback tasklet */
struct tasklet_struct done_task[2];
/* list of registered algorithms */
struct list_head alg_list;
/* hwrng device */
struct hwrng rng;
};
/* .features flag */
#define TALITOS_FTR_SRC_LINK_TBL_LEN_INCLUDES_EXTENT 0x00000001
#define TALITOS_FTR_HW_AUTH_CHECK 0x00000002
#define TALITOS_FTR_SHA224_HWINIT 0x00000004
#define TALITOS_FTR_HMAC_OK 0x00000008
static void to_talitos_ptr(struct talitos_ptr *talitos_ptr, dma_addr_t dma_addr)
{
talitos_ptr->ptr = cpu_to_be32(lower_32_bits(dma_addr));
@ -303,11 +192,11 @@ static int init_device(struct device *dev)
* callback must check err and feedback in descriptor header
* for device processing status.
*/
static int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc,
void (*callback)(struct device *dev,
struct talitos_desc *desc,
void *context, int error),
void *context)
int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc,
void (*callback)(struct device *dev,
struct talitos_desc *desc,
void *context, int error),
void *context)
{
struct talitos_private *priv = dev_get_drvdata(dev);
struct talitos_request *request;
@ -348,6 +237,7 @@ static int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc,
return -EINPROGRESS;
}
EXPORT_SYMBOL(talitos_submit);
/*
* process what was done, notify callback of error if not
@ -733,7 +623,7 @@ static void talitos_unregister_rng(struct device *dev)
* crypto alg
*/
#define TALITOS_CRA_PRIORITY 3000
#define TALITOS_MAX_KEY_SIZE 64
#define TALITOS_MAX_KEY_SIZE 96
#define TALITOS_MAX_IV_LENGTH 16 /* max of AES_BLOCK_SIZE, DES3_EDE_BLOCK_SIZE */
#define MD5_BLOCK_SIZE 64
@ -2066,6 +1956,59 @@ static struct talitos_alg_template driver_algs[] = {
DESC_HDR_MODE1_MDEU_PAD |
DESC_HDR_MODE1_MDEU_SHA1_HMAC,
},
{ .type = CRYPTO_ALG_TYPE_AEAD,
.alg.crypto = {
.cra_name = "authenc(hmac(sha224),cbc(aes))",
.cra_driver_name = "authenc-hmac-sha224-cbc-aes-talitos",
.cra_blocksize = AES_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC,
.cra_type = &crypto_aead_type,
.cra_aead = {
.setkey = aead_setkey,
.setauthsize = aead_setauthsize,
.encrypt = aead_encrypt,
.decrypt = aead_decrypt,
.givencrypt = aead_givencrypt,
.geniv = "<built-in>",
.ivsize = AES_BLOCK_SIZE,
.maxauthsize = SHA224_DIGEST_SIZE,
}
},
.desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP |
DESC_HDR_SEL0_AESU |
DESC_HDR_MODE0_AESU_CBC |
DESC_HDR_SEL1_MDEUA |
DESC_HDR_MODE1_MDEU_INIT |
DESC_HDR_MODE1_MDEU_PAD |
DESC_HDR_MODE1_MDEU_SHA224_HMAC,
},
{ .type = CRYPTO_ALG_TYPE_AEAD,
.alg.crypto = {
.cra_name = "authenc(hmac(sha224),cbc(des3_ede))",
.cra_driver_name = "authenc-hmac-sha224-cbc-3des-talitos",
.cra_blocksize = DES3_EDE_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC,
.cra_type = &crypto_aead_type,
.cra_aead = {
.setkey = aead_setkey,
.setauthsize = aead_setauthsize,
.encrypt = aead_encrypt,
.decrypt = aead_decrypt,
.givencrypt = aead_givencrypt,
.geniv = "<built-in>",
.ivsize = DES3_EDE_BLOCK_SIZE,
.maxauthsize = SHA224_DIGEST_SIZE,
}
},
.desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP |
DESC_HDR_SEL0_DEU |
DESC_HDR_MODE0_DEU_CBC |
DESC_HDR_MODE0_DEU_3DES |
DESC_HDR_SEL1_MDEUA |
DESC_HDR_MODE1_MDEU_INIT |
DESC_HDR_MODE1_MDEU_PAD |
DESC_HDR_MODE1_MDEU_SHA224_HMAC,
},
{ .type = CRYPTO_ALG_TYPE_AEAD,
.alg.crypto = {
.cra_name = "authenc(hmac(sha256),cbc(aes))",
@ -2119,6 +2062,112 @@ static struct talitos_alg_template driver_algs[] = {
DESC_HDR_MODE1_MDEU_PAD |
DESC_HDR_MODE1_MDEU_SHA256_HMAC,
},
{ .type = CRYPTO_ALG_TYPE_AEAD,
.alg.crypto = {
.cra_name = "authenc(hmac(sha384),cbc(aes))",
.cra_driver_name = "authenc-hmac-sha384-cbc-aes-talitos",
.cra_blocksize = AES_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC,
.cra_type = &crypto_aead_type,
.cra_aead = {
.setkey = aead_setkey,
.setauthsize = aead_setauthsize,
.encrypt = aead_encrypt,
.decrypt = aead_decrypt,
.givencrypt = aead_givencrypt,
.geniv = "<built-in>",
.ivsize = AES_BLOCK_SIZE,
.maxauthsize = SHA384_DIGEST_SIZE,
}
},
.desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP |
DESC_HDR_SEL0_AESU |
DESC_HDR_MODE0_AESU_CBC |
DESC_HDR_SEL1_MDEUB |
DESC_HDR_MODE1_MDEU_INIT |
DESC_HDR_MODE1_MDEU_PAD |
DESC_HDR_MODE1_MDEUB_SHA384_HMAC,
},
{ .type = CRYPTO_ALG_TYPE_AEAD,
.alg.crypto = {
.cra_name = "authenc(hmac(sha384),cbc(des3_ede))",
.cra_driver_name = "authenc-hmac-sha384-cbc-3des-talitos",
.cra_blocksize = DES3_EDE_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC,
.cra_type = &crypto_aead_type,
.cra_aead = {
.setkey = aead_setkey,
.setauthsize = aead_setauthsize,
.encrypt = aead_encrypt,
.decrypt = aead_decrypt,
.givencrypt = aead_givencrypt,
.geniv = "<built-in>",
.ivsize = DES3_EDE_BLOCK_SIZE,
.maxauthsize = SHA384_DIGEST_SIZE,
}
},
.desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP |
DESC_HDR_SEL0_DEU |
DESC_HDR_MODE0_DEU_CBC |
DESC_HDR_MODE0_DEU_3DES |
DESC_HDR_SEL1_MDEUB |
DESC_HDR_MODE1_MDEU_INIT |
DESC_HDR_MODE1_MDEU_PAD |
DESC_HDR_MODE1_MDEUB_SHA384_HMAC,
},
{ .type = CRYPTO_ALG_TYPE_AEAD,
.alg.crypto = {
.cra_name = "authenc(hmac(sha512),cbc(aes))",
.cra_driver_name = "authenc-hmac-sha512-cbc-aes-talitos",
.cra_blocksize = AES_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC,
.cra_type = &crypto_aead_type,
.cra_aead = {
.setkey = aead_setkey,
.setauthsize = aead_setauthsize,
.encrypt = aead_encrypt,
.decrypt = aead_decrypt,
.givencrypt = aead_givencrypt,
.geniv = "<built-in>",
.ivsize = AES_BLOCK_SIZE,
.maxauthsize = SHA512_DIGEST_SIZE,
}
},
.desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP |
DESC_HDR_SEL0_AESU |
DESC_HDR_MODE0_AESU_CBC |
DESC_HDR_SEL1_MDEUB |
DESC_HDR_MODE1_MDEU_INIT |
DESC_HDR_MODE1_MDEU_PAD |
DESC_HDR_MODE1_MDEUB_SHA512_HMAC,
},
{ .type = CRYPTO_ALG_TYPE_AEAD,
.alg.crypto = {
.cra_name = "authenc(hmac(sha512),cbc(des3_ede))",
.cra_driver_name = "authenc-hmac-sha512-cbc-3des-talitos",
.cra_blocksize = DES3_EDE_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC,
.cra_type = &crypto_aead_type,
.cra_aead = {
.setkey = aead_setkey,
.setauthsize = aead_setauthsize,
.encrypt = aead_encrypt,
.decrypt = aead_decrypt,
.givencrypt = aead_givencrypt,
.geniv = "<built-in>",
.ivsize = DES3_EDE_BLOCK_SIZE,
.maxauthsize = SHA512_DIGEST_SIZE,
}
},
.desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP |
DESC_HDR_SEL0_DEU |
DESC_HDR_MODE0_DEU_CBC |
DESC_HDR_MODE0_DEU_3DES |
DESC_HDR_SEL1_MDEUB |
DESC_HDR_MODE1_MDEU_INIT |
DESC_HDR_MODE1_MDEU_PAD |
DESC_HDR_MODE1_MDEUB_SHA512_HMAC,
},
{ .type = CRYPTO_ALG_TYPE_AEAD,
.alg.crypto = {
.cra_name = "authenc(hmac(md5),cbc(aes))",

View File

@ -28,6 +28,123 @@
*
*/
#define TALITOS_TIMEOUT 100000
#define TALITOS_MAX_DATA_LEN 65535
#define DESC_TYPE(desc_hdr) ((be32_to_cpu(desc_hdr) >> 3) & 0x1f)
#define PRIMARY_EU(desc_hdr) ((be32_to_cpu(desc_hdr) >> 28) & 0xf)
#define SECONDARY_EU(desc_hdr) ((be32_to_cpu(desc_hdr) >> 16) & 0xf)
/*
 * descriptor pointer entry
 *
 * One length/address pair; struct talitos_desc carries an array of these.
 * len and ptr are declared big-endian (__be16 / __be32), so values must be
 * converted before being stored.  Field order and sizes matter: do not
 * rearrange.
 */
struct talitos_ptr {
__be16 len; /* length */
u8 j_extent; /* jump to sg link table and/or extent */
u8 eptr; /* extended address */
__be32 ptr; /* address */
};
/*
 * A pointer entry with every field cleared.
 * Being static const in a header, each file including it gets its own copy.
 */
static const struct talitos_ptr zero_entry = {
.len = 0,
.j_extent = 0,
.eptr = 0,
.ptr = 0
};
/*
 * descriptor
 *
 * Two big-endian 32-bit header words followed by seven pointer entries
 * (struct talitos_ptr).
 */
struct talitos_desc {
__be32 hdr; /* header high bits */
__be32 hdr_lo; /* header low bits */
struct talitos_ptr ptr[7]; /* ptr/len pair array */
};
/**
 * struct talitos_request - descriptor submission request
 * @desc: descriptor pointer (kernel virtual)
 * @dma_desc: descriptor's physical bus address
 * @callback: whom to call when descriptor processing is done; it is handed
 *            the device, the descriptor, @context and an int error value
 * @context: caller context (optional)
 *
 * struct talitos_channel keeps its request fifo as a pointer to these.
 */
struct talitos_request {
struct talitos_desc *desc;
dma_addr_t dma_desc;
void (*callback) (struct device *dev, struct talitos_desc *desc,
void *context, int error);
void *context;
};
/*
 * per-channel fifo management
 *
 * head and tail are indices into fifo, each paired with its own spinlock
 * (head_lock for submission, tail_lock for release).  submit_count and the
 * two locks are each marked ____cacheline_aligned.
 */
struct talitos_channel {
void __iomem *reg;
/* request fifo */
struct talitos_request *fifo;
/* number of requests pending in channel h/w fifo */
atomic_t submit_count ____cacheline_aligned;
/* request submission (head) lock */
spinlock_t head_lock ____cacheline_aligned;
/* index to next free descriptor request */
int head;
/* request release (tail) lock */
spinlock_t tail_lock ____cacheline_aligned;
/* index to next in-progress/done descriptor request */
int tail;
};
/*
 * Per-device driver state; talitos_submit() obtains it with
 * dev_get_drvdata(dev).
 *
 * Groups the register mapping and interrupt numbers, the SEC geometry and
 * feature flags, the array of channels, and the bookkeeping for registered
 * algorithms and the hwrng device.
 */
struct talitos_private {
struct device *dev;
struct platform_device *ofdev;
void __iomem *reg;
/* two interrupt numbers; NOTE(review): second one may be unused - confirm */
int irq[2];
/* SEC global registers lock */
spinlock_t reg_lock ____cacheline_aligned;
/* SEC version geometry (from device tree node) */
unsigned int num_channels;
unsigned int chfifo_len;
unsigned int exec_units;
unsigned int desc_types;
/* SEC Compatibility info */
unsigned long features;
/*
* length of the request fifo
* fifo_len is chfifo_len rounded up to next power of 2
* so we can use bitwise ops to wrap
*/
unsigned int fifo_len;
struct talitos_channel *chan;
/* next channel to be assigned next incoming descriptor */
atomic_t last_chan ____cacheline_aligned;
/* request callback tasklet */
struct tasklet_struct done_task[2];
/* list of registered algorithms */
struct list_head alg_list;
/* hwrng device */
struct hwrng rng;
};
/*
 * Submit descriptor desc on channel ch of dev.  The function is defined
 * and exported (EXPORT_SYMBOL) by the driver's C file.  callback is handed
 * dev, the descriptor, context and an int error value; per the comment on
 * the definition it must check that error and the feedback in the
 * descriptor header for the device processing status.
 * The visible tail of the definition returns -EINPROGRESS; other return
 * values are not shown here - check the definition.
 */
extern int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc,
void (*callback)(struct device *dev,
struct talitos_desc *desc,
void *context, int error),
void *context);
/* .features flag */
#define TALITOS_FTR_SRC_LINK_TBL_LEN_INCLUDES_EXTENT 0x00000001
#define TALITOS_FTR_HW_AUTH_CHECK 0x00000002
#define TALITOS_FTR_SHA224_HWINIT 0x00000004
#define TALITOS_FTR_HMAC_OK 0x00000008
/*
* TALITOS_xxx_LO addresses point to the low data bits (32-63) of the register
*/
@ -209,6 +326,12 @@
DESC_HDR_MODE1_MDEU_HMAC)
#define DESC_HDR_MODE1_MDEU_SHA1_HMAC (DESC_HDR_MODE1_MDEU_SHA1 | \
DESC_HDR_MODE1_MDEU_HMAC)
#define DESC_HDR_MODE1_MDEU_SHA224_HMAC (DESC_HDR_MODE1_MDEU_SHA224 | \
DESC_HDR_MODE1_MDEU_HMAC)
#define DESC_HDR_MODE1_MDEUB_SHA384_HMAC (DESC_HDR_MODE1_MDEUB_SHA384 | \
DESC_HDR_MODE1_MDEU_HMAC)
#define DESC_HDR_MODE1_MDEUB_SHA512_HMAC (DESC_HDR_MODE1_MDEUB_SHA512 | \
DESC_HDR_MODE1_MDEU_HMAC)
/* direction of overall data flow (DIR) */
#define DESC_HDR_DIR_INBOUND cpu_to_be32(0x00000002)

View File

@ -0,0 +1,22 @@
#ifndef __LINUX_ATMEL_AES_H
#define __LINUX_ATMEL_AES_H
#include <mach/at_hdmac.h>
/**
 * struct aes_dma_data - DMA data for AES
 * @txdata: DMA slave descriptor; presumably the transmit side (TODO confirm)
 * @rxdata: DMA slave descriptor; presumably the receive side (TODO confirm)
 */
struct aes_dma_data {
struct at_dma_slave txdata;
struct at_dma_slave rxdata;
};
/**
 * struct aes_platform_data - board-specific AES configuration
 * @dma_slave: DMA slave interface to use in data transfers; points to a
 *             struct aes_dma_data.
 */
struct aes_platform_data {
struct aes_dma_data *dma_slave;
};
#endif /* __LINUX_ATMEL_AES_H */