linux/drivers/crypto/ccp/ccp-dev-v3.c
Ard Biesheuvel 11548f5a57 crypto: ccp - set max RSA modulus size for v3 platform devices as well
AMD Seattle incorporates a non-PCI version of the v3 CCP crypto
accelerator, and this version was left behind when the maximum
RSA modulus size was parameterized in order to support v5 hardware
which supports larger moduli than v3 hardware does. Due to this
oversight, RSA acceleration no longer works at all on these systems.

Fix this by setting the .rsamax property to the appropriate value
for v3 platform hardware.

Fixes: e28c190db6 ("csrypto: ccp - Expand RSA support for a v5 ccp")
Cc: Gary R Hook <gary.hook@amd.com>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Acked-by: Gary R Hook <gary.hook@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2019-12-11 16:36:55 +08:00

599 lines
15 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* AMD Cryptographic Coprocessor (CCP) driver
*
* Copyright (C) 2013,2017 Advanced Micro Devices, Inc.
*
* Author: Tom Lendacky <thomas.lendacky@amd.com>
* Author: Gary R Hook <gary.hook@amd.com>
*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/interrupt.h>
#include <linux/ccp.h>
#include "ccp-dev.h"
/*
 * Reserve @count consecutive key storage block (KSB) entries.
 *
 * The shared bitmap is searched under sb_mutex.  When no suitable run of
 * free entries exists, sb_avail is cleared and the caller sleeps on
 * sb_queue until it is set again, then the search is retried.
 *
 * Returns KSB_START plus the index of the first reserved entry, or 0 if
 * the wait returned a non-zero value (e.g. it was interrupted).
 */
static u32 ccp_alloc_ksb(struct ccp_cmd_queue *cmd_q, unsigned int count)
{
	struct ccp_device *ccp = cmd_q->ccp;
	int first;

	do {
		mutex_lock(&ccp->sb_mutex);

		first = (u32)bitmap_find_next_zero_area(ccp->sb,
							ccp->sb_count,
							ccp->sb_start,
							count, 0);
		if (first <= ccp->sb_count) {
			/* Found a free run: claim it and hand it out */
			bitmap_set(ccp->sb, first, count);
			mutex_unlock(&ccp->sb_mutex);

			return KSB_START + first;
		}

		/* Nothing free right now; note that before dropping the lock */
		ccp->sb_avail = 0;
		mutex_unlock(&ccp->sb_mutex);

		/* Sleep until KSB entries become available, then retry */
	} while (!wait_event_interruptible(ccp->sb_queue, ccp->sb_avail));

	return 0;
}
/*
 * Release @count KSB entries beginning at @start.
 *
 * @start is a value in the KSB_START-based numbering used by the
 * allocator; a @start of 0 is treated as "nothing to free".  After the
 * bitmap is updated, sb_avail is set and every waiter on sb_queue is
 * woken.
 */
static void ccp_free_ksb(struct ccp_cmd_queue *cmd_q, unsigned int start,
			 unsigned int count)
{
	struct ccp_device *ccp = cmd_q->ccp;

	if (start == 0)
		return;

	mutex_lock(&ccp->sb_mutex);

	/* Convert back to a bitmap index before clearing */
	bitmap_clear(ccp->sb, start - KSB_START, count);
	ccp->sb_avail = 1;

	mutex_unlock(&ccp->sb_mutex);

	wake_up_interruptible_all(&ccp->sb_queue);
}
/*
 * Read the queue's status register and return the depth field that
 * CMD_Q_DEPTH() extracts from it.
 */
static unsigned int ccp_get_free_slots(struct ccp_cmd_queue *cmd_q)
{
	unsigned int status = ioread32(cmd_q->reg_status);

	return CMD_Q_DEPTH(status);
}
/*
 * Submit one command to the CCP and, when a completion interrupt was
 * requested, wait for it.
 *
 * @op:       operation being submitted; supplies the queue, job id and the
 *            soc/ioc flags
 * @cr:       values for the request registers that follow CMD_REQ0
 * @cr_count: number of entries in @cr
 *
 * The @cr values are written first and CMD_REQ0 last, all under req_mutex.
 *
 * Returns 0 on success (or when no completion interrupt was requested),
 * the non-zero result of the wait if it failed, or -EIO when the queue
 * recorded a command error.
 */
static int ccp_do_cmd(struct ccp_op *op, u32 *cr, unsigned int cr_count)
{
struct ccp_cmd_queue *cmd_q = op->cmd_q;
struct ccp_device *ccp = cmd_q->ccp;
void __iomem *cr_addr;
u32 cr0, cmd;
unsigned int i;
int ret = 0;
/* We could read a status register to see how many free slots
* are actually available, but reading that register resets it
* and you could lose some error information.
*/
cmd_q->free_slots--;
/* CMD_REQ0 carries the queue id, the job id and the control flags */
cr0 = (cmd_q->id << REQ0_CMD_Q_SHIFT)
| (op->jobid << REQ0_JOBID_SHIFT)
| REQ0_WAIT_FOR_WRITE;
if (op->soc)
cr0 |= REQ0_STOP_ON_COMPLETE
| REQ0_INT_ON_COMPLETE;
/* Also ask for an interrupt when requested or when the slot count hit 0 */
if (op->ioc || !cmd_q->free_slots)
cr0 |= REQ0_INT_ON_COMPLETE;
/* Start at CMD_REQ1 */
cr_addr = ccp->io_regs + CMD_REQ0 + CMD_REQ_INCR;
/* req_mutex covers the whole register write sequence */
mutex_lock(&ccp->req_mutex);
/* Write CMD_REQ1 through CMD_REQx first */
for (i = 0; i < cr_count; i++, cr_addr += CMD_REQ_INCR)
iowrite32(*(cr + i), cr_addr);
/* Tell the CCP to start */
wmb();
iowrite32(cr0, ccp->io_regs + CMD_REQ0);
mutex_unlock(&ccp->req_mutex);
if (cr0 & REQ0_INT_ON_COMPLETE) {
/* Wait for the job to complete */
ret = wait_event_interruptible(cmd_q->int_queue,
cmd_q->int_rcvd);
if (ret || cmd_q->cmd_error) {
/* On error delete all related jobs from the queue */
cmd = (cmd_q->id << DEL_Q_ID_SHIFT)
| op->jobid;
if (cmd_q->cmd_error)
ccp_log_error(cmd_q->ccp,
cmd_q->cmd_error);
iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
/* The wait itself succeeded, so report the command error */
if (!ret)
ret = -EIO;
} else if (op->soc) {
/* Delete just head job from the queue on SoC */
cmd = DEL_Q_ACTIVE
| (cmd_q->id << DEL_Q_ID_SHIFT)
| op->jobid;
iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
}
/* Refresh the slot count from the status value saved in q_status */
cmd_q->free_slots = CMD_Q_DEPTH(cmd_q->q_status);
/* Re-arm the wait condition for the next command */
cmd_q->int_rcvd = 0;
}
return ret;
}
/*
 * Build the REQ1..REQ6 register values for an AES operation from @op and
 * submit them through ccp_do_cmd().  Returns ccp_do_cmd()'s result.
 */
static int ccp_perform_aes(struct ccp_op *op)
{
	u32 cr[6];

	/* REQ1: engine, AES parameters and key storage slot */
	cr[0] = (op->sb_key << REQ1_KEY_KSB_SHIFT);
	cr[0] |= (op->u.aes.action << REQ1_AES_ACTION_SHIFT);
	cr[0] |= (op->u.aes.mode << REQ1_AES_MODE_SHIFT);
	cr[0] |= (op->u.aes.type << REQ1_AES_TYPE_SHIFT);
	cr[0] |= (CCP_ENGINE_AES << REQ1_ENGINE_SHIFT);

	if (op->u.aes.mode == CCP_AES_MODE_CFB)
		cr[0] |= ((0x7f) << REQ1_AES_CFB_SIZE_SHIFT);

	if (op->init)
		cr[0] |= REQ1_INIT;

	if (op->eom)
		cr[0] |= REQ1_EOM;

	/* REQ2: source length, written as length minus one */
	cr[1] = op->src.u.dma.length - 1;

	/* REQ3/REQ4: source address, memory type and context slot */
	cr[2] = ccp_addr_lo(&op->src.u.dma);
	cr[3] = ccp_addr_hi(&op->src.u.dma);
	cr[3] |= (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT);
	cr[3] |= (op->sb_ctx << REQ4_KSB_SHIFT);

	/* REQ5/REQ6: destination address and memory type */
	cr[4] = ccp_addr_lo(&op->dst.u.dma);
	cr[5] = ccp_addr_hi(&op->dst.u.dma);
	cr[5] |= (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT);

	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}
/*
 * Build the REQ1..REQ6 register values for an XTS-AES operation from @op
 * and submit them through ccp_do_cmd().  Returns ccp_do_cmd()'s result.
 */
static int ccp_perform_xts_aes(struct ccp_op *op)
{
	u32 cr[6];

	/* REQ1: engine, action, unit size and key storage slot */
	cr[0] = (op->sb_key << REQ1_KEY_KSB_SHIFT);
	cr[0] |= (op->u.xts.unit_size << REQ1_XTS_AES_SIZE_SHIFT);
	cr[0] |= (op->u.xts.action << REQ1_AES_ACTION_SHIFT);
	cr[0] |= (CCP_ENGINE_XTS_AES_128 << REQ1_ENGINE_SHIFT);

	if (op->init)
		cr[0] |= REQ1_INIT;

	if (op->eom)
		cr[0] |= REQ1_EOM;

	/* REQ2: source length, written as length minus one */
	cr[1] = op->src.u.dma.length - 1;

	/* REQ3/REQ4: source address, memory type and context slot */
	cr[2] = ccp_addr_lo(&op->src.u.dma);
	cr[3] = ccp_addr_hi(&op->src.u.dma);
	cr[3] |= (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT);
	cr[3] |= (op->sb_ctx << REQ4_KSB_SHIFT);

	/* REQ5/REQ6: destination address and memory type */
	cr[4] = ccp_addr_lo(&op->dst.u.dma);
	cr[5] = ccp_addr_hi(&op->dst.u.dma);
	cr[5] |= (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT);

	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}
/*
 * Build the REQ1..REQ6 register values for a SHA operation from @op and
 * submit them through ccp_do_cmd().  Returns ccp_do_cmd()'s result.
 *
 * The last two registers carry the 64-bit message bit count only when
 * @op marks the end of the message; otherwise they are written as zero.
 */
static int ccp_perform_sha(struct ccp_op *op)
{
	u32 cr[6];

	/* REQ1: engine and SHA type; REQ1_INIT is always set here */
	cr[0] = REQ1_INIT;
	cr[0] |= (op->u.sha.type << REQ1_SHA_TYPE_SHIFT);
	cr[0] |= (CCP_ENGINE_SHA << REQ1_ENGINE_SHIFT);

	/* REQ2: source length, written as length minus one */
	cr[1] = op->src.u.dma.length - 1;

	/* REQ3/REQ4: source address, memory type and context slot */
	cr[2] = ccp_addr_lo(&op->src.u.dma);
	cr[3] = ccp_addr_hi(&op->src.u.dma);
	cr[3] |= (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT);
	cr[3] |= (op->sb_ctx << REQ4_KSB_SHIFT);

	/* REQ5/REQ6: message length in bits, only on the final block */
	cr[4] = 0;
	cr[5] = 0;
	if (op->eom) {
		cr[0] |= REQ1_EOM;
		cr[4] = lower_32_bits(op->u.sha.msg_bits);
		cr[5] = upper_32_bits(op->u.sha.msg_bits);
	}

	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}
/*
 * Build the REQ1..REQ6 register values for an RSA operation from @op and
 * submit them through ccp_do_cmd().  Returns ccp_do_cmd()'s result.
 */
static int ccp_perform_rsa(struct ccp_op *op)
{
	u32 cr[6];

	/* REQ1: engine, modulus size and key slot; EOM is always set */
	cr[0] = REQ1_EOM;
	cr[0] |= (op->sb_key << REQ1_KEY_KSB_SHIFT);
	cr[0] |= (op->u.rsa.mod_size << REQ1_RSA_MOD_SIZE_SHIFT);
	cr[0] |= (CCP_ENGINE_RSA << REQ1_ENGINE_SHIFT);

	/* REQ2: RSA input length, written as length minus one */
	cr[1] = op->u.rsa.input_len - 1;

	/* REQ3/REQ4: source address, memory type and context slot */
	cr[2] = ccp_addr_lo(&op->src.u.dma);
	cr[3] = ccp_addr_hi(&op->src.u.dma);
	cr[3] |= (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT);
	cr[3] |= (op->sb_ctx << REQ4_KSB_SHIFT);

	/* REQ5/REQ6: destination address and memory type */
	cr[4] = ccp_addr_lo(&op->dst.u.dma);
	cr[5] = ccp_addr_hi(&op->dst.u.dma);
	cr[5] |= (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT);

	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}
/*
 * Build the REQ1..REQ6 register values for a pass-through operation from
 * @op and submit them through ccp_do_cmd().  Returns ccp_do_cmd()'s
 * result.
 *
 * Either end of the transfer may be system memory or a storage block
 * entry; the length is taken from the source when it is system memory and
 * from the destination otherwise.
 */
static int ccp_perform_passthru(struct ccp_op *op)
{
	u32 cr[6];

	/* REQ1: engine plus bitwise and byte-swap modifiers */
	cr[0] = (op->u.passthru.byte_swap << REQ1_PT_BS_SHIFT);
	cr[0] |= (op->u.passthru.bit_mod << REQ1_PT_BW_SHIFT);
	cr[0] |= (CCP_ENGINE_PASSTHRU << REQ1_ENGINE_SHIFT);

	if (op->eom)
		cr[0] |= REQ1_EOM;

	/* REQ2..REQ4: length and source description */
	if (op->src.type == CCP_MEMTYPE_SYSTEM) {
		cr[1] = op->src.u.dma.length - 1;

		cr[2] = ccp_addr_lo(&op->src.u.dma);
		cr[3] = ccp_addr_hi(&op->src.u.dma);
		cr[3] |= (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT);

		/* The key slot is only added when a bitwise operation is used */
		if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
			cr[3] |= (op->sb_key << REQ4_KSB_SHIFT);
	} else {
		cr[1] = op->dst.u.dma.length - 1;

		cr[2] = op->src.u.sb * CCP_SB_BYTES;
		cr[3] = (CCP_MEMTYPE_SB << REQ4_MEMTYPE_SHIFT);
	}

	/* REQ5/REQ6: destination description */
	if (op->dst.type == CCP_MEMTYPE_SYSTEM) {
		cr[4] = ccp_addr_lo(&op->dst.u.dma);
		cr[5] = ccp_addr_hi(&op->dst.u.dma);
		cr[5] |= (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT);
	} else {
		cr[4] = op->dst.u.sb * CCP_SB_BYTES;
		cr[5] = (CCP_MEMTYPE_SB << REQ6_MEMTYPE_SHIFT);
	}

	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}
/*
 * Build the REQ1..REQ6 register values for an ECC operation from @op and
 * submit them through ccp_do_cmd().  Returns ccp_do_cmd()'s result.
 */
static int ccp_perform_ecc(struct ccp_op *op)
{
	u32 cr[6];

	/* REQ1: engine and ECC function; affine convert and EOM always set */
	cr[0] = REQ1_EOM;
	cr[0] |= (op->u.ecc.function << REQ1_ECC_FUNCTION_SHIFT);
	cr[0] |= (CCP_ENGINE_ECC << REQ1_ENGINE_SHIFT);
	cr[0] |= REQ1_ECC_AFFINE_CONVERT;

	/* REQ2: source length, written as length minus one */
	cr[1] = op->src.u.dma.length - 1;

	/* REQ3/REQ4: source address and memory type (no storage block slot) */
	cr[2] = ccp_addr_lo(&op->src.u.dma);
	cr[3] = ccp_addr_hi(&op->src.u.dma);
	cr[3] |= (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT);

	/* REQ5/REQ6: destination address and memory type */
	cr[4] = ccp_addr_lo(&op->dst.u.dma);
	cr[5] = ccp_addr_hi(&op->dst.u.dma);
	cr[5] |= (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT);

	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}
/* Write an all-zero value to the device's interrupt mask register. */
static void ccp_disable_queue_interrupts(struct ccp_device *ccp)
{
	iowrite32(0, ccp->io_regs + IRQ_MASK_REG);
}
/*
 * Write the accumulated queue interrupt mask (ccp->qim) to the device's
 * interrupt mask register.
 */
static void ccp_enable_queue_interrupts(struct ccp_device *ccp)
{
	iowrite32(ccp->qim, ccp->io_regs + IRQ_MASK_REG);
}
/*
 * Interrupt bottom half: @data is the struct ccp_device pointer cast to
 * unsigned long.
 *
 * The interrupt status register is read once.  For every queue with an
 * "ok" or "error" bit set in that value, the queue's status registers are
 * captured, the first error is recorded, the interrupt is acknowledged and
 * the waiter on the queue's int_queue is woken.  Queue interrupts are
 * re-enabled before returning.
 */
static void ccp_irq_bh(unsigned long data)
{
	struct ccp_device *ccp = (struct ccp_device *)data;
	u32 pending;
	unsigned int idx;

	pending = ioread32(ccp->io_regs + IRQ_STATUS_REG);

	for (idx = 0; idx < ccp->cmd_q_count; idx++) {
		struct ccp_cmd_queue *queue = &ccp->cmd_q[idx];
		u32 q_bits = pending & (queue->int_ok | queue->int_err);

		/* Nothing raised for this queue */
		if (!q_bits)
			continue;

		queue->int_status = pending;
		queue->q_status = ioread32(queue->reg_status);
		queue->q_int_status = ioread32(queue->reg_int_status);

		/* Keep only the first error value seen */
		if ((q_bits & queue->int_err) && !queue->cmd_error)
			queue->cmd_error = CMD_Q_ERROR(queue->q_status);

		queue->int_rcvd = 1;

		/* Acknowledge the interrupt, then wake the waiting thread */
		iowrite32(q_bits, ccp->io_regs + IRQ_STATUS_REG);
		wake_up_interruptible(&queue->int_queue);
	}

	ccp_enable_queue_interrupts(ccp);
}
/*
 * Top-level interrupt handler; @data is the struct ccp_device.
 *
 * Queue interrupts are masked first.  The bottom half then runs either
 * from the device's tasklet or directly in this context, depending on
 * ccp->use_tasklet.  Always returns IRQ_HANDLED.
 */
static irqreturn_t ccp_irq_handler(int irq, void *data)
{
	struct ccp_device *ccp = data;

	ccp_disable_queue_interrupts(ccp);

	if (!ccp->use_tasklet)
		ccp_irq_bh((unsigned long)ccp);
	else
		tasklet_schedule(&ccp->irq_tasklet);

	return IRQ_HANDLED;
}
/*
 * Bring up a v3 CCP device.
 *
 * Discovers the hardware queues flagged in Q_MASK_REG (up to
 * ccp->max_q_count of them), gives each a DMA pool, two reserved KSB
 * entries and its register/interrupt-bit presets, then requests the IRQ,
 * starts one kthread per queue, enables queue interrupts, adds the device
 * to the list of available units and registers the RNG and DMA engine
 * support.
 *
 * Returns 0 on success or a negative errno.  On failure everything set up
 * so far is unwound, including removing the device from the unit list and
 * masking queue interrupts if the failure happened after those steps.
 */
static int ccp_init(struct ccp_device *ccp)
{
	struct device *dev = ccp->dev;
	struct ccp_cmd_queue *cmd_q;
	struct dma_pool *dma_pool;
	char dma_pool_name[MAX_DMAPOOL_NAME_LEN];
	unsigned int qmr, i;
	int ret;

	/* Find available queues */
	ccp->qim = 0;
	qmr = ioread32(ccp->io_regs + Q_MASK_REG);
	for (i = 0; (i < MAX_HW_QUEUES) && (ccp->cmd_q_count < ccp->max_q_count); i++) {
		if (!(qmr & (1 << i)))
			continue;

		/* Allocate a dma pool for this queue */
		snprintf(dma_pool_name, sizeof(dma_pool_name), "%s_q%d",
			 ccp->name, i);
		dma_pool = dma_pool_create(dma_pool_name, dev,
					   CCP_DMAPOOL_MAX_SIZE,
					   CCP_DMAPOOL_ALIGN, 0);
		if (!dma_pool) {
			dev_err(dev, "unable to allocate dma pool\n");
			ret = -ENOMEM;
			goto e_pool;
		}

		cmd_q = &ccp->cmd_q[ccp->cmd_q_count];
		ccp->cmd_q_count++;

		cmd_q->ccp = ccp;
		cmd_q->id = i;
		cmd_q->dma_pool = dma_pool;

		/* Reserve 2 KSB regions for the queue */
		cmd_q->sb_key = KSB_START + ccp->sb_start++;
		cmd_q->sb_ctx = KSB_START + ccp->sb_start++;
		ccp->sb_count -= 2;

		/* Preset some register values and masks that are queue
		 * number dependent
		 */
		cmd_q->reg_status = ccp->io_regs + CMD_Q_STATUS_BASE +
				    (CMD_Q_STATUS_INCR * i);
		cmd_q->reg_int_status = ccp->io_regs + CMD_Q_INT_STATUS_BASE +
					(CMD_Q_STATUS_INCR * i);
		cmd_q->int_ok = 1 << (i * 2);
		cmd_q->int_err = 1 << ((i * 2) + 1);

		cmd_q->free_slots = ccp_get_free_slots(cmd_q);

		init_waitqueue_head(&cmd_q->int_queue);

		/* Build queue interrupt mask (two interrupts per queue) */
		ccp->qim |= cmd_q->int_ok | cmd_q->int_err;

#ifdef CONFIG_ARM64
		/* For arm64 set the recommended queue cache settings */
		iowrite32(ccp->axcache, ccp->io_regs + CMD_Q_CACHE_BASE +
			  (CMD_Q_CACHE_INC * i));
#endif

		dev_dbg(dev, "queue #%u available\n", i);
	}
	if (ccp->cmd_q_count == 0) {
		dev_notice(dev, "no command queues available\n");
		ret = -EIO;
		goto e_pool;
	}
	dev_notice(dev, "%u command queues available\n", ccp->cmd_q_count);

	/* Disable and clear interrupts until ready */
	ccp_disable_queue_interrupts(ccp);
	for (i = 0; i < ccp->cmd_q_count; i++) {
		cmd_q = &ccp->cmd_q[i];

		ioread32(cmd_q->reg_int_status);
		ioread32(cmd_q->reg_status);
	}
	iowrite32(ccp->qim, ccp->io_regs + IRQ_STATUS_REG);

	/* Request an irq */
	ret = sp_request_ccp_irq(ccp->sp, ccp_irq_handler, ccp->name, ccp);
	if (ret) {
		dev_err(dev, "unable to allocate an IRQ\n");
		goto e_pool;
	}

	/* Initialize the ISR tasklet? */
	if (ccp->use_tasklet)
		tasklet_init(&ccp->irq_tasklet, ccp_irq_bh,
			     (unsigned long)ccp);

	dev_dbg(dev, "Starting threads...\n");
	/* Create a kthread for each queue */
	for (i = 0; i < ccp->cmd_q_count; i++) {
		struct task_struct *kthread;

		cmd_q = &ccp->cmd_q[i];

		kthread = kthread_create(ccp_cmd_queue_thread, cmd_q,
					 "%s-q%u", ccp->name, cmd_q->id);
		if (IS_ERR(kthread)) {
			dev_err(dev, "error creating queue thread (%ld)\n",
				PTR_ERR(kthread));
			ret = PTR_ERR(kthread);
			goto e_kthread;
		}

		cmd_q->kthread = kthread;
		wake_up_process(kthread);
	}

	dev_dbg(dev, "Enabling interrupts...\n");
	/* Enable interrupts */
	ccp_enable_queue_interrupts(ccp);

	dev_dbg(dev, "Registering device...\n");
	ccp_add_device(ccp);

	/* From here on the device is on the unit list and interrupts are
	 * enabled, so failures must unwind through e_del_device.
	 */
	ret = ccp_register_rng(ccp);
	if (ret)
		goto e_del_device;

	/* Register the DMA engine support */
	ret = ccp_dmaengine_register(ccp);
	if (ret)
		goto e_hwrng;

	return 0;

e_hwrng:
	ccp_unregister_rng(ccp);

e_del_device:
	/* Take the device off the unit list before its queues are torn
	 * down, and mask queue interrupts again, mirroring ccp_destroy().
	 */
	ccp_del_device(ccp);
	ccp_disable_queue_interrupts(ccp);

e_kthread:
	for (i = 0; i < ccp->cmd_q_count; i++)
		if (ccp->cmd_q[i].kthread)
			kthread_stop(ccp->cmd_q[i].kthread);

	sp_free_ccp_irq(ccp->sp, ccp);

e_pool:
	for (i = 0; i < ccp->cmd_q_count; i++)
		dma_pool_destroy(ccp->cmd_q[i].dma_pool);

	return ret;
}
/*
 * Tear down a v3 CCP device.
 *
 * Unregisters the DMA engine and RNG support, removes the device from the
 * list of available units, masks and clears queue interrupts, stops the
 * per-queue kthreads, frees the IRQ and destroys the per-queue DMA pools.
 * Any commands still on the cmd or backlog lists are then completed by
 * calling their callbacks with -ENODEV.
 */
static void ccp_destroy(struct ccp_device *ccp)
{
struct ccp_cmd_queue *cmd_q;
struct ccp_cmd *cmd;
unsigned int i;
/* Unregister the DMA engine */
ccp_dmaengine_unregister(ccp);
/* Unregister the RNG */
ccp_unregister_rng(ccp);
/* Remove this device from the list of available units */
ccp_del_device(ccp);
/* Disable and clear interrupts */
ccp_disable_queue_interrupts(ccp);
for (i = 0; i < ccp->cmd_q_count; i++) {
cmd_q = &ccp->cmd_q[i];
/* Values are discarded; the registers are read only for the read itself */
ioread32(cmd_q->reg_int_status);
ioread32(cmd_q->reg_status);
}
/* Write every queue interrupt bit back to the interrupt status register */
iowrite32(ccp->qim, ccp->io_regs + IRQ_STATUS_REG);
/* Stop the queue kthreads */
for (i = 0; i < ccp->cmd_q_count; i++)
if (ccp->cmd_q[i].kthread)
kthread_stop(ccp->cmd_q[i].kthread);
/* Release the IRQ before the DMA pools are destroyed */
sp_free_ccp_irq(ccp->sp, ccp);
for (i = 0; i < ccp->cmd_q_count; i++)
dma_pool_destroy(ccp->cmd_q[i].dma_pool);
/* Flush the cmd and backlog queue */
while (!list_empty(&ccp->cmd)) {
/* Invoke the callback directly with an error code */
cmd = list_first_entry(&ccp->cmd, struct ccp_cmd, entry);
list_del(&cmd->entry);
cmd->callback(cmd->data, -ENODEV);
}
while (!list_empty(&ccp->backlog)) {
/* Invoke the callback directly with an error code */
cmd = list_first_entry(&ccp->backlog, struct ccp_cmd, entry);
list_del(&cmd->entry);
cmd->callback(cmd->data, -ENODEV);
}
}
/* Operation table binding the v3 implementations in this file. */
static const struct ccp_actions ccp3_actions = {
	/* Device lifecycle and interrupt handling */
	.init = ccp_init,
	.destroy = ccp_destroy,
	.irqhandler = ccp_irq_handler,
	.get_free_slots = ccp_get_free_slots,

	/* Key storage block allocation */
	.sballoc = ccp_alloc_ksb,
	.sbfree = ccp_free_ksb,

	/* Engine operations; no 3DES handler is provided in this table */
	.aes = ccp_perform_aes,
	.xts_aes = ccp_perform_xts_aes,
	.des3 = NULL,
	.sha = ccp_perform_sha,
	.rsa = ccp_perform_rsa,
	.passthru = ccp_perform_passthru,
	.ecc = ccp_perform_ecc,
};
/*
 * Version data for the v3 platform CCP: uses the v3 operation table with
 * a register offset of 0 and the v3 maximum RSA modulus width.
 */
const struct ccp_vdata ccpv3_platform = {
	.version = CCP_VERSION(3, 0),
	.perform = &ccp3_actions,
	.setup = NULL,
	.rsamax = CCP_RSA_MAX_WIDTH,
	.offset = 0,
};
/*
 * Version data for the v3 CCP whose registers sit at offset 0x20000
 * (presumably the PCI variant -- confirm against the users of ccpv3).
 * Uses the v3 operation table and the v3 maximum RSA modulus width.
 */
const struct ccp_vdata ccpv3 = {
	.version = CCP_VERSION(3, 0),
	.perform = &ccp3_actions,
	.setup = NULL,
	.rsamax = CCP_RSA_MAX_WIDTH,
	.offset = 0x20000,
};