/* * AMD Cryptographic Coprocessor (CCP) driver * * Copyright (C) 2013,2016 Advanced Micro Devices, Inc. * * Author: Tom Lendacky * Author: Gary R Hook * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ #include #include #include #include #include #include #include "ccp-dev.h" static u32 ccp_alloc_ksb(struct ccp_cmd_queue *cmd_q, unsigned int count) { int start; struct ccp_device *ccp = cmd_q->ccp; for (;;) { mutex_lock(&ccp->sb_mutex); start = (u32)bitmap_find_next_zero_area(ccp->sb, ccp->sb_count, ccp->sb_start, count, 0); if (start <= ccp->sb_count) { bitmap_set(ccp->sb, start, count); mutex_unlock(&ccp->sb_mutex); break; } ccp->sb_avail = 0; mutex_unlock(&ccp->sb_mutex); /* Wait for KSB entries to become available */ if (wait_event_interruptible(ccp->sb_queue, ccp->sb_avail)) return 0; } return KSB_START + start; } static void ccp_free_ksb(struct ccp_cmd_queue *cmd_q, unsigned int start, unsigned int count) { struct ccp_device *ccp = cmd_q->ccp; if (!start) return; mutex_lock(&ccp->sb_mutex); bitmap_clear(ccp->sb, start - KSB_START, count); ccp->sb_avail = 1; mutex_unlock(&ccp->sb_mutex); wake_up_interruptible_all(&ccp->sb_queue); } static unsigned int ccp_get_free_slots(struct ccp_cmd_queue *cmd_q) { return CMD_Q_DEPTH(ioread32(cmd_q->reg_status)); } static int ccp_do_cmd(struct ccp_op *op, u32 *cr, unsigned int cr_count) { struct ccp_cmd_queue *cmd_q = op->cmd_q; struct ccp_device *ccp = cmd_q->ccp; void __iomem *cr_addr; u32 cr0, cmd; unsigned int i; int ret = 0; /* We could read a status register to see how many free slots * are actually available, but reading that register resets it * and you could lose some error information. */ cmd_q->free_slots--; cr0 = (cmd_q->id << REQ0_CMD_Q_SHIFT) | (op->jobid << REQ0_JOBID_SHIFT) | REQ0_WAIT_FOR_WRITE; if (op->soc) cr0 |= REQ0_STOP_ON_COMPLETE | REQ0_INT_ON_COMPLETE; if (op->ioc || !cmd_q->free_slots) cr0 |= REQ0_INT_ON_COMPLETE; /* Start at CMD_REQ1 */ cr_addr = ccp->io_regs + CMD_REQ0 + CMD_REQ_INCR; mutex_lock(&ccp->req_mutex); /* Write CMD_REQ1 through CMD_REQx first */ for (i = 0; i < cr_count; i++, cr_addr += CMD_REQ_INCR) iowrite32(*(cr + i), cr_addr); /* Tell the CCP to start */ wmb(); iowrite32(cr0, ccp->io_regs + CMD_REQ0); mutex_unlock(&ccp->req_mutex); if (cr0 & REQ0_INT_ON_COMPLETE) { /* Wait for the job to complete */ ret = wait_event_interruptible(cmd_q->int_queue, cmd_q->int_rcvd); if (ret || cmd_q->cmd_error) { /* On error delete all related jobs from the queue */ cmd = (cmd_q->id << DEL_Q_ID_SHIFT) | op->jobid; iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB); if (!ret) ret = -EIO; } else if (op->soc) { /* Delete just head job from the queue on SoC */ cmd = DEL_Q_ACTIVE | (cmd_q->id << DEL_Q_ID_SHIFT) | op->jobid; iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB); } cmd_q->free_slots = CMD_Q_DEPTH(cmd_q->q_status); cmd_q->int_rcvd = 0; } return ret; } static int ccp_perform_aes(struct ccp_op *op) { u32 cr[6]; /* Fill out the register contents for REQ1 through REQ6 */ cr[0] = (CCP_ENGINE_AES << REQ1_ENGINE_SHIFT) | (op->u.aes.type << REQ1_AES_TYPE_SHIFT) | (op->u.aes.mode << REQ1_AES_MODE_SHIFT) | (op->u.aes.action << REQ1_AES_ACTION_SHIFT) | (op->sb_key << REQ1_KEY_KSB_SHIFT); cr[1] = op->src.u.dma.length - 1; cr[2] = ccp_addr_lo(&op->src.u.dma); cr[3] = (op->sb_ctx << REQ4_KSB_SHIFT) | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) | ccp_addr_hi(&op->src.u.dma); cr[4] = ccp_addr_lo(&op->dst.u.dma); cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT) | ccp_addr_hi(&op->dst.u.dma); if (op->u.aes.mode == CCP_AES_MODE_CFB) cr[0] |= ((0x7f) << REQ1_AES_CFB_SIZE_SHIFT); if (op->eom) cr[0] |= REQ1_EOM; if (op->init) cr[0] |= REQ1_INIT; return ccp_do_cmd(op, cr, ARRAY_SIZE(cr)); } static int ccp_perform_xts_aes(struct ccp_op *op) { u32 cr[6]; /* Fill out the register contents for REQ1 through REQ6 */ cr[0] = (CCP_ENGINE_XTS_AES_128 << REQ1_ENGINE_SHIFT) | (op->u.xts.action << REQ1_AES_ACTION_SHIFT) | (op->u.xts.unit_size << REQ1_XTS_AES_SIZE_SHIFT) | (op->sb_key << REQ1_KEY_KSB_SHIFT); cr[1] = op->src.u.dma.length - 1; cr[2] = ccp_addr_lo(&op->src.u.dma); cr[3] = (op->sb_ctx << REQ4_KSB_SHIFT) | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) | ccp_addr_hi(&op->src.u.dma); cr[4] = ccp_addr_lo(&op->dst.u.dma); cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT) | ccp_addr_hi(&op->dst.u.dma); if (op->eom) cr[0] |= REQ1_EOM; if (op->init) cr[0] |= REQ1_INIT; return ccp_do_cmd(op, cr, ARRAY_SIZE(cr)); } static int ccp_perform_sha(struct ccp_op *op) { u32 cr[6]; /* Fill out the register contents for REQ1 through REQ6 */ cr[0] = (CCP_ENGINE_SHA << REQ1_ENGINE_SHIFT) | (op->u.sha.type << REQ1_SHA_TYPE_SHIFT) | REQ1_INIT; cr[1] = op->src.u.dma.length - 1; cr[2] = ccp_addr_lo(&op->src.u.dma); cr[3] = (op->sb_ctx << REQ4_KSB_SHIFT) | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) | ccp_addr_hi(&op->src.u.dma); if (op->eom) { cr[0] |= REQ1_EOM; cr[4] = lower_32_bits(op->u.sha.msg_bits); cr[5] = upper_32_bits(op->u.sha.msg_bits); } else { cr[4] = 0; cr[5] = 0; } return ccp_do_cmd(op, cr, ARRAY_SIZE(cr)); } static int ccp_perform_rsa(struct ccp_op *op) { u32 cr[6]; /* Fill out the register contents for REQ1 through REQ6 */ cr[0] = (CCP_ENGINE_RSA << REQ1_ENGINE_SHIFT) | (op->u.rsa.mod_size << REQ1_RSA_MOD_SIZE_SHIFT) | (op->sb_key << REQ1_KEY_KSB_SHIFT) | REQ1_EOM; cr[1] = op->u.rsa.input_len - 1; cr[2] = ccp_addr_lo(&op->src.u.dma); cr[3] = (op->sb_ctx << REQ4_KSB_SHIFT) | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) | ccp_addr_hi(&op->src.u.dma); cr[4] = ccp_addr_lo(&op->dst.u.dma); cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT) | ccp_addr_hi(&op->dst.u.dma); return ccp_do_cmd(op, cr, ARRAY_SIZE(cr)); } static int ccp_perform_passthru(struct ccp_op *op) { u32 cr[6]; /* Fill out the register contents for REQ1 through REQ6 */ cr[0] = (CCP_ENGINE_PASSTHRU << REQ1_ENGINE_SHIFT) | (op->u.passthru.bit_mod << REQ1_PT_BW_SHIFT) | (op->u.passthru.byte_swap << REQ1_PT_BS_SHIFT); if (op->src.type == CCP_MEMTYPE_SYSTEM) cr[1] = op->src.u.dma.length - 1; else cr[1] = op->dst.u.dma.length - 1; if (op->src.type == CCP_MEMTYPE_SYSTEM) { cr[2] = ccp_addr_lo(&op->src.u.dma); cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) | ccp_addr_hi(&op->src.u.dma); if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP) cr[3] |= (op->sb_key << REQ4_KSB_SHIFT); } else { cr[2] = op->src.u.sb * CCP_SB_BYTES; cr[3] = (CCP_MEMTYPE_SB << REQ4_MEMTYPE_SHIFT); } if (op->dst.type == CCP_MEMTYPE_SYSTEM) { cr[4] = ccp_addr_lo(&op->dst.u.dma); cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT) | ccp_addr_hi(&op->dst.u.dma); } else { cr[4] = op->dst.u.sb * CCP_SB_BYTES; cr[5] = (CCP_MEMTYPE_SB << REQ6_MEMTYPE_SHIFT); } if (op->eom) cr[0] |= REQ1_EOM; return ccp_do_cmd(op, cr, ARRAY_SIZE(cr)); } static int ccp_perform_ecc(struct ccp_op *op) { u32 cr[6]; /* Fill out the register contents for REQ1 through REQ6 */ cr[0] = REQ1_ECC_AFFINE_CONVERT | (CCP_ENGINE_ECC << REQ1_ENGINE_SHIFT) | (op->u.ecc.function << REQ1_ECC_FUNCTION_SHIFT) | REQ1_EOM; cr[1] = op->src.u.dma.length - 1; cr[2] = ccp_addr_lo(&op->src.u.dma); cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) | ccp_addr_hi(&op->src.u.dma); cr[4] = ccp_addr_lo(&op->dst.u.dma); cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT) | ccp_addr_hi(&op->dst.u.dma); return ccp_do_cmd(op, cr, ARRAY_SIZE(cr)); } static int ccp_init(struct ccp_device *ccp) { struct device *dev = ccp->dev; struct ccp_cmd_queue *cmd_q; struct dma_pool *dma_pool; char dma_pool_name[MAX_DMAPOOL_NAME_LEN]; unsigned int qmr, qim, i; int ret; /* Find available queues */ qim = 0; qmr = ioread32(ccp->io_regs + Q_MASK_REG); for (i = 0; i < MAX_HW_QUEUES; i++) { if (!(qmr & (1 << i))) continue; /* Allocate a dma pool for this queue */ snprintf(dma_pool_name, sizeof(dma_pool_name), "%s_q%d", ccp->name, i); dma_pool = dma_pool_create(dma_pool_name, dev, CCP_DMAPOOL_MAX_SIZE, CCP_DMAPOOL_ALIGN, 0); if (!dma_pool) { dev_err(dev, "unable to allocate dma pool\n"); ret = -ENOMEM; goto e_pool; } cmd_q = &ccp->cmd_q[ccp->cmd_q_count]; ccp->cmd_q_count++; cmd_q->ccp = ccp; cmd_q->id = i; cmd_q->dma_pool = dma_pool; /* Reserve 2 KSB regions for the queue */ cmd_q->sb_key = KSB_START + ccp->sb_start++; cmd_q->sb_ctx = KSB_START + ccp->sb_start++; ccp->sb_count -= 2; /* Preset some register values and masks that are queue * number dependent */ cmd_q->reg_status = ccp->io_regs + CMD_Q_STATUS_BASE + (CMD_Q_STATUS_INCR * i); cmd_q->reg_int_status = ccp->io_regs + CMD_Q_INT_STATUS_BASE + (CMD_Q_STATUS_INCR * i); cmd_q->int_ok = 1 << (i * 2); cmd_q->int_err = 1 << ((i * 2) + 1); cmd_q->free_slots = ccp_get_free_slots(cmd_q); init_waitqueue_head(&cmd_q->int_queue); /* Build queue interrupt mask (two interrupts per queue) */ qim |= cmd_q->int_ok | cmd_q->int_err; #ifdef CONFIG_ARM64 /* For arm64 set the recommended queue cache settings */ iowrite32(ccp->axcache, ccp->io_regs + CMD_Q_CACHE_BASE + (CMD_Q_CACHE_INC * i)); #endif dev_dbg(dev, "queue #%u available\n", i); } if (ccp->cmd_q_count == 0) { dev_notice(dev, "no command queues available\n"); ret = -EIO; goto e_pool; } dev_notice(dev, "%u command queues available\n", ccp->cmd_q_count); /* Disable and clear interrupts until ready */ iowrite32(0x00, ccp->io_regs + IRQ_MASK_REG); for (i = 0; i < ccp->cmd_q_count; i++) { cmd_q = &ccp->cmd_q[i]; ioread32(cmd_q->reg_int_status); ioread32(cmd_q->reg_status); } iowrite32(qim, ccp->io_regs + IRQ_STATUS_REG); /* Request an irq */ ret = ccp->get_irq(ccp); if (ret) { dev_err(dev, "unable to allocate an IRQ\n"); goto e_pool; } /* Initialize the queues used to wait for KSB space and suspend */ init_waitqueue_head(&ccp->sb_queue); init_waitqueue_head(&ccp->suspend_queue); dev_dbg(dev, "Starting threads...\n"); /* Create a kthread for each queue */ for (i = 0; i < ccp->cmd_q_count; i++) { struct task_struct *kthread; cmd_q = &ccp->cmd_q[i]; kthread = kthread_create(ccp_cmd_queue_thread, cmd_q, "%s-q%u", ccp->name, cmd_q->id); if (IS_ERR(kthread)) { dev_err(dev, "error creating queue thread (%ld)\n", PTR_ERR(kthread)); ret = PTR_ERR(kthread); goto e_kthread; } cmd_q->kthread = kthread; wake_up_process(kthread); } dev_dbg(dev, "Enabling interrupts...\n"); /* Enable interrupts */ iowrite32(qim, ccp->io_regs + IRQ_MASK_REG); dev_dbg(dev, "Registering device...\n"); ccp_add_device(ccp); ret = ccp_register_rng(ccp); if (ret) goto e_kthread; /* Register the DMA engine support */ ret = ccp_dmaengine_register(ccp); if (ret) goto e_hwrng; return 0; e_hwrng: ccp_unregister_rng(ccp); e_kthread: for (i = 0; i < ccp->cmd_q_count; i++) if (ccp->cmd_q[i].kthread) kthread_stop(ccp->cmd_q[i].kthread); ccp->free_irq(ccp); e_pool: for (i = 0; i < ccp->cmd_q_count; i++) dma_pool_destroy(ccp->cmd_q[i].dma_pool); return ret; } static void ccp_destroy(struct ccp_device *ccp) { struct ccp_cmd_queue *cmd_q; struct ccp_cmd *cmd; unsigned int qim, i; /* Unregister the DMA engine */ ccp_dmaengine_unregister(ccp); /* Unregister the RNG */ ccp_unregister_rng(ccp); /* Remove this device from the list of available units */ ccp_del_device(ccp); /* Build queue interrupt mask (two interrupt masks per queue) */ qim = 0; for (i = 0; i < ccp->cmd_q_count; i++) { cmd_q = &ccp->cmd_q[i]; qim |= cmd_q->int_ok | cmd_q->int_err; } /* Disable and clear interrupts */ iowrite32(0x00, ccp->io_regs + IRQ_MASK_REG); for (i = 0; i < ccp->cmd_q_count; i++) { cmd_q = &ccp->cmd_q[i]; ioread32(cmd_q->reg_int_status); ioread32(cmd_q->reg_status); } iowrite32(qim, ccp->io_regs + IRQ_STATUS_REG); /* Stop the queue kthreads */ for (i = 0; i < ccp->cmd_q_count; i++) if (ccp->cmd_q[i].kthread) kthread_stop(ccp->cmd_q[i].kthread); ccp->free_irq(ccp); for (i = 0; i < ccp->cmd_q_count; i++) dma_pool_destroy(ccp->cmd_q[i].dma_pool); /* Flush the cmd and backlog queue */ while (!list_empty(&ccp->cmd)) { /* Invoke the callback directly with an error code */ cmd = list_first_entry(&ccp->cmd, struct ccp_cmd, entry); list_del(&cmd->entry); cmd->callback(cmd->data, -ENODEV); } while (!list_empty(&ccp->backlog)) { /* Invoke the callback directly with an error code */ cmd = list_first_entry(&ccp->backlog, struct ccp_cmd, entry); list_del(&cmd->entry); cmd->callback(cmd->data, -ENODEV); } } static irqreturn_t ccp_irq_handler(int irq, void *data) { struct device *dev = data; struct ccp_device *ccp = dev_get_drvdata(dev); struct ccp_cmd_queue *cmd_q; u32 q_int, status; unsigned int i; status = ioread32(ccp->io_regs + IRQ_STATUS_REG); for (i = 0; i < ccp->cmd_q_count; i++) { cmd_q = &ccp->cmd_q[i]; q_int = status & (cmd_q->int_ok | cmd_q->int_err); if (q_int) { cmd_q->int_status = status; cmd_q->q_status = ioread32(cmd_q->reg_status); cmd_q->q_int_status = ioread32(cmd_q->reg_int_status); /* On error, only save the first error value */ if ((q_int & cmd_q->int_err) && !cmd_q->cmd_error) cmd_q->cmd_error = CMD_Q_ERROR(cmd_q->q_status); cmd_q->int_rcvd = 1; /* Acknowledge the interrupt and wake the kthread */ iowrite32(q_int, ccp->io_regs + IRQ_STATUS_REG); wake_up_interruptible(&cmd_q->int_queue); } } return IRQ_HANDLED; } static const struct ccp_actions ccp3_actions = { .aes = ccp_perform_aes, .xts_aes = ccp_perform_xts_aes, .sha = ccp_perform_sha, .rsa = ccp_perform_rsa, .passthru = ccp_perform_passthru, .ecc = ccp_perform_ecc, .sballoc = ccp_alloc_ksb, .sbfree = ccp_free_ksb, .init = ccp_init, .destroy = ccp_destroy, .get_free_slots = ccp_get_free_slots, .irqhandler = ccp_irq_handler, }; const struct ccp_vdata ccpv3 = { .version = CCP_VERSION(3, 0), .setup = NULL, .perform = &ccp3_actions, .bar = 2, .offset = 0x20000, };