From 30d4e3e63437fe50abff8e19f8351840e3e7ac6f Mon Sep 17 00:00:00 2001
From: Radu Alexe
Date: Tue, 3 Oct 2017 14:48:17 +0300
Subject: dma: caam: add dma driver using scatter-gather

This module introduces an SG DMA driver based on the DMA capabilities
of the CAAM hardware block. CAAM DMA is a platform driver that is
probed only if the device is defined in the device tree.

The driver creates a DMA channel for each JR of the CAAM, which
introduces a dependency on the JR driver. A deferred-probing mechanism
is therefore used to ensure that the CAAM DMA driver is probed only
after the JR driver.

Signed-off-by: Radu Alexe
Signed-off-by: Tudor Ambarus
Signed-off-by: Rajiv Vishwakarma

diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h
index 6ec6f8c..a8c3be7 100644
--- a/drivers/crypto/caam/desc.h
+++ b/drivers/crypto/caam/desc.h
@@ -355,6 +355,7 @@
 #define FIFOLD_TYPE_PK_N	(0x08 << FIFOLD_TYPE_SHIFT)
 #define FIFOLD_TYPE_PK_A	(0x0c << FIFOLD_TYPE_SHIFT)
 #define FIFOLD_TYPE_PK_B	(0x0d << FIFOLD_TYPE_SHIFT)
+#define FIFOLD_TYPE_IFIFO	(0x0f << FIFOLD_TYPE_SHIFT)
 
 /* Other types. Need to OR in last/flush bits as desired */
 #define FIFOLD_TYPE_MSG_MASK	(0x38 << FIFOLD_TYPE_SHIFT)
@@ -408,6 +409,7 @@
 #define FIFOST_TYPE_MESSAGE_DATA (0x30 << FIFOST_TYPE_SHIFT)
 #define FIFOST_TYPE_RNGSTORE	(0x34 << FIFOST_TYPE_SHIFT)
 #define FIFOST_TYPE_RNGFIFO	(0x35 << FIFOST_TYPE_SHIFT)
+#define FIFOST_TYPE_METADATA	(0x3e << FIFOST_TYPE_SHIFT)
 #define FIFOST_TYPE_SKIP	(0x3f << FIFOST_TYPE_SHIFT)
 
 /*
@@ -1444,6 +1446,7 @@
 #define MATH_SRC1_INFIFO	(0x0a << MATH_SRC1_SHIFT)
 #define MATH_SRC1_OUTFIFO	(0x0b << MATH_SRC1_SHIFT)
 #define MATH_SRC1_ONE		(0x0c << MATH_SRC1_SHIFT)
+#define MATH_SRC1_ZERO		(0x0f << MATH_SRC1_SHIFT)
 
 /* Destination selectors */
 #define MATH_DEST_SHIFT		8
diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c
index d258953..00e8709 100644
--- a/drivers/crypto/caam/jr.c
+++ b/drivers/crypto/caam/jr.c
@@ -23,6 +23,14 @@ struct jr_driver_data {
 
 static struct jr_driver_data driver_data;
 
+static int jr_driver_probed;
+
+int caam_jr_driver_probed(void)
+{
+	return jr_driver_probed;
+}
+EXPORT_SYMBOL(caam_jr_driver_probed);
+
 static int caam_reset_hw_jr(struct device *dev)
 {
 	struct caam_drv_private_jr *jrp = dev_get_drvdata(dev);
@@ -119,6 +127,8 @@ static int caam_jr_remove(struct platform_device *pdev)
 		dev_err(jrdev, "Failed to shut down job ring\n");
 	irq_dispose_mapping(jrpriv->irq);
 
+	jr_driver_probed--;
+
 	return ret;
 }
 
@@ -281,6 +291,36 @@ struct device *caam_jr_alloc(void)
 EXPORT_SYMBOL(caam_jr_alloc);
 
 /**
+ * caam_jridx_alloc() - Allocate a specific job ring based on its index.
+ *
+ * returns:	pointer to the job ring device on success,
+ *		ERR_PTR(-ENODEV) if the ring is not available.
+ **/
+struct device *caam_jridx_alloc(int idx)
+{
+	struct caam_drv_private_jr *jrpriv;
+	struct device *dev = ERR_PTR(-ENODEV);
+
+	spin_lock(&driver_data.jr_alloc_lock);
+
+	if (list_empty(&driver_data.jr_list))
+		goto end;
+
+	list_for_each_entry(jrpriv, &driver_data.jr_list, list_node) {
+		if (jrpriv->ridx == idx) {
+			atomic_inc(&jrpriv->tfm_count);
+			dev = jrpriv->dev;
+			break;
+		}
+	}
+
+end:
+	spin_unlock(&driver_data.jr_alloc_lock);
+	return dev;
+}
+EXPORT_SYMBOL(caam_jridx_alloc);
+
+/**
  * caam_jr_free() - Free the Job Ring
  * @rdev      - points to the dev that identifies the Job ring to
  *              be released.
@@ -538,6 +578,8 @@ static int caam_jr_probe(struct platform_device *pdev)
 
 	atomic_set(&jrpriv->tfm_count, 0);
 
+	jr_driver_probed++;
+
 	return 0;
 }
 
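[ Note: caam_jr_driver_probed() and caam_jridx_alloc() are meant to be used
  together: a consumer loops over the number of rings probed so far and
  claims each ring by index, dropping the reference with caam_jr_free()
  when it cannot use the ring. A minimal sketch of that pattern (bind_ring()
  is a hypothetical consumer helper, not part of this patch):

	int i, nr_rings = caam_jr_driver_probed();

	for (i = 0; i < nr_rings; i++) {
		struct device *jrdev = caam_jridx_alloc(i);

		if (IS_ERR(jrdev))
			continue;	/* ring i is not registered */

		if (bind_ring(jrdev) < 0)
			caam_jr_free(jrdev);	/* drop tfm_count reference */
	}

  caam_dma_jr_chan_bind() in the new driver below follows this shape. ]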
diff --git a/drivers/crypto/caam/jr.h b/drivers/crypto/caam/jr.h
index 97113a6..ee4d31c 100644
--- a/drivers/crypto/caam/jr.h
+++ b/drivers/crypto/caam/jr.h
@@ -8,7 +8,9 @@
 #define JR_H
 
 /* Prototypes for backend-level services exposed to APIs */
+int caam_jr_driver_probed(void);
 struct device *caam_jr_alloc(void);
+struct device *caam_jridx_alloc(int idx);
 void caam_jr_free(struct device *rdev);
 int caam_jr_enqueue(struct device *dev, u32 *desc,
 		    void (*cbk)(struct device *dev, u32 *desc, u32 status,
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index e5b0fb0..8caaf09 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -578,6 +578,23 @@ config ZX_DMA
 	help
 	  Support the DMA engine for ZTE ZX296702 platform devices.
 
+config CRYPTO_DEV_FSL_CAAM_DMA
+	tristate "CAAM DMA engine support"
+	depends on CRYPTO_DEV_FSL_CAAM_JR
+	default y
+	select DMA_ENGINE
+	select ASYNC_CORE
+	select ASYNC_TX_ENABLE_CHANNEL_SWITCH
+	help
+	  Selecting this will offload the DMA operations for users of
+	  the scatter-gather memcpy API to the CAAM via job rings. The
+	  CAAM is a hardware module that provides hardware acceleration for
+	  cryptographic operations. It has a built-in DMA controller that can
+	  be programmed to read/write cryptographic data. This module defines
+	  a DMA driver that uses the DMA capabilities of the CAAM.
+
+	  To compile this as a module, choose M here: the module
+	  will be called caam_dma.
+
 # driver files
 source "drivers/dma/bestcomm/Kconfig"
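[ For context, this is how a kernel client of this era's dmaengine API would
  reach the driver: grab a channel advertising DMA_SG, then prepare and
  submit a scatter-gather copy. A rough sketch, assuming the src/dst
  scatterlists are already built (error handling trimmed; my_done is a
  hypothetical completion hook, not part of this patch):

	dma_cap_mask_t mask;
	struct dma_chan *chan;
	struct dma_async_tx_descriptor *tx;
	dma_cookie_t cookie;

	dma_cap_zero(mask);
	dma_cap_set(DMA_SG, mask);

	chan = dma_request_channel(mask, NULL, NULL);	/* any DMA_SG channel */

	dma_map_sg(chan->device->dev, src_sg, src_nents, DMA_TO_DEVICE);
	dma_map_sg(chan->device->dev, dst_sg, dst_nents, DMA_FROM_DEVICE);

	tx = chan->device->device_prep_dma_sg(chan, dst_sg, dst_nents,
					      src_sg, src_nents, 0);
	tx->callback = my_done;
	cookie = dmaengine_submit(tx);
	dma_async_issue_pending(chan);	/* ends up in caam_jr_issue_pending() */

  The driver also sets DMA_PRIVATE below, so its channels are handed out
  only through dma_request_channel() and never to the public pool. ]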
descriptor + */ +struct caam_dma_ctx { + struct dma_chan chan; + struct list_head node; + struct device *jrdev; + struct list_head submit_q; + struct list_head done_not_acked; + spinlock_t edesc_lock; +}; + +static struct dma_device *dma_dev; +static struct caam_dma_sh_desc *dma_sh_desc; +static LIST_HEAD(dma_ctx_list); + +static dma_cookie_t caam_jr_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct caam_dma_edesc *edesc = NULL; + struct caam_dma_ctx *ctx = NULL; + dma_cookie_t cookie; + + edesc = container_of(tx, struct caam_dma_edesc, async_tx); + ctx = container_of(tx->chan, struct caam_dma_ctx, chan); + + spin_lock_bh(&ctx->edesc_lock); + + cookie = dma_cookie_assign(tx); + list_add_tail(&edesc->node, &ctx->submit_q); + + spin_unlock_bh(&ctx->edesc_lock); + + return cookie; +} + +static unsigned int caam_dma_sg_dma_len(struct scatterlist *sg, + unsigned int nents) +{ + unsigned int len; + + for (len = 0; sg && nents; sg = sg_next(sg), nents--) + len += sg_dma_len(sg); + + return len; +} + +static struct caam_dma_edesc *caam_dma_edesc_alloc(struct dma_chan *chan, + unsigned long flags, + struct scatterlist *dst_sg, + unsigned int dst_nents, + struct scatterlist *src_sg, + unsigned int src_nents) +{ + struct caam_dma_ctx *ctx = container_of(chan, struct caam_dma_ctx, + chan); + struct device *jrdev = ctx->jrdev; + struct caam_dma_edesc *edesc; + struct sec4_sg_entry *sec4_sg; + dma_addr_t sec4_sg_dma_src; + unsigned int sec4_sg_bytes; + + if (!dst_sg || !src_sg || !dst_nents || !src_nents) + return NULL; + + sec4_sg_bytes = (src_nents + dst_nents) * sizeof(*sec4_sg); + + edesc = kzalloc(sizeof(*edesc) + DESC_JOB_IO_LEN + sec4_sg_bytes, + GFP_DMA | GFP_NOWAIT); + if (!edesc) + return ERR_PTR(-ENOMEM); + + edesc->src_len = caam_dma_sg_dma_len(src_sg, src_nents); + edesc->dst_len = caam_dma_sg_dma_len(dst_sg, dst_nents); + if (edesc->src_len != edesc->dst_len) { + dev_err(jrdev, "%s: src(%u) and dst(%u) len mismatch.\n", + __func__, edesc->src_len, edesc->dst_len); + kfree(edesc); + return ERR_PTR(-EINVAL); + } + + dma_async_tx_descriptor_init(&edesc->async_tx, chan); + edesc->async_tx.tx_submit = caam_jr_tx_submit; + edesc->async_tx.flags = flags; + edesc->async_tx.cookie = -EBUSY; + + /* Prepare SEC SGs */ + edesc->sec4_sg = (void *)edesc + offsetof(struct caam_dma_edesc, jd) + + DESC_JOB_IO_LEN; + + sec4_sg = edesc->sec4_sg; + sg_to_sec4_sg_last(src_sg, src_nents, sec4_sg, 0); + + sec4_sg += src_nents; + sg_to_sec4_sg_last(dst_sg, dst_nents, sec4_sg, 0); + + sec4_sg_dma_src = dma_map_single(jrdev, edesc->sec4_sg, sec4_sg_bytes, + DMA_TO_DEVICE); + if (dma_mapping_error(jrdev, sec4_sg_dma_src)) { + dev_err(jrdev, "error mapping segments to device\n"); + kfree(edesc); + return ERR_PTR(-ENOMEM); + } + + edesc->sec4_sg_dma = sec4_sg_dma_src; + edesc->sec4_sg_dma_dst = sec4_sg_dma_src + + src_nents * sizeof(*sec4_sg); + edesc->ctx = ctx; + + return edesc; +} + +static void caam_jr_chan_free_edesc(struct caam_dma_edesc *edesc) +{ + struct caam_dma_ctx *ctx = edesc->ctx; + struct caam_dma_edesc *_edesc = NULL; + + spin_lock_bh(&ctx->edesc_lock); + + list_add_tail(&edesc->node, &ctx->done_not_acked); + list_for_each_entry_safe(edesc, _edesc, &ctx->done_not_acked, node) { + if (async_tx_test_ack(&edesc->async_tx)) { + list_del(&edesc->node); + kfree(edesc); + } + } + + spin_unlock_bh(&ctx->edesc_lock); +} + +static void caam_dma_done(struct device *dev, u32 *hwdesc, u32 err, + void *context) +{ + struct caam_dma_edesc *edesc = context; + struct caam_dma_ctx *ctx = edesc->ctx; + 
+static void caam_jr_chan_free_edesc(struct caam_dma_edesc *edesc)
+{
+	struct caam_dma_ctx *ctx = edesc->ctx;
+	struct caam_dma_edesc *_edesc = NULL;
+
+	spin_lock_bh(&ctx->edesc_lock);
+
+	list_add_tail(&edesc->node, &ctx->done_not_acked);
+	list_for_each_entry_safe(edesc, _edesc, &ctx->done_not_acked, node) {
+		if (async_tx_test_ack(&edesc->async_tx)) {
+			list_del(&edesc->node);
+			kfree(edesc);
+		}
+	}
+
+	spin_unlock_bh(&ctx->edesc_lock);
+}
+
+static void caam_dma_done(struct device *dev, u32 *hwdesc, u32 err,
+			  void *context)
+{
+	struct caam_dma_edesc *edesc = context;
+	struct caam_dma_ctx *ctx = edesc->ctx;
+	dma_async_tx_callback callback;
+	void *callback_param;
+
+	if (err)
+		caam_jr_strstatus(ctx->jrdev, err);
+
+	dma_run_dependencies(&edesc->async_tx);
+
+	spin_lock_bh(&ctx->edesc_lock);
+	dma_cookie_complete(&edesc->async_tx);
+	spin_unlock_bh(&ctx->edesc_lock);
+
+	callback = edesc->async_tx.callback;
+	callback_param = edesc->async_tx.callback_param;
+
+	dma_descriptor_unmap(&edesc->async_tx);
+
+	caam_jr_chan_free_edesc(edesc);
+
+	if (callback)
+		callback(callback_param);
+}
+
+static void init_dma_job(dma_addr_t sh_desc_dma, struct caam_dma_edesc *edesc)
+{
+	u32 *jd = edesc->jd;
+	u32 *sh_desc = dma_sh_desc->desc;
+
+	/* init the job descriptor */
+	init_job_desc_shared(jd, sh_desc_dma, desc_len(sh_desc), HDR_REVERSE);
+
+	/* set SEQIN PTR */
+	append_seq_in_ptr(jd, edesc->sec4_sg_dma, edesc->src_len, LDST_SGF);
+
+	/* set SEQOUT PTR */
+	append_seq_out_ptr(jd, edesc->sec4_sg_dma_dst, edesc->dst_len,
+			   LDST_SGF);
+
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "caam dma desc@" __stringify(__LINE__) ": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, jd, desc_bytes(jd), 1);
+#endif
+}
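[ A note on HDR_REVERSE above: it flips the usual CAAM execution order so
  that the per-transfer job descriptor runs before the shared descriptor.
  Rendered as a sequence (illustrative only):

	/*
	 * 1. job descriptor:    SEQ IN PTR  -> source half of the SG table
	 *                       SEQ OUT PTR -> destination half
	 * 2. shared descriptor: the copy loop built in set_caam_dma_desc()
	 *                       below, which consumes those two sequences
	 */

  This lets one pre-built, pre-mapped shared descriptor serve every job,
  with only the two sequence pointers rebuilt per transfer. ]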
+/* This function can be called from an interrupt context */
+static struct dma_async_tx_descriptor *
+caam_jr_prep_dma_sg(struct dma_chan *chan, struct scatterlist *dst_sg,
+		    unsigned int dst_nents, struct scatterlist *src_sg,
+		    unsigned int src_nents, unsigned long flags)
+{
+	struct caam_dma_edesc *edesc;
+
+	/* allocate extended descriptor */
+	edesc = caam_dma_edesc_alloc(chan, flags, dst_sg, dst_nents, src_sg,
+				     src_nents);
+	if (IS_ERR_OR_NULL(edesc))
+		return ERR_CAST(edesc);
+
+	/* Initialize job descriptor */
+	init_dma_job(dma_sh_desc->desc_dma, edesc);
+
+	return &edesc->async_tx;
+}
+
+/* This function can be called from an interrupt context */
+static void caam_jr_issue_pending(struct dma_chan *chan)
+{
+	struct caam_dma_ctx *ctx = container_of(chan, struct caam_dma_ctx,
+						chan);
+	struct caam_dma_edesc *edesc, *_edesc;
+
+	spin_lock_bh(&ctx->edesc_lock);
+	list_for_each_entry_safe(edesc, _edesc, &ctx->submit_q, node) {
+		if (caam_jr_enqueue(ctx->jrdev, edesc->jd,
+				    caam_dma_done, edesc) < 0)
+			break;
+		list_del(&edesc->node);
+	}
+	spin_unlock_bh(&ctx->edesc_lock);
+}
+
+static void caam_jr_free_chan_resources(struct dma_chan *chan)
+{
+	struct caam_dma_ctx *ctx = container_of(chan, struct caam_dma_ctx,
+						chan);
+	struct caam_dma_edesc *edesc, *_edesc;
+
+	spin_lock_bh(&ctx->edesc_lock);
+	list_for_each_entry_safe(edesc, _edesc, &ctx->submit_q, node) {
+		list_del(&edesc->node);
+		kfree(edesc);
+	}
+	list_for_each_entry_safe(edesc, _edesc, &ctx->done_not_acked, node) {
+		list_del(&edesc->node);
+		kfree(edesc);
+	}
+	spin_unlock_bh(&ctx->edesc_lock);
+}
+
+static int caam_dma_jr_chan_bind(void)
+{
+	struct device *jrdev;
+	struct caam_dma_ctx *ctx;
+	int bonds = 0;
+	int i;
+
+	for (i = 0; i < caam_jr_driver_probed(); i++) {
+		jrdev = caam_jridx_alloc(i);
+		if (IS_ERR(jrdev)) {
+			pr_err("job ring device %d allocation failed\n", i);
+			continue;
+		}
+
+		ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+		if (!ctx) {
+			caam_jr_free(jrdev);
+			continue;
+		}
+
+		ctx->chan.device = dma_dev;
+		ctx->chan.private = ctx;
+
+		ctx->jrdev = jrdev;
+
+		INIT_LIST_HEAD(&ctx->submit_q);
+		INIT_LIST_HEAD(&ctx->done_not_acked);
+		INIT_LIST_HEAD(&ctx->node);
+		spin_lock_init(&ctx->edesc_lock);
+
+		dma_cookie_init(&ctx->chan);
+
+		/* add the context of this channel to the context list */
+		list_add_tail(&ctx->node, &dma_ctx_list);
+
+		/* add this channel to the device chan list */
+		list_add_tail(&ctx->chan.device_node, &dma_dev->channels);
+
+		bonds++;
+	}
+
+	return bonds;
+}
+
+static inline void caam_jr_dma_free(struct dma_chan *chan)
+{
+	struct caam_dma_ctx *ctx = container_of(chan, struct caam_dma_ctx,
+						chan);
+
+	list_del(&ctx->node);
+	list_del(&chan->device_node);
+	caam_jr_free(ctx->jrdev);
+	kfree(ctx);
+}
+
+static void set_caam_dma_desc(u32 *desc)
+{
+	u32 *jmp_cmd;
+
+	/* dma shared descriptor */
+	init_sh_desc(desc, HDR_SHARE_NEVER | (1 << HDR_START_IDX_SHIFT));
+
+	/* REG0 = SEQINLEN - 65472 */
+	append_math_sub_imm_u32(desc, REG0, SEQINLEN, IMM, 65472);
+
+	/*
+	 * if (REG0 > 0)
+	 *	jmp to LABEL1
+	 */
+	jmp_cmd = append_jump(desc, JUMP_TEST_INVALL | JUMP_COND_MATH_N |
+			      JUMP_COND_MATH_Z);
+
+	/* REG0 = SEQINLEN */
+	append_math_sub(desc, REG0, SEQINLEN, ZERO, CAAM_CMD_SZ);
+
+	/* LABEL1 */
+	set_jump_tgt_here(desc, jmp_cmd);
+
+	/* VARSEQINLEN = REG0 */
+	append_math_add(desc, VARSEQINLEN, REG0, ZERO, CAAM_CMD_SZ);
+
+	/* VARSEQOUTLEN = REG0 */
+	append_math_add(desc, VARSEQOUTLEN, REG0, ZERO, CAAM_CMD_SZ);
+
+	/* do FIFO STORE */
+	append_seq_fifo_store(desc, 0, FIFOST_TYPE_METADATA | LDST_VLF);
+
+	/* do FIFO LOAD */
+	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 |
+			     FIFOLD_TYPE_IFIFO | LDST_VLF);
+
+	/*
+	 * if (REG0 > 0)
+	 *	jmp back to the first command after the shared descriptor
+	 *	header (relative offset 0xF8, i.e. -8 commands)
+	 */
+	append_jump(desc, JUMP_TEST_INVALL | JUMP_COND_MATH_N |
+		    JUMP_COND_MATH_Z | 0xF8);
+
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "caam dma shdesc@" __stringify(__LINE__) ": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
+#endif
+}
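[ The shared descriptor above is easier to follow as C pseudocode. The
  constant 65472 is 0xFFC0; per the math commands commented above, each pass
  first moves whatever exceeds 65472 bytes, then the rest. Illustrative
  rendering only (the hardware runs descriptor commands, not C):

	remaining = seq_in_len;
	do {
		over = remaining - 65472;
		/* move everything above 65472 first, then what is left */
		chunk = (over > 0) ? over : remaining;

		vseq_in_len = vseq_out_len = chunk;
		fifo_store(chunk);	/* drain chunk bytes to the output */
		fifo_load(chunk);	/* pull chunk bytes from the input */

		remaining -= chunk;
	} while (chunk > 0);	/* trailing jump: loop while REG0 > 0 */

  The loop exits via a final zero-length pass once the sequence is
  exhausted. ]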
+static int caam_dma_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device *ctrldev = dev->parent;
+	struct dma_chan *chan, *_chan;
+	u32 *sh_desc;
+	int err = -ENOMEM;
+	int bonds;
+
+	if (!caam_jr_driver_probed()) {
+		dev_info(dev, "Defer probing after JR driver probing\n");
+		return -EPROBE_DEFER;
+	}
+
+	dma_dev = kzalloc(sizeof(*dma_dev), GFP_KERNEL);
+	if (!dma_dev)
+		return -ENOMEM;
+
+	dma_sh_desc = kzalloc(sizeof(*dma_sh_desc), GFP_KERNEL | GFP_DMA);
+	if (!dma_sh_desc)
+		goto desc_err;
+
+	sh_desc = dma_sh_desc->desc;
+	set_caam_dma_desc(sh_desc);
+	dma_sh_desc->desc_dma = dma_map_single(ctrldev, sh_desc,
+					       desc_bytes(sh_desc),
+					       DMA_TO_DEVICE);
+	if (dma_mapping_error(ctrldev, dma_sh_desc->desc_dma)) {
+		dev_err(dev, "unable to map dma descriptor\n");
+		goto map_err;
+	}
+
+	INIT_LIST_HEAD(&dma_dev->channels);
+
+	bonds = caam_dma_jr_chan_bind();
+	if (!bonds) {
+		err = -ENODEV;
+		goto jr_bind_err;
+	}
+
+	dma_dev->dev = dev;
+	dma_dev->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+	dma_cap_set(DMA_SG, dma_dev->cap_mask);
+	dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask);
+	dma_dev->device_tx_status = dma_cookie_status;
+	dma_dev->device_issue_pending = caam_jr_issue_pending;
+	dma_dev->device_prep_dma_sg = caam_jr_prep_dma_sg;
+	dma_dev->device_free_chan_resources = caam_jr_free_chan_resources;
+
+	err = dma_async_device_register(dma_dev);
+	if (err) {
+		dev_err(dev, "Failed to register CAAM DMA engine\n");
+		goto jr_bind_err;
+	}
+
+	dev_info(dev, "caam dma support with %d job rings\n", bonds);
+
+	return err;
+
+jr_bind_err:
+	list_for_each_entry_safe(chan, _chan, &dma_dev->channels, device_node)
+		caam_jr_dma_free(chan);
+
+	dma_unmap_single(ctrldev, dma_sh_desc->desc_dma, desc_bytes(sh_desc),
+			 DMA_TO_DEVICE);
+map_err:
+	kfree(dma_sh_desc);
+desc_err:
+	kfree(dma_dev);
+	return err;
+}
+
+static int caam_dma_remove(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device *ctrldev = dev->parent;
+	struct caam_dma_ctx *ctx, *_ctx;
+
+	dma_async_device_unregister(dma_dev);
+
+	list_for_each_entry_safe(ctx, _ctx, &dma_ctx_list, node) {
+		list_del(&ctx->node);
+		caam_jr_free(ctx->jrdev);
+		kfree(ctx);
+	}
+
+	dma_unmap_single(ctrldev, dma_sh_desc->desc_dma,
+			 desc_bytes(dma_sh_desc->desc), DMA_TO_DEVICE);
+
+	kfree(dma_sh_desc);
+	kfree(dma_dev);
+
+	dev_info(dev, "caam dma support disabled\n");
+	return 0;
+}
+
+static const struct of_device_id caam_dma_match[] = {
+	{ .compatible = "fsl,sec-v5.4-dma", },
+	{ .compatible = "fsl,sec-v5.0-dma", },
+	{ .compatible = "fsl,sec-v4.0-dma", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, caam_dma_match);
+
+static struct platform_driver caam_dma_driver = {
+	.driver = {
+		.name = "caam-dma",
+		.of_match_table = caam_dma_match,
+	},
+	.probe = caam_dma_probe,
+	.remove = caam_dma_remove,
+};
+module_platform_driver(caam_dma_driver);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("NXP CAAM support for SG DMA");
+MODULE_AUTHOR("NXP Semiconductors");
--
cgit v0.10.2