author     Alex Porosanu <alexandru.porosanu@freescale.com>   2015-02-18 18:55:52 (GMT)
committer  Honghua Yin <Hong-Hua.Yin@freescale.com>           2015-03-31 03:04:14 (GMT)
commit     1bbe4d7969a8fa2eee0db459032402e6713f2a6e (patch)
tree       7901239483d65aacaad9cb07ad2cbf83a7011120 /drivers/crypto
parent     8a5d8f6a1b98ef51135b67a4c52eebb0e9b69040 (diff)
download   linux-fsl-qoriq-1bbe4d7969a8fa2eee0db459032402e6713f2a6e.tar.xz
crypto: caam - remove list lookup of requests
This patch removes the per-packet list lookup of a completed
request by adding an opaque after the SG entries in the FD: the
FD address points at the request's SG table, so the original
request can be recovered directly from the FD. While here, the
software congestion control is removed and replaced with a
proper CGR with a sensible threshold, and some very likely
branches are decorated with branch-prediction hints.
Change-Id: I48f2c71b6ac0d537843a44f8c0627c9b70c77592
Signed-off-by: Alex Porosanu <alexandru.porosanu@freescale.com>
Reviewed-on: http://git.am.freescale.net:8181/31368
Tested-by: Review Code-CDREVIEW <CDREVIEW@freescale.com>
Reviewed-by: Alexandru Marginean <Alexandru.Marginean@freescale.com>
Reviewed-by: Honghua Yin <Hong-Hua.Yin@freescale.com>
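
The core of the change can be sketched outside the kernel: because the FD's
address is the DMA address of req->fd_sgt, and fd_sgt is the first member of
struct caam_drv_req, converting that address back to a virtual pointer yields
the request itself. The stand-alone C sketch below illustrates this under
simplified, hypothetical stand-in types (a plain cast takes the place of the
in-kernel phys_to_virt()):

#include <assert.h>
#include <stdio.h>

/* Simplified stand-ins for the driver types (illustration only) */
struct qm_sg_entry { unsigned long addr, length; };

struct caam_drv_req {
        struct qm_sg_entry fd_sgt[2];   /* must remain the first member */
        void *drv_ctx;
        void (*cbk)(struct caam_drv_req *req, int status);
        void *app_ctx;
};

/*
 * In the driver, fd->addr comes from dma_map_single(qidev, req->fd_sgt, ...),
 * so mapping it back to a virtual address lands on fd_sgt and, by struct
 * layout, on the request itself.
 */
static struct caam_drv_req *fd_to_drv_req(void *fd_addr)
{
        return (struct caam_drv_req *)fd_addr;  /* phys_to_virt() in-kernel */
}

int main(void)
{
        struct caam_drv_req req = { .app_ctx = (void *)0xcafe };
        void *fd_addr = req.fd_sgt;     /* the address carried by the FD */

        assert(fd_to_drv_req(fd_addr) == &req);
        printf("recovered request, app_ctx=%p\n",
               fd_to_drv_req(fd_addr)->app_ctx);
        return 0;
}

The deleted lookup_drv_req()/fd_to_drv_req() pair walked per-CPU backlog lists
under a spinlock for every completed packet; the recovery above is O(1) and
lock-free.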
Diffstat (limited to 'drivers/crypto')
-rw-r--r--  drivers/crypto/caam/qi.c | 295
-rw-r--r--  drivers/crypto/caam/qi.h |   4
2 files changed, 80 insertions(+), 219 deletions(-)
diff --git a/drivers/crypto/caam/qi.c b/drivers/crypto/caam/qi.c
index 673f474..0a01674 100644
--- a/drivers/crypto/caam/qi.c
+++ b/drivers/crypto/caam/qi.c
@@ -15,10 +15,12 @@
 #include "intern.h"
 #include "desc_constr.h"
 
-#define CAAM_REQ_CGR_THRESHOLD  0x1000000
 #define PRE_HDR_LEN             2 /* Length in u32 words */
 #define PREHDR_RSLS_SHIFT       31
-#define PENDING_JOBS_DEPTH      512
+#ifndef CONFIG_FSL_DPAA_ETH
+/* If DPA_ETH is not available, then use a reasonably backlog per CPU */
+#define MAX_RSP_FQ_BACKLOG_PER_CPU      64
+#endif
 
 /*
  * The jobs are processed by the driver against a driver context.
  * With every cryptographic context, a driver context is attached.
@@ -44,27 +46,26 @@ struct caam_qi_pcpu_priv {
         struct napi_struct irqtask;     /* IRQ task for QI backend */
         struct net_device net_dev;      /* netdev used by NAPI */
         struct qman_fq rsp_fq;          /* Response FQ from CAAM */
-        spinlock_t listlock ____cacheline_aligned; /* for protecting
-                                                    * simultaneous access
-                                                    * to bklog_list */
-        struct list_head bklog_list;    /* List of pending responses*/
-        atomic_t pending;               /* Number of pending responses
-                                         * from CAAM on this cpu */
+        u32 pool;                       /* Pool channel used by all from-SEC
+                                           queues */
 } ____cacheline_aligned;
 
 static DEFINE_PER_CPU(struct caam_qi_pcpu_priv, pcpu_qipriv);
 
 struct caam_qi_priv {
-        bool sec_congested;     /* Indicates whether SEC is congested */
-        bool cpu_congested;     /* Indicates whether CPU is congested */
         struct qman_cgr rsp_cgr;        /* QMAN response CGR */
-        struct qman_cgr req_cgr;        /* QMAN request CGR */
         struct platform_device *qi_pdev; /* Platform device for QI backend */
 };
 
 static struct caam_qi_priv qipriv ____cacheline_aligned;
 
 /*
+ * This is written by one core - the one that initialized the CGR, and
+ * read by multiple cores (all the others)
+ */
+static bool caam_congested __read_mostly;
+
+/*
  * CPU from where the module initialised. This is required because
  * QMAN driver requires CGRs to be removed from same CPU from where
  * they were originally allocated
@@ -73,16 +74,7 @@ static int mod_init_cpu;
 
 bool caam_drv_ctx_busy(struct caam_drv_ctx *drv_ctx)
 {
-        int pending;
-
-        if (qipriv.sec_congested)
-                return true;
-
-        pending = atomic_read(&per_cpu(pcpu_qipriv.pending, drv_ctx->cpu));
-        if (pending >= PENDING_JOBS_DEPTH)
-                return true;
-
-        return false;
+        return caam_congested;
 }
 EXPORT_SYMBOL(caam_drv_ctx_busy);
 
@@ -90,33 +82,20 @@ int caam_qi_enqueue(struct device *qidev, struct caam_drv_req *req)
 {
         struct qm_fd fd;
         int ret;
-        size_t size;
-        struct list_head *list;
+        const size_t size = 2 * sizeof(struct qm_sg_entry);
         int num_retries = 0;
-        unsigned long flags;
 
         fd.cmd = 0;
         fd.format = qm_fd_compound;
         fd.cong_weight = req->fd_sgt[1].length;
-        size = 2 * sizeof(struct qm_sg_entry);
-
-        fd.addr = dma_map_single(qidev, req->fd_sgt, size , DMA_BIDIRECTIONAL);
+        fd.addr = dma_map_single(qidev, req->fd_sgt, size,
+                                 DMA_BIDIRECTIONAL);
         if (dma_mapping_error(qidev, fd.addr)) {
                 dev_err(qidev, "DMA mapping error for QI enqueue request\n");
                 return -EIO;
         }
-        req->hwaddr = qm_fd_addr(&fd);
-        list = &per_cpu(pcpu_qipriv.bklog_list, req->drv_ctx->cpu);
-
-        spin_lock_irqsave(&per_cpu(pcpu_qipriv.listlock, req->drv_ctx->cpu),
-                          flags);
-        list_add_tail(&req->hdr__, list);
-        spin_unlock_irqrestore(&per_cpu(pcpu_qipriv.listlock,
-                               req->drv_ctx->cpu), flags);
-        atomic_inc(&per_cpu(pcpu_qipriv.pending, req->drv_ctx->cpu));
-
         do {
                 ret = qman_enqueue(req->drv_ctx->req_fq, &fd, 0);
                 if (likely(!ret))
@@ -129,84 +108,16 @@ int caam_qi_enqueue(struct device *qidev, struct caam_drv_req *req)
 
         dev_err(qidev, "qman_enqueue failed: %d\n", ret);
 
-        spin_lock_irqsave(&per_cpu(pcpu_qipriv.listlock, req->drv_ctx->cpu),
-                          flags);
-        list_del(&req->hdr__);
-        spin_unlock_irqrestore(&per_cpu(pcpu_qipriv.listlock,
-                               req->drv_ctx->cpu), flags);
-        atomic_dec(&per_cpu(pcpu_qipriv.pending, req->drv_ctx->cpu));
-
-        dma_unmap_single(qidev, fd.addr, size, DMA_BIDIRECTIONAL);
 
         return ret;
 }
 EXPORT_SYMBOL(caam_qi_enqueue);
 
-struct caam_drv_req *lookup_drv_req(const struct qm_fd *fd, int cpu)
-{
-        struct list_head *pos, *list, *n;
-        struct caam_drv_req *req;
-        unsigned long flags;
-
-        list = &per_cpu(pcpu_qipriv.bklog_list, cpu);
-        list_for_each_safe(pos, n, list) {
-                req = container_of(pos, struct caam_drv_req, hdr__);
-
-                if (req->hwaddr == qm_fd_addr(fd)) {
-                        BUG_ON(req->drv_ctx->cpu != cpu);
-
-                        spin_lock_irqsave(&per_cpu(pcpu_qipriv.listlock,
-                                          req->drv_ctx->cpu), flags);
-                        list_del(&req->hdr__);
-                        spin_unlock_irqrestore(&per_cpu(pcpu_qipriv.listlock,
-                                               req->drv_ctx->cpu),
-                                               flags);
-                        atomic_dec(&per_cpu(pcpu_qipriv.pending,
-                                   req->drv_ctx->cpu));
-                        return req;
-                }
-        }
-
-        return NULL;
-}
-
-
-static struct caam_drv_req *fd_to_drv_req(const struct qm_fd *fd)
-{
-        struct caam_drv_req *req;
-        const cpumask_t *cpus = qman_affine_cpus();
-        int i;
-
-        /*
-         * First check on this_cpu since this is likely case of normal caam
-         * response path.
-         */
-        req = lookup_drv_req(fd, smp_processor_id());
-        if (likely(req))
-                return req;
-
-        /*
-         * If drv_req is not found on this_cpu, then try searching on other
-         * portal owning cpus. This is required to handle ERN callbacks and
-         * volatile dequeues. These may be issued on a CPU which is different
-         * than the one associated with the drv_req's drv_ctx.
-         */
-        for_each_cpu(i, cpus) {
-                if (i == smp_processor_id())
-                        continue; /* Already checked */
-
-                req = lookup_drv_req(fd, i);
-
-                if (req)
-                        return req;
-        }
-
-        return NULL;
-}
-
 static void caam_fq_ern_cb(struct qman_portal *qm, struct qman_fq *fq,
                            const struct qm_mr_entry *msg)
 {
         const struct qm_fd *fd;
         struct caam_drv_req *drv_req;
-        size_t size;
+        const size_t size = 2 * sizeof(struct qm_sg_entry);
         struct device *qidev = &per_cpu(pcpu_qipriv.net_dev,
                                         smp_processor_id()).dev;
@@ -217,46 +128,17 @@ static void caam_fq_ern_cb(struct qman_portal *qm, struct qman_fq *fq,
                 return;
         }
 
-        drv_req = fd_to_drv_req(fd);
+        drv_req = ((struct caam_drv_req *)phys_to_virt(fd->addr));
         if (!drv_req) {
                 dev_err(qidev,
                         "Can't find original request for caam response\n");
                 return;
         }
 
-        size = 2 * sizeof(struct qm_sg_entry);
-        dma_unmap_single(drv_req->drv_ctx->qidev, fd->addr,
-                         size, DMA_BIDIRECTIONAL);
-
-        drv_req->cbk(drv_req, -EIO);
-}
-
-static enum qman_cb_dqrr_result caam_req_fq_dqrr_cb(struct qman_portal *p,
-                                                    struct qman_fq *req_fq,
-                                                    const struct qm_dqrr_entry *dqrr)
-{
-        struct caam_drv_req *drv_req;
-        const struct qm_fd *fd;
-        size_t size;
-        struct device *qidev = &per_cpu(pcpu_qipriv.net_dev,
-                                        smp_processor_id()).dev;
-
-        fd = &dqrr->fd;
-
-        drv_req = fd_to_drv_req(fd);
-        if (!drv_req) {
-                dev_err(qidev,
-                        "Can't find original request for caam response\n");
-                return qman_cb_dqrr_consume;
-        }
-
-        size = 2 * sizeof(struct qm_sg_entry);
         dma_unmap_single(drv_req->drv_ctx->qidev, fd->addr,
                          size, DMA_BIDIRECTIONAL);
 
         drv_req->cbk(drv_req, -EIO);
-
-        return qman_cb_dqrr_consume;
 }
 
 static struct qman_fq *create_caam_req_fq(struct device *qidev,
@@ -274,7 +156,6 @@ static struct qman_fq *create_caam_req_fq(struct device *qidev,
                 return ERR_PTR(-ENOMEM);
         }
 
-        req_fq->cb.dqrr = caam_req_fq_dqrr_cb;
         req_fq->cb.ern = caam_fq_ern_cb;
         req_fq->cb.fqs = NULL;
 
@@ -290,13 +171,11 @@ static struct qman_fq *create_caam_req_fq(struct device *qidev,
         flags = fq_sched_flag;
         opts.we_mask = QM_INITFQ_WE_FQCTRL | QM_INITFQ_WE_DESTWQ |
-                       QM_INITFQ_WE_CONTEXTB | QM_INITFQ_WE_CONTEXTA |
-                       QM_INITFQ_WE_CGID;
+                       QM_INITFQ_WE_CONTEXTB | QM_INITFQ_WE_CONTEXTA;
 
-        opts.fqd.fq_ctrl = QM_FQCTRL_CPCSTASH | QM_FQCTRL_CGE;
+        opts.fqd.fq_ctrl = QM_FQCTRL_CPCSTASH;
         opts.fqd.dest.channel = qm_channel_caam;
-        opts.fqd.dest.wq = 3;
-        opts.fqd.cgid = qipriv.req_cgr.cgrid;
+        opts.fqd.dest.wq = 0;
         opts.fqd.context_b = qman_fq_fqid(rsp_fq);
         opts.fqd.context_a.hi = upper_32_bits(hwdesc);
         opts.fqd.context_a.lo = lower_32_bits(hwdesc);
@@ -635,18 +514,15 @@ int caam_qi_shutdown(struct device *qidev)
          */
         set_cpus_allowed_ptr(current, get_cpu_mask(mod_init_cpu));
 
-        ret = qman_delete_cgr(&priv->req_cgr);
-        if (ret)
-                dev_err(qidev, "Delete request CGR failed: %d\n", ret);
-        else
-                qman_release_cgrid(priv->req_cgr.cgrid);
-
         ret = qman_delete_cgr(&priv->rsp_cgr);
         if (ret)
                 dev_err(qidev, "Delete response CGR failed: %d\n", ret);
         else
                 qman_release_cgrid(priv->rsp_cgr.cgrid);
 
+        /* Delete the pool channel */
+        qman_release_pool(*this_cpu_ptr(&pcpu_qipriv.pool));
+
         /* Now that we're done with the CGRs, restore the cpus allowed mask */
         set_cpus_allowed_ptr(current, &old_cpumask);
 
@@ -657,29 +533,12 @@ int caam_qi_shutdown(struct device *qidev)
 static void rsp_cgr_cb(struct qman_portal *qm, struct qman_cgr *cgr,
                        int congested)
 {
-        struct device *qidev = &per_cpu(pcpu_qipriv.net_dev,
-                                        smp_processor_id()).dev;
-
-        qipriv.cpu_congested = congested;
-
-        if (congested)
-                dev_warn(qidev, "CAAM rsp path congested\n");
-        else
-                dev_info(qidev, "CAAM rsp path congestion state exit\n");
-}
-
-static void req_cgr_cb(struct qman_portal *qm, struct qman_cgr *cgr,
-                       int congested)
-{
-        struct device *qidev = &per_cpu(pcpu_qipriv.net_dev,
-                                        smp_processor_id()).dev;
-
-        qipriv.sec_congested = congested;
+        caam_congested = congested;
 
         if (congested)
-                dev_warn(qidev, "CAAM req path congested\n");
+                pr_warn_ratelimited("CAAM rsp path congested\n");
         else
-                dev_info(qidev, "CAAM req path congestion state exit\n");
+                pr_info_ratelimited("CAAM rsp path congestion state exit\n");
 }
 
 static int caam_qi_napi_schedule(struct napi_struct *napi)
@@ -707,7 +566,7 @@ static enum qman_cb_dqrr_result caam_rsp_fq_dqrr_cb(struct qman_portal *p,
         struct napi_struct *napi;
         struct caam_drv_req *drv_req;
         const struct qm_fd *fd;
-        size_t size;
+        const size_t size = 2 * sizeof(struct qm_sg_entry);
         struct device *qidev = &per_cpu(pcpu_qipriv.net_dev,
                                         smp_processor_id()).dev;
@@ -719,19 +578,18 @@ static enum qman_cb_dqrr_result caam_rsp_fq_dqrr_cb(struct qman_portal *p,
         if (unlikely(fd->status))
                 dev_err(qidev, "Error: %#x in CAAM response FD\n", fd->status);
 
-        if (qm_fd_compound != fd->format) {
+        if (unlikely(qm_fd_compound != fd->format)) {
                 dev_err(qidev, "Non compound FD from CAAM\n");
                 return qman_cb_dqrr_consume;
         }
 
-        drv_req = fd_to_drv_req(fd);
-        if (!drv_req) {
+        drv_req = (struct caam_drv_req *)phys_to_virt(fd->addr);
+        if (unlikely(!drv_req)) {
                 dev_err(qidev,
                         "Can't find original request for caam response\n");
                 return qman_cb_dqrr_consume;
         }
 
-        size = 2 * sizeof(struct qm_sg_entry);
         dma_unmap_single(drv_req->drv_ctx->qidev, fd->addr,
                          size, DMA_BIDIRECTIONAL);
 
@@ -740,7 +598,7 @@ static enum qman_cb_dqrr_result caam_rsp_fq_dqrr_cb(struct qman_portal *p,
         return qman_cb_dqrr_consume;
 }
 
-static int alloc_rsp_fq_cpu(struct device *qidev, unsigned int cpu)
+static int alloc_rsp_fq_cpu(struct device *qidev, unsigned int cpu, u32 pool)
 {
         struct qm_mcc_initfq opts;
         struct qman_fq *fq;
@@ -751,8 +609,7 @@ static int alloc_rsp_fq_cpu(struct device *qidev, unsigned int cpu)
 
         fq->cb.dqrr = caam_rsp_fq_dqrr_cb;
 
-        flags = QMAN_FQ_FLAG_NO_ENQUEUE |
-                QMAN_FQ_FLAG_DYNAMIC_FQID;
+        flags = QMAN_FQ_FLAG_NO_ENQUEUE | QMAN_FQ_FLAG_DYNAMIC_FQID;
 
         ret = qman_create_fq(0, flags, fq);
         if (ret) {
@@ -770,9 +627,9 @@ static int alloc_rsp_fq_cpu(struct device *qidev, unsigned int cpu)
                        QM_FQCTRL_CPCSTASH | QM_FQCTRL_CGE;
 
-        opts.fqd.dest.channel = qman_affine_channel(cpu);
+        opts.fqd.dest.channel = (u16)pool;
         opts.fqd.cgid = qipriv.rsp_cgr.cgrid;
-        opts.fqd.dest.wq = 1;
+        opts.fqd.dest.wq = 0;
         opts.fqd.context_a.stashing.exclusive = QM_STASHING_EXCL_CTX |
                                                 QM_STASHING_EXCL_DATA;
@@ -793,6 +650,9 @@ static int alloc_cgrs(struct device *qidev)
 {
         struct qm_mcc_initcgr opts;
         int ret;
+        const u64 cpus = *(u64 *)qman_affine_cpus();
+        const int num_cpus = __builtin_popcountll(cpus);
+        u64 val;
 
         /*Allocate response CGR*/
         ret = qman_alloc_cgrid(&qipriv.rsp_cgr.cgrid);
@@ -807,64 +667,69 @@ static int alloc_cgrs(struct device *qidev)
                        QM_CGR_WE_MODE;
         opts.cgr.cscn_en = QM_CGR_EN;
         opts.cgr.mode = QMAN_CGR_MODE_FRAME;
-        qm_cgr_cs_thres_set64(&opts.cgr.cs_thres, 0x400 , 1);
+#ifdef CONFIG_FSL_DPAA_ETH
+        /*
+         * This effectively sets the to-CPU threshold equal to half of the
+         * number of buffers available to dpa_eth driver. It means that at most
+         * half of the buffers can be in the pool channel from SEC, waiting
+         * to be transmitted to the core (and then on the TX queues).
+         * NOTE: This is an arbitrary division; the factor '2' below could
+         * also be '3' or '4'. It also depends on the number of devices
+         * using the dpa_eth buffers (which can be >1 if f.i. PME/DCE are
+         * also used.
+         */
+        val = num_cpus * CONFIG_FSL_DPAA_ETH_MAX_BUF_COUNT / 2;
+#else
+        val = num_cpus * MAX_RSP_FQ_BACKLOG_PER_CPU;
+#endif
+        qm_cgr_cs_thres_set64(&opts.cgr.cs_thres, val, 1);
 
         ret = qman_create_cgr(&qipriv.rsp_cgr, QMAN_CGR_FLAG_USE_INIT, &opts);
         if (ret) {
                 dev_err(qidev, "Error %d creating CAAM rsp CGRID: %u\n",
                         ret, qipriv.rsp_cgr.cgrid);
-                goto create_rsp_cgr_fail;
-        }
-
-        /*Allocate request CGR*/
-        ret = qman_alloc_cgrid(&qipriv.req_cgr.cgrid);
-        if (ret) {
-                dev_err(qidev, "CGR alloc failed for req FQs");
-                goto alloc_req_cgrid_fail;
-        }
-
-        qipriv.req_cgr.cb = req_cgr_cb;
-        memset(&opts, 0, sizeof(opts));
-        opts.we_mask = QM_CGR_WE_CSCN_EN | QM_CGR_WE_CS_THRES;
-        opts.cgr.cscn_en = QM_CGR_EN;
-        qm_cgr_cs_thres_set64(&opts.cgr.cs_thres, CAAM_REQ_CGR_THRESHOLD , 1);
-
-        ret = qman_create_cgr(&qipriv.req_cgr,
-                              QMAN_CGR_FLAG_USE_INIT, &opts);
-        if (ret) {
-                dev_err(qidev, "Error %d creating CAAM req CGRID: %u\n",
-                        ret, qipriv.req_cgr.cgrid);
-                goto create_req_cgr_fail;
+                return ret;
         }
 
         return 0;
+}
 
-create_req_cgr_fail:
-        qman_release_cgrid(qipriv.req_cgr.cgrid);
-
-alloc_req_cgrid_fail:
-        qman_delete_cgr(&qipriv.rsp_cgr);
-
-create_rsp_cgr_fail:
-        qman_release_cgrid(qipriv.rsp_cgr.cgrid);
-
-        return ret;
+static inline void add_cpu2pool(int cpu, u32 pool)
+{
+        struct qman_portal *portal =
+                        (struct qman_portal *)qman_get_affine_portal(cpu);
+
+        qman_p_static_dequeue_add(portal,
+                                  QM_SDQCR_CHANNELS_POOL_CONV((u16)pool));
 }
 
 static int alloc_rsp_fqs(struct device *qidev)
 {
         const cpumask_t *cpus = qman_affine_cpus();
         int ret, i;
+        u32 pool;
+
+        ret = qman_alloc_pool(&pool);
+        if (ret) {
+                dev_err(qidev, "CAAM pool alloc failed: %d\n", ret);
+                return ret;
+        }
 
         /*Now create response FQs*/
         for_each_cpu(i, cpus) {
-                ret = alloc_rsp_fq_cpu(qidev, i);
+                ret = alloc_rsp_fq_cpu(qidev, i, pool);
                 if (ret) {
                         dev_err(qidev, "CAAM rsp FQ alloc failed, cpu: %u", i);
                         return ret;
                 }
+                add_cpu2pool(i, pool);
         }
 
+        /*
+         * The pool will be used (i.e. set as destination only from this CPU
+         * (the CPU performing the initialization).
+         */
+        *this_cpu_ptr(&pcpu_qipriv.pool) = pool;
+
         return 0;
 }
 
@@ -905,6 +770,9 @@ int caam_qi_init(struct platform_device *caam_pdev, struct device_node *np)
                 return -ENODEV;
         }
 
+        /* Response path cannot be congested */
+        caam_congested = false;
+
         /* Initialise the CGRs congestion detection */
         err = alloc_cgrs(qidev);
         if (err) {
@@ -928,9 +796,6 @@ int caam_qi_init(struct platform_device *caam_pdev, struct device_node *np)
         for_each_cpu(i, cpus) {
                 per_cpu(pcpu_qipriv.net_dev, i).dev = *qidev;
 
-                spin_lock_init(&per_cpu(pcpu_qipriv.listlock, i));
-                INIT_LIST_HEAD(&per_cpu(pcpu_qipriv.bklog_list, i));
-
                 INIT_LIST_HEAD(&per_cpu(pcpu_qipriv.net_dev, i).napi_list);
 
                 netif_napi_add(&per_cpu(pcpu_qipriv.net_dev, i),
diff --git a/drivers/crypto/caam/qi.h b/drivers/crypto/caam/qi.h
index 109f890..e8e7c1a 100644
--- a/drivers/crypto/caam/qi.h
+++ b/drivers/crypto/caam/qi.h
@@ -85,10 +85,6 @@ struct caam_drv_req {
         struct caam_drv_ctx *drv_ctx;
         caam_qi_cbk cbk;
         void *app_ctx;
-
-        /* The fields below are for internal use by QI backend driver */
-        struct list_head hdr__;
-        dma_addr_t hwaddr;
 } ____cacheline_aligned;
 
 /*
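
A note on the replacement congestion control: the response CGR threshold now
scales with the number of portal-affine CPUs rather than using the old fixed
values (0x400 frames for the response CGR, 0x1000000 for the request CGR).
The arithmetic can be checked with a small stand-alone program; the CPU count
of 8 and the CONFIG_FSL_DPAA_ETH_MAX_BUF_COUNT value of 128 below are
assumptions for illustration, not values taken from the patch:

#include <stdio.h>

/* Assumed values, for illustration only */
#define NUM_AFFINE_CPUS                 8
#define DPAA_ETH_MAX_BUF_COUNT          128     /* assumed Kconfig default */
#define MAX_RSP_FQ_BACKLOG_PER_CPU      64      /* fallback from the patch */

int main(void)
{
        /* CONFIG_FSL_DPAA_ETH set: at most half of the dpa_eth buffers
         * may sit in the pool channel waiting for the cores */
        unsigned long long eth = NUM_AFFINE_CPUS * DPAA_ETH_MAX_BUF_COUNT / 2;

        /* otherwise: a fixed backlog per CPU */
        unsigned long long plain = NUM_AFFINE_CPUS * MAX_RSP_FQ_BACKLOG_PER_CPU;

        printf("threshold with dpa_eth:    %llu frames\n", eth);   /* 512 */
        printf("threshold without dpa_eth: %llu frames\n", plain); /* 512 */
        return 0;
}

With these assumed values the two branches happen to coincide at 64 frames per
CPU. Once the CGR enters congestion, rsp_cgr_cb() flips caam_congested and
caam_drv_ctx_busy() tells callers to back off, replacing the old per-CPU
pending counter checked against PENDING_JOBS_DEPTH.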