author    Alex Porosanu <alexandru.porosanu@freescale.com>  2015-02-18 18:55:52 (GMT)
committer Honghua Yin <Hong-Hua.Yin@freescale.com>          2015-03-31 03:04:14 (GMT)
commit    1bbe4d7969a8fa2eee0db459032402e6713f2a6e (patch)
tree      7901239483d65aacaad9cb07ad2cbf83a7011120 /drivers/crypto
parent    8a5d8f6a1b98ef51135b67a4c52eebb0e9b69040 (diff)
crypto: caam - remove list lookup of requests
This patch removes the per-packet lookup of a completed request by adding an opaque field after the SG entries in the FD. While here, the software congestion control is removed and replaced with a proper CGR with a sensible threshold, and some very likely branches are decorated with branch-prediction hints.

Change-Id: I48f2c71b6ac0d537843a44f8c0627c9b70c77592
Signed-off-by: Alex Porosanu <alexandru.porosanu@freescale.com>
Reviewed-on: http://git.am.freescale.net:8181/31368
Tested-by: Review Code-CDREVIEW <CDREVIEW@freescale.com>
Reviewed-by: Alexandru Marginean <Alexandru.Marginean@freescale.com>
Reviewed-by: Honghua Yin <Hong-Hua.Yin@freescale.com>
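For context, a minimal standalone sketch (userspace C with simplified stand-in types, not the driver's real definitions) of why the per-CPU backlog list becomes unnecessary: the compound FD carries the bus address of req->fd_sgt, and the dequeue/ERN callbacks in the hunks below recover the request with a single phys_to_virt() cast, which only works as long as fd_sgt[] stays the first member of struct caam_drv_req.

/*
 * Illustration only: simulates the FD-address-to-request mapping in plain
 * userspace C; the kernel driver uses dma_map_single()/phys_to_virt().
 */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct qm_sg_entry { uint64_t addr; uint32_t length; };	/* simplified stand-in */

struct caam_drv_req {
	struct qm_sg_entry fd_sgt[2];	/* must remain the first member */
	void *app_ctx;
};

/* The cast in the response callback relies on this layout. */
_Static_assert(offsetof(struct caam_drv_req, fd_sgt) == 0,
	       "FD address must map back to the request");

/* Stand-ins for dma_map_single() / phys_to_virt(). */
static uintptr_t fake_dma_map(void *cpu_addr) { return (uintptr_t)cpu_addr; }
static void *fake_phys_to_virt(uintptr_t bus_addr) { return (void *)bus_addr; }

int main(void)
{
	struct caam_drv_req req = { .app_ctx = (void *)0x1234 };

	/* Enqueue side: the FD carries the address of req.fd_sgt. */
	uintptr_t fd_addr = fake_dma_map(req.fd_sgt);

	/* Dequeue side: one cast instead of a locked list walk. */
	struct caam_drv_req *found = fake_phys_to_virt(fd_addr);

	assert(found == &req && found->app_ctx == req.app_ctx);
	printf("recovered request %p, app_ctx %p\n", (void *)found, found->app_ctx);
	return 0;
}

This is also why the patch can drop the hdr__/hwaddr bookkeeping fields from struct caam_drv_req in qi.h.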
Diffstat (limited to 'drivers/crypto')
-rw-r--r--  drivers/crypto/caam/qi.c  295
-rw-r--r--  drivers/crypto/caam/qi.h    4
2 files changed, 80 insertions(+), 219 deletions(-)
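The alloc_cgrs() hunk below replaces the fixed response-CGR threshold (0x400) with one scaled by the number of affine portals. A rough, illustrative calculation of the new threshold (the CPU count and the CONFIG_FSL_DPAA_ETH_MAX_BUF_COUNT value here are made-up examples, not taken from a real board or defconfig):

#include <stdio.h>

/* Illustrative values only -- not from a real board or defconfig. */
#define NUM_AFFINE_CPUS              8
#define FSL_DPAA_ETH_MAX_BUF_COUNT 128	/* stand-in for the Kconfig option */
#define MAX_RSP_FQ_BACKLOG_PER_CPU  64	/* fallback when DPAA_ETH is absent */

int main(void)
{
	/* With dpa_eth: at most half of its buffers may wait on the from-SEC
	 * pool channel before the congestion state change notification fires. */
	printf("rsp CGR threshold (DPAA_ETH): %d frames\n",
	       NUM_AFFINE_CPUS * FSL_DPAA_ETH_MAX_BUF_COUNT / 2);	/* 512 */

	/* Without dpa_eth: a flat per-CPU backlog budget. */
	printf("rsp CGR threshold (fallback): %d frames\n",
	       NUM_AFFINE_CPUS * MAX_RSP_FQ_BACKLOG_PER_CPU);		/* 512 */
	return 0;
}

Once the threshold is crossed, rsp_cgr_cb() flips the caam_congested flag that caam_drv_ctx_busy() now reports.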
diff --git a/drivers/crypto/caam/qi.c b/drivers/crypto/caam/qi.c
index 673f474..0a01674 100644
--- a/drivers/crypto/caam/qi.c
+++ b/drivers/crypto/caam/qi.c
@@ -15,10 +15,12 @@
#include "intern.h"
#include "desc_constr.h"
-#define CAAM_REQ_CGR_THRESHOLD 0x1000000
#define PRE_HDR_LEN 2 /* Length in u32 words */
#define PREHDR_RSLS_SHIFT 31
-#define PENDING_JOBS_DEPTH 512
+#ifndef CONFIG_FSL_DPAA_ETH
+/* If DPA_ETH is not available, then use a reasonable backlog per CPU */
+#define MAX_RSP_FQ_BACKLOG_PER_CPU 64
+#endif
/*
* The jobs are processed by the driver against a driver context.
* With every cryptographic context, a driver context is attached.
@@ -44,27 +46,26 @@ struct caam_qi_pcpu_priv {
struct napi_struct irqtask; /* IRQ task for QI backend */
struct net_device net_dev; /* netdev used by NAPI */
struct qman_fq rsp_fq; /* Response FQ from CAAM */
- spinlock_t listlock ____cacheline_aligned; /* for protecting
- * simultaneous access
- * to bklog_list */
- struct list_head bklog_list; /* List of pending responses*/
- atomic_t pending; /* Number of pending responses
- * from CAAM on this cpu */
+ u32 pool; /* Pool channel used by all from-SEC
+ queues */
} ____cacheline_aligned;
static DEFINE_PER_CPU(struct caam_qi_pcpu_priv, pcpu_qipriv);
struct caam_qi_priv {
- bool sec_congested; /* Indicates whether SEC is congested */
- bool cpu_congested; /* Indicates whether CPU is congested */
struct qman_cgr rsp_cgr; /* QMAN response CGR */
- struct qman_cgr req_cgr; /* QMAN request CGR */
struct platform_device *qi_pdev; /* Platform device for QI backend */
};
static struct caam_qi_priv qipriv ____cacheline_aligned;
/*
+ * This is written by one core - the one that initialized the CGR, and
+ * read by multiple cores (all the others)
+ */
+static bool caam_congested __read_mostly;
+
+/*
* CPU from where the module initialised. This is required because
* QMAN driver requires CGRs to be removed from same CPU from where
* they were originally allocated
@@ -73,16 +74,7 @@ static int mod_init_cpu;
bool caam_drv_ctx_busy(struct caam_drv_ctx *drv_ctx)
{
- int pending;
-
- if (qipriv.sec_congested)
- return true;
-
- pending = atomic_read(&per_cpu(pcpu_qipriv.pending, drv_ctx->cpu));
- if (pending >= PENDING_JOBS_DEPTH)
- return true;
-
- return false;
+ return caam_congested;
}
EXPORT_SYMBOL(caam_drv_ctx_busy);
@@ -90,33 +82,20 @@ int caam_qi_enqueue(struct device *qidev, struct caam_drv_req *req)
{
struct qm_fd fd;
int ret;
- size_t size;
- struct list_head *list;
+ const size_t size = 2 * sizeof(struct qm_sg_entry);
int num_retries = 0;
- unsigned long flags;
fd.cmd = 0;
fd.format = qm_fd_compound;
fd.cong_weight = req->fd_sgt[1].length;
- size = 2 * sizeof(struct qm_sg_entry);
-
- fd.addr = dma_map_single(qidev, req->fd_sgt, size , DMA_BIDIRECTIONAL);
+ fd.addr = dma_map_single(qidev, req->fd_sgt, size,
+ DMA_BIDIRECTIONAL);
if (dma_mapping_error(qidev, fd.addr)) {
dev_err(qidev, "DMA mapping error for QI enqueue request\n");
return -EIO;
}
- req->hwaddr = qm_fd_addr(&fd);
- list = &per_cpu(pcpu_qipriv.bklog_list, req->drv_ctx->cpu);
-
- spin_lock_irqsave(&per_cpu(pcpu_qipriv.listlock, req->drv_ctx->cpu),
- flags);
- list_add_tail(&req->hdr__, list);
- spin_unlock_irqrestore(&per_cpu(pcpu_qipriv.listlock,
- req->drv_ctx->cpu), flags);
- atomic_inc(&per_cpu(pcpu_qipriv.pending, req->drv_ctx->cpu));
-
do {
ret = qman_enqueue(req->drv_ctx->req_fq, &fd, 0);
if (likely(!ret))
@@ -129,84 +108,16 @@ int caam_qi_enqueue(struct device *qidev, struct caam_drv_req *req)
dev_err(qidev, "qman_enqueue failed: %d\n", ret);
- spin_lock_irqsave(&per_cpu(pcpu_qipriv.listlock, req->drv_ctx->cpu),
- flags);
- list_del(&req->hdr__);
- spin_unlock_irqrestore(&per_cpu(pcpu_qipriv.listlock,
- req->drv_ctx->cpu), flags);
- atomic_dec(&per_cpu(pcpu_qipriv.pending, req->drv_ctx->cpu));
-
- dma_unmap_single(qidev, fd.addr, size, DMA_BIDIRECTIONAL);
return ret;
}
EXPORT_SYMBOL(caam_qi_enqueue);
-struct caam_drv_req *lookup_drv_req(const struct qm_fd *fd, int cpu)
-{
- struct list_head *pos, *list, *n;
- struct caam_drv_req *req;
- unsigned long flags;
-
- list = &per_cpu(pcpu_qipriv.bklog_list, cpu);
- list_for_each_safe(pos, n, list) {
- req = container_of(pos, struct caam_drv_req, hdr__);
-
- if (req->hwaddr == qm_fd_addr(fd)) {
- BUG_ON(req->drv_ctx->cpu != cpu);
-
- spin_lock_irqsave(&per_cpu(pcpu_qipriv.listlock,
- req->drv_ctx->cpu), flags);
- list_del(&req->hdr__);
- spin_unlock_irqrestore(&per_cpu(pcpu_qipriv.listlock,
- req->drv_ctx->cpu),
- flags);
- atomic_dec(&per_cpu(pcpu_qipriv.pending,
- req->drv_ctx->cpu));
- return req;
- }
- }
-
- return NULL;
-}
-
-
-static struct caam_drv_req *fd_to_drv_req(const struct qm_fd *fd)
-{
- struct caam_drv_req *req;
- const cpumask_t *cpus = qman_affine_cpus();
- int i;
-
- /*
- * First check on this_cpu since this is likely case of normal caam
- * response path.
- */
- req = lookup_drv_req(fd, smp_processor_id());
- if (likely(req))
- return req;
- /*
- * If drv_req is not found on this_cpu, then try searching on other
- * portal owning cpus. This is required to handle ERN callbacks and
- * volatile dequeues. These may be issued on a CPU which is different
- * than the one associated with the drv_req's drv_ctx.
- */
- for_each_cpu(i, cpus) {
- if (i == smp_processor_id())
- continue; /* Already checked */
- req = lookup_drv_req(fd, i);
-
- if (req)
- return req;
- }
-
- return NULL;
-}
-
static void caam_fq_ern_cb(struct qman_portal *qm, struct qman_fq *fq,
const struct qm_mr_entry *msg)
{
const struct qm_fd *fd;
struct caam_drv_req *drv_req;
- size_t size;
+ const size_t size = 2 * sizeof(struct qm_sg_entry);
struct device *qidev = &per_cpu(pcpu_qipriv.net_dev,
smp_processor_id()).dev;
@@ -217,46 +128,17 @@ static void caam_fq_ern_cb(struct qman_portal *qm, struct qman_fq *fq,
return;
}
- drv_req = fd_to_drv_req(fd);
+ drv_req = ((struct caam_drv_req *)phys_to_virt(fd->addr));
if (!drv_req) {
dev_err(qidev,
"Can't find original request for caam response\n");
return;
}
- size = 2 * sizeof(struct qm_sg_entry);
- dma_unmap_single(drv_req->drv_ctx->qidev, fd->addr,
- size, DMA_BIDIRECTIONAL);
-
- drv_req->cbk(drv_req, -EIO);
-}
-
-static enum qman_cb_dqrr_result caam_req_fq_dqrr_cb(struct qman_portal *p,
- struct qman_fq *req_fq,
- const struct qm_dqrr_entry *dqrr)
-{
- struct caam_drv_req *drv_req;
- const struct qm_fd *fd;
- size_t size;
- struct device *qidev = &per_cpu(pcpu_qipriv.net_dev,
- smp_processor_id()).dev;
-
- fd = &dqrr->fd;
-
- drv_req = fd_to_drv_req(fd);
- if (!drv_req) {
- dev_err(qidev,
- "Can't find original request for caam response\n");
- return qman_cb_dqrr_consume;
- }
-
- size = 2 * sizeof(struct qm_sg_entry);
dma_unmap_single(drv_req->drv_ctx->qidev, fd->addr,
size, DMA_BIDIRECTIONAL);
drv_req->cbk(drv_req, -EIO);
-
- return qman_cb_dqrr_consume;
}
static struct qman_fq *create_caam_req_fq(struct device *qidev,
@@ -274,7 +156,6 @@ static struct qman_fq *create_caam_req_fq(struct device *qidev,
return ERR_PTR(-ENOMEM);
}
- req_fq->cb.dqrr = caam_req_fq_dqrr_cb;
req_fq->cb.ern = caam_fq_ern_cb;
req_fq->cb.fqs = NULL;
@@ -290,13 +171,11 @@ static struct qman_fq *create_caam_req_fq(struct device *qidev,
flags = fq_sched_flag;
opts.we_mask = QM_INITFQ_WE_FQCTRL | QM_INITFQ_WE_DESTWQ |
- QM_INITFQ_WE_CONTEXTB | QM_INITFQ_WE_CONTEXTA |
- QM_INITFQ_WE_CGID;
+ QM_INITFQ_WE_CONTEXTB | QM_INITFQ_WE_CONTEXTA;
- opts.fqd.fq_ctrl = QM_FQCTRL_CPCSTASH | QM_FQCTRL_CGE;
+ opts.fqd.fq_ctrl = QM_FQCTRL_CPCSTASH;
opts.fqd.dest.channel = qm_channel_caam;
- opts.fqd.dest.wq = 3;
- opts.fqd.cgid = qipriv.req_cgr.cgrid;
+ opts.fqd.dest.wq = 0;
opts.fqd.context_b = qman_fq_fqid(rsp_fq);
opts.fqd.context_a.hi = upper_32_bits(hwdesc);
opts.fqd.context_a.lo = lower_32_bits(hwdesc);
@@ -635,18 +514,15 @@ int caam_qi_shutdown(struct device *qidev)
*/
set_cpus_allowed_ptr(current, get_cpu_mask(mod_init_cpu));
- ret = qman_delete_cgr(&priv->req_cgr);
- if (ret)
- dev_err(qidev, "Delete request CGR failed: %d\n", ret);
- else
- qman_release_cgrid(priv->req_cgr.cgrid);
-
ret = qman_delete_cgr(&priv->rsp_cgr);
if (ret)
dev_err(qidev, "Delete response CGR failed: %d\n", ret);
else
qman_release_cgrid(priv->rsp_cgr.cgrid);
+ /* Release the pool channel */
+ qman_release_pool(*this_cpu_ptr(&pcpu_qipriv.pool));
+
/* Now that we're done with the CGRs, restore the cpus allowed mask */
set_cpus_allowed_ptr(current, &old_cpumask);
@@ -657,29 +533,12 @@ int caam_qi_shutdown(struct device *qidev)
static void rsp_cgr_cb(struct qman_portal *qm, struct qman_cgr *cgr,
int congested)
{
- struct device *qidev = &per_cpu(pcpu_qipriv.net_dev,
- smp_processor_id()).dev;
-
- qipriv.cpu_congested = congested;
-
- if (congested)
- dev_warn(qidev, "CAAM rsp path congested\n");
- else
- dev_info(qidev, "CAAM rsp path congestion state exit\n");
-}
-
-static void req_cgr_cb(struct qman_portal *qm, struct qman_cgr *cgr,
- int congested)
-{
- struct device *qidev = &per_cpu(pcpu_qipriv.net_dev,
- smp_processor_id()).dev;
-
- qipriv.sec_congested = congested;
+ caam_congested = congested;
if (congested)
- dev_warn(qidev, "CAAM req path congested\n");
+ pr_warn_ratelimited("CAAM rsp path congested\n");
else
- dev_info(qidev, "CAAM req path congestion state exit\n");
+ pr_info_ratelimited("CAAM rsp path congestion state exit\n");
}
static int caam_qi_napi_schedule(struct napi_struct *napi)
@@ -707,7 +566,7 @@ static enum qman_cb_dqrr_result caam_rsp_fq_dqrr_cb(struct qman_portal *p,
struct napi_struct *napi;
struct caam_drv_req *drv_req;
const struct qm_fd *fd;
- size_t size;
+ const size_t size = 2 * sizeof(struct qm_sg_entry);
struct device *qidev = &per_cpu(pcpu_qipriv.net_dev,
smp_processor_id()).dev;
@@ -719,19 +578,18 @@ static enum qman_cb_dqrr_result caam_rsp_fq_dqrr_cb(struct qman_portal *p,
if (unlikely(fd->status))
dev_err(qidev, "Error: %#x in CAAM response FD\n", fd->status);
- if (qm_fd_compound != fd->format) {
+ if (unlikely(qm_fd_compound != fd->format)) {
dev_err(qidev, "Non compound FD from CAAM\n");
return qman_cb_dqrr_consume;
}
- drv_req = fd_to_drv_req(fd);
- if (!drv_req) {
+ drv_req = (struct caam_drv_req *)phys_to_virt(fd->addr);
+ if (unlikely(!drv_req)) {
dev_err(qidev,
"Can't find original request for caam response\n");
return qman_cb_dqrr_consume;
}
- size = 2 * sizeof(struct qm_sg_entry);
dma_unmap_single(drv_req->drv_ctx->qidev, fd->addr,
size, DMA_BIDIRECTIONAL);
@@ -740,7 +598,7 @@ static enum qman_cb_dqrr_result caam_rsp_fq_dqrr_cb(struct qman_portal *p,
return qman_cb_dqrr_consume;
}
-static int alloc_rsp_fq_cpu(struct device *qidev, unsigned int cpu)
+static int alloc_rsp_fq_cpu(struct device *qidev, unsigned int cpu, u32 pool)
{
struct qm_mcc_initfq opts;
struct qman_fq *fq;
@@ -751,8 +609,7 @@ static int alloc_rsp_fq_cpu(struct device *qidev, unsigned int cpu)
fq->cb.dqrr = caam_rsp_fq_dqrr_cb;
- flags = QMAN_FQ_FLAG_NO_ENQUEUE |
- QMAN_FQ_FLAG_DYNAMIC_FQID;
+ flags = QMAN_FQ_FLAG_NO_ENQUEUE | QMAN_FQ_FLAG_DYNAMIC_FQID;
ret = qman_create_fq(0, flags, fq);
if (ret) {
@@ -770,9 +627,9 @@ static int alloc_rsp_fq_cpu(struct device *qidev, unsigned int cpu)
QM_FQCTRL_CPCSTASH |
QM_FQCTRL_CGE;
- opts.fqd.dest.channel = qman_affine_channel(cpu);
+ opts.fqd.dest.channel = (u16)pool;
opts.fqd.cgid = qipriv.rsp_cgr.cgrid;
- opts.fqd.dest.wq = 1;
+ opts.fqd.dest.wq = 0;
opts.fqd.context_a.stashing.exclusive =
QM_STASHING_EXCL_CTX |
QM_STASHING_EXCL_DATA;
@@ -793,6 +650,9 @@ static int alloc_cgrs(struct device *qidev)
{
struct qm_mcc_initcgr opts;
int ret;
+ const u64 cpus = *(u64 *)qman_affine_cpus();
+ const int num_cpus = __builtin_popcountll(cpus);
+ u64 val;
/*Allocate response CGR*/
ret = qman_alloc_cgrid(&qipriv.rsp_cgr.cgrid);
@@ -807,64 +667,69 @@ static int alloc_cgrs(struct device *qidev)
QM_CGR_WE_MODE;
opts.cgr.cscn_en = QM_CGR_EN;
opts.cgr.mode = QMAN_CGR_MODE_FRAME;
- qm_cgr_cs_thres_set64(&opts.cgr.cs_thres, 0x400 , 1);
+#ifdef CONFIG_FSL_DPAA_ETH
+ /*
+ * This effectively sets the to-CPU threshold equal to half of the
+ * number of buffers available to dpa_eth driver. It means that at most
+ * half of the buffers can be in the pool channel from SEC, waiting
+ * to be transmitted to the core (and then on the TX queues).
+ * NOTE: This is an arbitrary division; the factor '2' below could
+ * also be '3' or '4'. It also depends on the number of devices
+ * using the dpa_eth buffers (which can be >1 if, for instance, PME/DCE
+ * are also used).
+ */
+ val = num_cpus * CONFIG_FSL_DPAA_ETH_MAX_BUF_COUNT / 2;
+#else
+ val = num_cpus * MAX_RSP_FQ_BACKLOG_PER_CPU;
+#endif
+ qm_cgr_cs_thres_set64(&opts.cgr.cs_thres, val, 1);
ret = qman_create_cgr(&qipriv.rsp_cgr,
QMAN_CGR_FLAG_USE_INIT, &opts);
if (ret) {
dev_err(qidev, "Error %d creating CAAM rsp CGRID: %u\n",
ret, qipriv.rsp_cgr.cgrid);
- goto create_rsp_cgr_fail;
- }
-
- /*Allocate request CGR*/
- ret = qman_alloc_cgrid(&qipriv.req_cgr.cgrid);
- if (ret) {
- dev_err(qidev, "CGR alloc failed for req FQs");
- goto alloc_req_cgrid_fail;
- }
-
- qipriv.req_cgr.cb = req_cgr_cb;
- memset(&opts, 0, sizeof(opts));
- opts.we_mask = QM_CGR_WE_CSCN_EN | QM_CGR_WE_CS_THRES;
- opts.cgr.cscn_en = QM_CGR_EN;
- qm_cgr_cs_thres_set64(&opts.cgr.cs_thres, CAAM_REQ_CGR_THRESHOLD , 1);
-
- ret = qman_create_cgr(&qipriv.req_cgr,
- QMAN_CGR_FLAG_USE_INIT, &opts);
- if (ret) {
- dev_err(qidev, "Error %d creating CAAM req CGRID: %u\n",
- ret, qipriv.req_cgr.cgrid);
- goto create_req_cgr_fail;
+ return ret;
}
return 0;
+}
-create_req_cgr_fail:
- qman_release_cgrid(qipriv.req_cgr.cgrid);
-
-alloc_req_cgrid_fail:
- qman_delete_cgr(&qipriv.rsp_cgr);
-
-create_rsp_cgr_fail:
- qman_release_cgrid(qipriv.rsp_cgr.cgrid);
-
- return ret;
+static inline void add_cpu2pool(int cpu, u32 pool)
+{
+ struct qman_portal *portal =
+ (struct qman_portal *)qman_get_affine_portal(cpu);
+ qman_p_static_dequeue_add(portal,
+ QM_SDQCR_CHANNELS_POOL_CONV((u16)pool));
}
static int alloc_rsp_fqs(struct device *qidev)
{
const cpumask_t *cpus = qman_affine_cpus();
int ret, i;
+ u32 pool;
+
+ ret = qman_alloc_pool(&pool);
+ if (ret) {
+ dev_err(qidev, "CAAM pool alloc failed: %d\n", ret);
+ return ret;
+ }
/*Now create response FQs*/
for_each_cpu(i, cpus) {
- ret = alloc_rsp_fq_cpu(qidev, i);
+ ret = alloc_rsp_fq_cpu(qidev, i, pool);
if (ret) {
dev_err(qidev, "CAAM rsp FQ alloc failed, cpu: %u", i);
return ret;
}
+ add_cpu2pool(i, pool);
}
+ /*
+ * The pool will be used (i.e. set as destination) only from this CPU
+ * (the CPU performing the initialization).
+ */
+ *this_cpu_ptr(&pcpu_qipriv.pool) = pool;
+
return 0;
}
@@ -905,6 +770,9 @@ int caam_qi_init(struct platform_device *caam_pdev, struct device_node *np)
return -ENODEV;
}
+ /* Response path cannot be congested */
+ caam_congested = false;
+
/* Initialise the CGRs congestion detection */
err = alloc_cgrs(qidev);
if (err) {
@@ -928,9 +796,6 @@ int caam_qi_init(struct platform_device *caam_pdev, struct device_node *np)
for_each_cpu(i, cpus) {
per_cpu(pcpu_qipriv.net_dev, i).dev = *qidev;
- spin_lock_init(&per_cpu(pcpu_qipriv.listlock, i));
- INIT_LIST_HEAD(&per_cpu(pcpu_qipriv.bklog_list, i));
-
INIT_LIST_HEAD(&per_cpu(pcpu_qipriv.net_dev, i).napi_list);
netif_napi_add(&per_cpu(pcpu_qipriv.net_dev, i),
diff --git a/drivers/crypto/caam/qi.h b/drivers/crypto/caam/qi.h
index 109f890..e8e7c1a 100644
--- a/drivers/crypto/caam/qi.h
+++ b/drivers/crypto/caam/qi.h
@@ -85,10 +85,6 @@ struct caam_drv_req {
struct caam_drv_ctx *drv_ctx;
caam_qi_cbk cbk;
void *app_ctx;
-
- /* The fields below are for internal use by QI backend driver */
- struct list_head hdr__;
- dma_addr_t hwaddr;
} ____cacheline_aligned;
/*