summaryrefslogtreecommitdiff
path: root/drivers/crypto
diff options
context:
space:
mode:
authorAlex Porosanu <alexandru.porosanu@freescale.com>2015-02-18 18:46:02 (GMT)
committerHonghua Yin <Hong-Hua.Yin@freescale.com>2015-04-01 09:23:03 (GMT)
commit4ada5f182eb5eab67f85d712d5f77044f64c5f0d (patch)
treef61e015b82afd42cb1e34f4baf2813c566bec11f /drivers/crypto
parentf462376952a15901b408f09d2a1e0675aa628e86 (diff)
downloadlinux-fsl-qoriq-4ada5f182eb5eab67f85d712d5f77044f64c5f0d.tar.xz
crypto: caam - change QI memory allocation on hotpath
This patch changes the allocation of memory on the hotpath, by replacing
the malloc allocation with a kmem_cache backed allocation.

Change-Id: I55a21bb7cbd022f6b86bd784988491b551a0f81e
Signed-off-by: Alex Porosanu <alexandru.porosanu@freescale.com>
Reviewed-on: http://git.am.freescale.net:8181/31371
Tested-by: Review Code-CDREVIEW <CDREVIEW@freescale.com>
Reviewed-by: Mircea Pop <mircea.pop@freescale.com>
Reviewed-by: Honghua Yin <Hong-Hua.Yin@freescale.com>
Diffstat (limited to 'drivers/crypto')
-rw-r--r--drivers/crypto/caam/caamalg_qi.c26
-rw-r--r--drivers/crypto/caam/qi.c41
-rw-r--r--drivers/crypto/caam/qi.h24
3 files changed, 77 insertions, 14 deletions
diff --git a/drivers/crypto/caam/caamalg_qi.c b/drivers/crypto/caam/caamalg_qi.c
index 26546db..81b82bd 100644
--- a/drivers/crypto/caam/caamalg_qi.c
+++ b/drivers/crypto/caam/caamalg_qi.c
@@ -970,7 +970,7 @@ static void aead_done(struct caam_drv_req *drv_req, u32 status)
aead_unmap(qidev, edesc, aead_req);
aead_request_complete(aead_req, ecode);
- kfree(edesc);
+ qi_cache_free(edesc);
}
/* For now, identical to aead_done */
@@ -994,7 +994,7 @@ static inline void tls_encrypt_done(struct caam_drv_req *drv_req, u32 status)
aead_unmap(qidev, edesc, aead_req);
aead_request_complete(aead_req, ecode);
- kfree(edesc);
+ qi_cache_free(edesc);
}
static inline void tls_decrypt_done(struct caam_drv_req *drv_req, u32 status)
@@ -1046,7 +1046,7 @@ static inline void tls_decrypt_done(struct caam_drv_req *drv_req, u32 status)
out:
aead_request_complete(aead_req, ecode);
- kfree(edesc);
+ qi_cache_free(edesc);
}
/*
@@ -1120,9 +1120,8 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
qm_sg_bytes = qm_sg_ents * sizeof(struct qm_sg_entry);
/* allocate space for base edesc and hw desc commands, link tables */
- edesc = kmalloc(sizeof(struct aead_edesc) + qm_sg_bytes,
- GFP_DMA | flags);
- if (!edesc) {
+ edesc = qi_cache_alloc(GFP_DMA | flags);
+ if (unlikely(!edesc)) {
dev_err(qidev, "could not allocate extended descriptor\n");
return ERR_PTR(-ENOMEM);
}
@@ -1276,7 +1275,7 @@ static int aead_encrypt(struct aead_request *req)
ret = -EINPROGRESS;
} else {
aead_unmap(qidev, edesc, req);
- kfree(edesc);
+ qi_cache_free(edesc);
}
return ret;
@@ -1318,7 +1317,7 @@ static int aead_decrypt(struct aead_request *req)
ret = -EINPROGRESS;
} else {
aead_unmap(qidev, edesc, req);
- kfree(edesc);
+ qi_cache_free(edesc);
}
return ret;
@@ -1371,7 +1370,7 @@ static int tls_encrypt(struct aead_request *req)
ret = -EINPROGRESS;
} else {
aead_unmap(qidev, edesc, req);
- kfree(edesc);
+ qi_cache_free(edesc);
}
return ret;
@@ -1424,7 +1423,7 @@ static int tls_decrypt(struct aead_request *req)
ret = -EINPROGRESS;
} else {
aead_unmap(qidev, edesc, req);
- kfree(edesc);
+ qi_cache_free(edesc);
}
return ret;
@@ -1505,9 +1504,8 @@ static struct aead_edesc *aead_giv_edesc_alloc(struct aead_givcrypt_request
qm_sg_bytes = qm_sg_ents * sizeof(struct qm_sg_entry);
/* allocate space for base edesc and hw desc commands, link tables */
- edesc = kmalloc(sizeof(struct aead_edesc) + qm_sg_bytes,
- GFP_DMA | flags);
- if (!edesc) {
+ edesc = qi_cache_alloc(GFP_DMA | flags);
+ if (unlikely(!edesc)) {
dev_err(qidev, "could not allocate extended descriptor\n");
return ERR_PTR(-ENOMEM);
}
@@ -1625,7 +1623,7 @@ static int aead_givencrypt(struct aead_givcrypt_request *areq)
ret = -EINPROGRESS;
} else {
aead_unmap(qidev, edesc, req);
- kfree(edesc);
+ qi_cache_free(edesc);
}
return ret;
diff --git a/drivers/crypto/caam/qi.c b/drivers/crypto/caam/qi.c
index 52d3283..9937cd4 100644
--- a/drivers/crypto/caam/qi.c
+++ b/drivers/crypto/caam/qi.c
@@ -21,6 +21,9 @@
/* If DPA_ETH is not available, then use a reasonably backlog per CPU */
#define MAX_RSP_FQ_BACKLOG_PER_CPU 64
#endif
+#define CAAM_QI_MEMCACHE_SIZE 256 /* Length of a single buffer in
+                                   * the QI driver memory cache. */
+
/*
* The jobs are processed by the driver against a driver context.
* With every cryptographic context, a driver context is attached.
@@ -72,6 +75,18 @@ static bool caam_congested __read_mostly;
*/
static int mod_init_cpu;
+/*
+ * This is a cache of buffers, from which the users of the CAAM QI driver
+ * can allocate short (currently 256B) buffers. It's speedier than
+ * doing malloc on the hotpath.
+ * NOTE: A more elegant solution would be to have some headroom in the frames
+ * being processed. This can be added by the dpa_eth driver. This would
+ * pose a problem for userspace application processing which cannot
+ * know of this limitation. So for now, this will work.
+ * NOTE: The memcache is SMP-safe. No need to handle spinlocks in-here
+ */
+static struct kmem_cache *qi_cache;
+
bool caam_drv_ctx_busy(struct caam_drv_ctx *drv_ctx)
{
return caam_congested;
@@ -462,6 +477,18 @@ struct caam_drv_ctx *caam_drv_ctx_init(struct device *qidev,
}
EXPORT_SYMBOL(caam_drv_ctx_init);
+void *qi_cache_alloc(gfp_t flags)
+{
+ return kmem_cache_alloc(qi_cache, flags);
+}
+EXPORT_SYMBOL(qi_cache_alloc);
+
+void qi_cache_free(void *obj)
+{
+ kmem_cache_free(qi_cache, obj);
+}
+EXPORT_SYMBOL(qi_cache_free);
+
static int caam_qi_poll(struct napi_struct *napi, int budget)
{
int cleaned = qman_poll_dqrr(budget);
@@ -526,6 +553,9 @@ int caam_qi_shutdown(struct device *qidev)
/* Delete the pool channel */
qman_release_pool(*this_cpu_ptr(&pcpu_qipriv.pool));
+ if (qi_cache)
+ kmem_cache_destroy(qi_cache);
+
/* Now that we're done with the CGRs, restore the cpus allowed mask */
set_cpus_allowed_ptr(current, &old_cpumask);
@@ -783,6 +813,9 @@ int caam_qi_init(struct platform_device *caam_pdev, struct device_node *np)
/* Response path cannot be congested */
caam_congested = false;
+ /* kmem_cache wasn't yet allocated */
+ qi_cache = NULL;
+
/* Initialise the CGRs congestion detection */
err = alloc_cgrs(qidev);
if (err) {
@@ -818,6 +851,14 @@ int caam_qi_init(struct platform_device *caam_pdev, struct device_node *np)
/* Hook up QI device to parent controlling caam device */
ctrlpriv->qidev = qidev;
+ qi_cache = kmem_cache_create("caamqicache", 256, 0,
+ SLAB_CACHE_DMA, NULL);
+ if (!qi_cache) {
+ dev_err(qidev, "Can't allocate SEC cache\n");
+ platform_device_unregister(qi_pdev);
+ return err;
+ }
+
/* Done with the CGRs; restore the cpus allowed mask */
set_cpus_allowed_ptr(current, &old_cpumask);
diff --git a/drivers/crypto/caam/qi.h b/drivers/crypto/caam/qi.h
index e8e7c1a..33f7470 100644
--- a/drivers/crypto/caam/qi.h
+++ b/drivers/crypto/caam/qi.h
@@ -150,4 +150,28 @@ extern void caam_drv_ctx_rel(struct caam_drv_ctx *drv_ctx);
extern int caam_qi_init(struct platform_device *pdev, struct device_node *np);
extern int caam_qi_shutdown(struct device *dev);
+/*
+ * qi_cache_alloc - Allocate buffers from CAAM-QI cache
+ *
+ * Invoked when a user of the CAAM-QI (i.e. caamalg-qi) needs data which has
+ * to be allocated on the hotpath. Instead of using malloc, one can use the
+ * services of the CAAM QI memory cache (backed by kmem_cache). The buffers
+ * will have a size of 256B, which is sufficient for hosting 16 SG entries.
+ *
+ * flags - flags that would be used for the equivalent malloc(..) call
+ *
+ * Returns a pointer to a retrieved buffer on success or NULL on failure.
+ */
+extern void *qi_cache_alloc(gfp_t flags);
+
+/*
+ * qi_cache_free - Frees buffers allocated from CAAM-QI cache
+ *
+ * Invoked when a user of the CAAM-QI (i.e. caamalg-qi) no longer needs
+ * the buffer previously allocated by a qi_cache_alloc call.
+ * No checking is being done, the call is a passthrough call to
+ * kmem_cache_free(...)
+ */
+extern void qi_cache_free(void *obj);
+
#endif /* QI_H */