summaryrefslogtreecommitdiff
path: root/drivers/crypto
diff options
context:
space:
mode:
authorAlex Porosanu <alexandru.porosanu@freescale.com>2015-02-18 18:46:02 (GMT)
committerHonghua Yin <Hong-Hua.Yin@freescale.com>2015-04-01 09:23:03 (GMT)
commit4ada5f182eb5eab67f85d712d5f77044f64c5f0d (patch)
treef61e015b82afd42cb1e34f4baf2813c566bec11f /drivers/crypto
parentf462376952a15901b408f09d2a1e0675aa628e86 (diff)
downloadlinux-fsl-qoriq-4ada5f182eb5eab67f85d712d5f77044f64c5f0d.tar.xz
crypto: caam - change QI memory allocation on hotpath
This patch changes the allocation of memory on the hotpath, by replacing
the malloc allocation with a kmem_cache backed allocation.

Change-Id: I55a21bb7cbd022f6b86bd784988491b551a0f81e
Signed-off-by: Alex Porosanu <alexandru.porosanu@freescale.com>
Reviewed-on: http://git.am.freescale.net:8181/31371
Tested-by: Review Code-CDREVIEW <CDREVIEW@freescale.com>
Reviewed-by: Mircea Pop <mircea.pop@freescale.com>
Reviewed-by: Honghua Yin <Hong-Hua.Yin@freescale.com>
Diffstat (limited to 'drivers/crypto')
-rw-r--r--drivers/crypto/caam/caamalg_qi.c26
-rw-r--r--drivers/crypto/caam/qi.c41
-rw-r--r--drivers/crypto/caam/qi.h24
3 files changed, 77 insertions, 14 deletions
diff --git a/drivers/crypto/caam/caamalg_qi.c b/drivers/crypto/caam/caamalg_qi.c
index 26546db..81b82bd 100644
--- a/drivers/crypto/caam/caamalg_qi.c
+++ b/drivers/crypto/caam/caamalg_qi.c
@@ -970,7 +970,7 @@ static void aead_done(struct caam_drv_req *drv_req, u32 status)
aead_unmap(qidev, edesc, aead_req);
aead_request_complete(aead_req, ecode);
- kfree(edesc);
+ qi_cache_free(edesc);
}
/* For now, identical to aead_done */
@@ -994,7 +994,7 @@ static inline void tls_encrypt_done(struct caam_drv_req *drv_req, u32 status)
aead_unmap(qidev, edesc, aead_req);
aead_request_complete(aead_req, ecode);
- kfree(edesc);
+ qi_cache_free(edesc);
}
static inline void tls_decrypt_done(struct caam_drv_req *drv_req, u32 status)
@@ -1046,7 +1046,7 @@ static inline void tls_decrypt_done(struct caam_drv_req *drv_req, u32 status)
out:
aead_request_complete(aead_req, ecode);
- kfree(edesc);
+ qi_cache_free(edesc);
}
/*
@@ -1120,9 +1120,8 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
qm_sg_bytes = qm_sg_ents * sizeof(struct qm_sg_entry);
/* allocate space for base edesc and hw desc commands, link tables */
- edesc = kmalloc(sizeof(struct aead_edesc) + qm_sg_bytes,
- GFP_DMA | flags);
- if (!edesc) {
+ edesc = qi_cache_alloc(GFP_DMA | flags);
+ if (unlikely(!edesc)) {
dev_err(qidev, "could not allocate extended descriptor\n");
return ERR_PTR(-ENOMEM);
}
@@ -1276,7 +1275,7 @@ static int aead_encrypt(struct aead_request *req)
ret = -EINPROGRESS;
} else {
aead_unmap(qidev, edesc, req);
- kfree(edesc);
+ qi_cache_free(edesc);
}
return ret;
@@ -1318,7 +1317,7 @@ static int aead_decrypt(struct aead_request *req)
ret = -EINPROGRESS;
} else {
aead_unmap(qidev, edesc, req);
- kfree(edesc);
+ qi_cache_free(edesc);
}
return ret;
@@ -1371,7 +1370,7 @@ static int tls_encrypt(struct aead_request *req)
ret = -EINPROGRESS;
} else {
aead_unmap(qidev, edesc, req);
- kfree(edesc);
+ qi_cache_free(edesc);
}
return ret;
@@ -1424,7 +1423,7 @@ static int tls_decrypt(struct aead_request *req)
ret = -EINPROGRESS;
} else {
aead_unmap(qidev, edesc, req);
- kfree(edesc);
+ qi_cache_free(edesc);
}
return ret;
@@ -1505,9 +1504,8 @@ static struct aead_edesc *aead_giv_edesc_alloc(struct aead_givcrypt_request
qm_sg_bytes = qm_sg_ents * sizeof(struct qm_sg_entry);
/* allocate space for base edesc and hw desc commands, link tables */
- edesc = kmalloc(sizeof(struct aead_edesc) + qm_sg_bytes,
- GFP_DMA | flags);
- if (!edesc) {
+ edesc = qi_cache_alloc(GFP_DMA | flags);
+ if (unlikely(!edesc)) {
dev_err(qidev, "could not allocate extended descriptor\n");
return ERR_PTR(-ENOMEM);
}
@@ -1625,7 +1623,7 @@ static int aead_givencrypt(struct aead_givcrypt_request *areq)
ret = -EINPROGRESS;
} else {
aead_unmap(qidev, edesc, req);
- kfree(edesc);
+ qi_cache_free(edesc);
}
return ret;
diff --git a/drivers/crypto/caam/qi.c b/drivers/crypto/caam/qi.c
index 52d3283..9937cd4 100644
--- a/drivers/crypto/caam/qi.c
+++ b/drivers/crypto/caam/qi.c
@@ -21,6 +21,9 @@
/* If DPA_ETH is not available, then use a reasonably backlog per CPU */
#define MAX_RSP_FQ_BACKLOG_PER_CPU 64
#endif
+#define CAAM_QI_MEMCACHE_SIZE 256 /* Length of a single buffer in
+                                   * the QI driver memory cache. */
+
/*
* The jobs are processed by the driver against a driver context.
* With every cryptographic context, a driver context is attached.
@@ -72,6 +75,18 @@ static bool caam_congested __read_mostly;
*/
static int mod_init_cpu;
+/*
+ * This is a cache of buffers, from which the users of the CAAM QI driver
+ * can allocate short (currently 256B) buffers. It's speedier than
+ * doing malloc on the hotpath.
+ * NOTE: A more elegant solution would be to have some headroom in the frames
+ * being processed. This can be added by the dpa_eth driver. This would
+ * pose a problem for userspace application processing which cannot
+ * know of this limitation. So for now, this will work.
+ * NOTE: The memcache is SMP-safe. No need to handle spinlocks in-here
+ */
+static struct kmem_cache *qi_cache;
+
bool caam_drv_ctx_busy(struct caam_drv_ctx *drv_ctx)
{
return caam_congested;
@@ -462,6 +477,18 @@ struct caam_drv_ctx *caam_drv_ctx_init(struct device *qidev,
}
EXPORT_SYMBOL(caam_drv_ctx_init);
+void *qi_cache_alloc(gfp_t flags)
+{
+ return kmem_cache_alloc(qi_cache, flags);
+}
+EXPORT_SYMBOL(qi_cache_alloc);
+
+void qi_cache_free(void *obj)
+{
+ kmem_cache_free(qi_cache, obj);
+}
+EXPORT_SYMBOL(qi_cache_free);
+
static int caam_qi_poll(struct napi_struct *napi, int budget)
{
int cleaned = qman_poll_dqrr(budget);
@@ -526,6 +553,9 @@ int caam_qi_shutdown(struct device *qidev)
/* Delete the pool channel */
qman_release_pool(*this_cpu_ptr(&pcpu_qipriv.pool));
+ if (qi_cache)
+ kmem_cache_destroy(qi_cache);
+
/* Now that we're done with the CGRs, restore the cpus allowed mask */
set_cpus_allowed_ptr(current, &old_cpumask);
@@ -783,6 +813,9 @@ int caam_qi_init(struct platform_device *caam_pdev, struct device_node *np)
/* Response path cannot be congested */
caam_congested = false;
+ /* kmem_cache wasn't yet allocated */
+ qi_cache = NULL;
+
/* Initialise the CGRs congestion detection */
err = alloc_cgrs(qidev);
if (err) {
@@ -818,6 +851,14 @@ int caam_qi_init(struct platform_device *caam_pdev, struct device_node *np)
/* Hook up QI device to parent controlling caam device */
ctrlpriv->qidev = qidev;
+ qi_cache = kmem_cache_create("caamqicache", 256, 0,
+ SLAB_CACHE_DMA, NULL);
+ if (!qi_cache) {
+ dev_err(qidev, "Can't allocate SEC cache\n");
+ platform_device_unregister(qi_pdev);
+ return err;
+ }
+
/* Done with the CGRs; restore the cpus allowed mask */
set_cpus_allowed_ptr(current, &old_cpumask);
diff --git a/drivers/crypto/caam/qi.h b/drivers/crypto/caam/qi.h
index e8e7c1a..33f7470 100644
--- a/drivers/crypto/caam/qi.h
+++ b/drivers/crypto/caam/qi.h
@@ -150,4 +150,28 @@ extern void caam_drv_ctx_rel(struct caam_drv_ctx *drv_ctx);
extern int caam_qi_init(struct platform_device *pdev, struct device_node *np);
extern int caam_qi_shutdown(struct device *dev);
+/*
+ * qi_cache_alloc - Allocate buffers from CAAM-QI cache
+ *
+ * Invoked when a user of the CAAM-QI (i.e. caamalg-qi) needs data which has
+ * to be allocated on the hotpath. Instead of using malloc, one can use the
+ * services of the CAAM QI memory cache (backed by kmem_cache). The buffers
+ * will have a size of 256B, which is sufficient for hosting 16 SG entries.
+ *
+ * flags - flags that would be used for the equivalent malloc(..) call
+ *
+ * Returns a pointer to a retrieved buffer on success or NULL on failure.
+ */
+extern void *qi_cache_alloc(gfp_t flags);
+
+/*
+ * qi_cache_free - Frees buffers allocated from CAAM-QI cache
+ *
+ * Invoked when a user of the CAAM-QI (i.e. caamalg-qi) no longer needs
+ * the buffer previously allocated by a qi_cache_alloc call.
+ * No checking is being done, the call is a passthrough call to
+ * kmem_cache_free(...)
+ */
+extern void qi_cache_free(void *obj);
+
#endif /* QI_H */