From 4ada5f182eb5eab67f85d712d5f77044f64c5f0d Mon Sep 17 00:00:00 2001 From: Alex Porosanu Date: Wed, 18 Feb 2015 20:46:02 +0200 Subject: crypto: caam - change QI memory allocation on hotpath This patch changes the allocation of memory on the hotpath, by replacing the malloc allocation with a kmem_cache backed allocation. Change-Id: I55a21bb7cbd022f6b86bd784988491b551a0f81e Signed-off-by: Alex Porosanu Reviewed-on: http://git.am.freescale.net:8181/31371 Tested-by: Review Code-CDREVIEW Reviewed-by: Mircea Pop Reviewed-by: Honghua Yin diff --git a/drivers/crypto/caam/caamalg_qi.c b/drivers/crypto/caam/caamalg_qi.c index 26546db..81b82bd 100644 --- a/drivers/crypto/caam/caamalg_qi.c +++ b/drivers/crypto/caam/caamalg_qi.c @@ -970,7 +970,7 @@ static void aead_done(struct caam_drv_req *drv_req, u32 status) aead_unmap(qidev, edesc, aead_req); aead_request_complete(aead_req, ecode); - kfree(edesc); + qi_cache_free(edesc); } /* For now, identical to aead_done */ @@ -994,7 +994,7 @@ static inline void tls_encrypt_done(struct caam_drv_req *drv_req, u32 status) aead_unmap(qidev, edesc, aead_req); aead_request_complete(aead_req, ecode); - kfree(edesc); + qi_cache_free(edesc); } static inline void tls_decrypt_done(struct caam_drv_req *drv_req, u32 status) @@ -1046,7 +1046,7 @@ static inline void tls_decrypt_done(struct caam_drv_req *drv_req, u32 status) out: aead_request_complete(aead_req, ecode); - kfree(edesc); + qi_cache_free(edesc); } /* @@ -1120,9 +1120,8 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, qm_sg_bytes = qm_sg_ents * sizeof(struct qm_sg_entry); /* allocate space for base edesc and hw desc commands, link tables */ - edesc = kmalloc(sizeof(struct aead_edesc) + qm_sg_bytes, - GFP_DMA | flags); - if (!edesc) { + edesc = qi_cache_alloc(GFP_DMA | flags); + if (unlikely(!edesc)) { dev_err(qidev, "could not allocate extended descriptor\n"); return ERR_PTR(-ENOMEM); } @@ -1276,7 +1275,7 @@ static int aead_encrypt(struct aead_request *req) ret = 
-EINPROGRESS; } else { aead_unmap(qidev, edesc, req); - kfree(edesc); + qi_cache_free(edesc); } return ret; @@ -1318,7 +1317,7 @@ static int aead_decrypt(struct aead_request *req) ret = -EINPROGRESS; } else { aead_unmap(qidev, edesc, req); - kfree(edesc); + qi_cache_free(edesc); } return ret; @@ -1371,7 +1370,7 @@ static int tls_encrypt(struct aead_request *req) ret = -EINPROGRESS; } else { aead_unmap(qidev, edesc, req); - kfree(edesc); + qi_cache_free(edesc); } return ret; @@ -1424,7 +1423,7 @@ static int tls_decrypt(struct aead_request *req) ret = -EINPROGRESS; } else { aead_unmap(qidev, edesc, req); - kfree(edesc); + qi_cache_free(edesc); } return ret; @@ -1505,9 +1504,8 @@ static struct aead_edesc *aead_giv_edesc_alloc(struct aead_givcrypt_request qm_sg_bytes = qm_sg_ents * sizeof(struct qm_sg_entry); /* allocate space for base edesc and hw desc commands, link tables */ - edesc = kmalloc(sizeof(struct aead_edesc) + qm_sg_bytes, - GFP_DMA | flags); - if (!edesc) { + edesc = qi_cache_alloc(GFP_DMA | flags); + if (unlikely(!edesc)) { dev_err(qidev, "could not allocate extended descriptor\n"); return ERR_PTR(-ENOMEM); } @@ -1625,7 +1623,7 @@ static int aead_givencrypt(struct aead_givcrypt_request *areq) ret = -EINPROGRESS; } else { aead_unmap(qidev, edesc, req); - kfree(edesc); + qi_cache_free(edesc); } return ret; diff --git a/drivers/crypto/caam/qi.c b/drivers/crypto/caam/qi.c index 52d3283..9937cd4 100644 --- a/drivers/crypto/caam/qi.c +++ b/drivers/crypto/caam/qi.c @@ -21,6 +21,9 @@ /* If DPA_ETH is not available, then use a reasonably backlog per CPU */ #define MAX_RSP_FQ_BACKLOG_PER_CPU 64 #endif +#define CAAM_QI_MEMCACHE_SIZE 256 /* Length of a single buffer in + the QI driver memory cache. */ + /* * The jobs are processed by the driver against a driver context. * With every cryptographic context, a driver context is attached. 
This is a cache of buffers, from which the users of CAAM QI driver + * can allocate short (currently 256B) buffers.
flags - flags that would be used for the equivalent malloc(..) call + * + * Returns a pointer