From 65df57743924c3d13e1fa1bcf5bf70fe874fcdfd Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Thu, 24 May 2012 11:13:42 +0200 Subject: crypto: sha1 - use Kbuild supplied flags for AVX test Commit ea4d26ae ("raid5: add AVX optimized RAID5 checksumming") introduced x86/ arch wide defines for AFLAGS and CFLAGS indicating AVX support in binutils based on the same test we have in x86/crypto/ right now. To minimize duplication drop our implementation in favour to the one in x86/. Signed-off-by: Mathias Krause Signed-off-by: Herbert Xu diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index e191ac0..479f95a7 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -34,12 +34,5 @@ salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o - ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o - -# enable AVX support only when $(AS) can actually assemble the instructions -ifeq ($(call as-instr,vpxor %xmm0$(comma)%xmm1$(comma)%xmm2,yes,no),yes) -AFLAGS_sha1_ssse3_asm.o += -DSHA1_ENABLE_AVX_SUPPORT -CFLAGS_sha1_ssse3_glue.o += -DSHA1_ENABLE_AVX_SUPPORT -endif sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S index b2c2f57..49d6987 100644 --- a/arch/x86/crypto/sha1_ssse3_asm.S +++ b/arch/x86/crypto/sha1_ssse3_asm.S @@ -468,7 +468,7 @@ W_PRECALC_SSSE3 */ SHA1_VECTOR_ASM sha1_transform_ssse3 -#ifdef SHA1_ENABLE_AVX_SUPPORT +#ifdef CONFIG_AS_AVX .macro W_PRECALC_AVX diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index f916499..4a11a9d 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -35,7 +35,7 @@ asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data, unsigned int rounds); -#ifdef SHA1_ENABLE_AVX_SUPPORT +#ifdef 
CONFIG_AS_AVX asmlinkage void sha1_transform_avx(u32 *digest, const char *data, unsigned int rounds); #endif @@ -184,7 +184,7 @@ static struct shash_alg alg = { } }; -#ifdef SHA1_ENABLE_AVX_SUPPORT +#ifdef CONFIG_AS_AVX static bool __init avx_usable(void) { u64 xcr0; @@ -209,7 +209,7 @@ static int __init sha1_ssse3_mod_init(void) if (cpu_has_ssse3) sha1_transform_asm = sha1_transform_ssse3; -#ifdef SHA1_ENABLE_AVX_SUPPORT +#ifdef CONFIG_AS_AVX /* allow AVX to override SSSE3, it's a little faster */ if (avx_usable()) sha1_transform_asm = sha1_transform_avx; -- cgit v0.10.2 From a482b081a2d4d74d16bc9ea8779f9f6055f95852 Mon Sep 17 00:00:00 2001 From: Sonic Zhang Date: Fri, 25 May 2012 17:54:13 +0800 Subject: crypto: testmgr - Add new test cases for Blackfin CRC crypto driver Signed-off-by: Sonic Zhang Acked-by: Mike Frysinger Signed-off-by: Herbert Xu diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 8f147bf..750cce4 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -1192,6 +1192,9 @@ static int do_test(int m) case 109: ret += tcrypt_test("vmac(aes)"); break; + case 110: + ret += tcrypt_test("hmac(crc32)"); + break; case 150: ret += tcrypt_test("ansi_cprng"); diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 5674878..eb6d20f 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -2220,6 +2220,15 @@ static const struct alg_test_desc alg_test_descs[] = { } } }, { + .alg = "hmac(crc32)", + .test = alg_test_hash, + .suite = { + .hash = { + .vecs = bfin_crc_tv_template, + .count = BFIN_CRC_TEST_VECTORS + } + } + }, { .alg = "hmac(md5)", .test = alg_test_hash, .suite = { diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 36e5a8e..34a9d51 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -14858,4 +14858,94 @@ static struct hash_testvec crc32c_tv_template[] = { }, }; +/* + * Blackfin CRC test vectors + */ +#define BFIN_CRC_TEST_VECTORS 6 + +static struct hash_testvec bfin_crc_tv_template[] = { + { + .psize = 0, + .digest = "\x00\x00\x00\x00", + }, + { + 
.key = "\x87\xa9\xcb\xed", + .ksize = 4, + .psize = 0, + .digest = "\x87\xa9\xcb\xed", + }, + { + .key = "\xff\xff\xff\xff", + .ksize = 4, + .plaintext = "\x01\x02\x03\x04\x05\x06\x07\x08" + "\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10" + "\x11\x12\x13\x14\x15\x16\x17\x18" + "\x19\x1a\x1b\x1c\x1d\x1e\x1f\x20" + "\x21\x22\x23\x24\x25\x26\x27\x28", + .psize = 40, + .digest = "\x84\x0c\x8d\xa2", + }, + { + .key = "\xff\xff\xff\xff", + .ksize = 4, + .plaintext = "\x01\x02\x03\x04\x05\x06\x07\x08" + "\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10" + "\x11\x12\x13\x14\x15\x16\x17\x18" + "\x19\x1a\x1b\x1c\x1d\x1e\x1f\x20" + "\x21\x22\x23\x24\x25\x26", + .psize = 38, + .digest = "\x8c\x58\xec\xb7", + }, + { + .key = "\xff\xff\xff\xff", + .ksize = 4, + .plaintext = "\x01\x02\x03\x04\x05\x06\x07\x08" + "\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10" + "\x11\x12\x13\x14\x15\x16\x17\x18" + "\x19\x1a\x1b\x1c\x1d\x1e\x1f\x20" + "\x21\x22\x23\x24\x25\x26\x27", + .psize = 39, + .digest = "\xdc\x50\x28\x7b", + }, + { + .key = "\xff\xff\xff\xff", + .ksize = 4, + .plaintext = "\x01\x02\x03\x04\x05\x06\x07\x08" + "\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10" + "\x11\x12\x13\x14\x15\x16\x17\x18" + "\x19\x1a\x1b\x1c\x1d\x1e\x1f\x20" + "\x21\x22\x23\x24\x25\x26\x27\x28" + "\x29\x2a\x2b\x2c\x2d\x2e\x2f\x30" + "\x31\x32\x33\x34\x35\x36\x37\x38" + "\x39\x3a\x3b\x3c\x3d\x3e\x3f\x40" + "\x41\x42\x43\x44\x45\x46\x47\x48" + "\x49\x4a\x4b\x4c\x4d\x4e\x4f\x50" + "\x51\x52\x53\x54\x55\x56\x57\x58" + "\x59\x5a\x5b\x5c\x5d\x5e\x5f\x60" + "\x61\x62\x63\x64\x65\x66\x67\x68" + "\x69\x6a\x6b\x6c\x6d\x6e\x6f\x70" + "\x71\x72\x73\x74\x75\x76\x77\x78" + "\x79\x7a\x7b\x7c\x7d\x7e\x7f\x80" + "\x81\x82\x83\x84\x85\x86\x87\x88" + "\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90" + "\x91\x92\x93\x94\x95\x96\x97\x98" + "\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0" + "\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8" + "\xa9\xaa\xab\xac\xad\xae\xaf\xb0" + "\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8" + "\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0" + "\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8" + "\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0" + 
"\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8" + "\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0" + "\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8" + "\xe9\xea\xeb\xec\xed\xee\xef\xf0", + .psize = 240, + .digest = "\x10\x19\x4a\x5c", + .np = 2, + .tap = { 31, 209 } + }, + +}; + #endif /* _CRYPTO_TESTMGR_H */ -- cgit v0.10.2 From b8840098b70c11d70c29263e0765f103e6cbe55e Mon Sep 17 00:00:00 2001 From: Sonic Zhang Date: Mon, 4 Jun 2012 12:24:47 +0800 Subject: crypto: bfin_crc - CRC hardware driver for BF60x family processors. The CRC peripheral is a hardware block used to compute the CRC of the block of data. This is based on a CRC32 engine which computes the CRC value of 32b data words presented to it. For data words of < 32b in size, this driver pack 0 automatically into 32b data units. This driver implements the async hash crypto framework API. Signed-off-by: Sonic Zhang Signed-off-by: Herbert Xu diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 1092a77..dd4d5af 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -325,4 +325,11 @@ if CRYPTO_DEV_UX500 source "drivers/crypto/ux500/Kconfig" endif # if CRYPTO_DEV_UX500 +config CRYPTO_DEV_BFIN_CRC + tristate "Support for Blackfin CRC hardware" + depends on BF60x + help + Newer Blackfin processors have CRC hardware. Select this if you + want to use the Blackfin CRC module. 
+ endif # CRYPTO_HW diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index 0139032..d5062bb 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -14,4 +14,5 @@ obj-$(CONFIG_CRYPTO_DEV_OMAP_AES) += omap-aes.o obj-$(CONFIG_CRYPTO_DEV_PICOXCELL) += picoxcell_crypto.o obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o obj-$(CONFIG_CRYPTO_DEV_TEGRA_AES) += tegra-aes.o -obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/ \ No newline at end of file +obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/ +obj-$(CONFIG_CRYPTO_DEV_BFIN_CRC) += bfin_crc.o diff --git a/drivers/crypto/bfin_crc.c b/drivers/crypto/bfin_crc.c new file mode 100644 index 0000000..5398580 --- /dev/null +++ b/drivers/crypto/bfin_crc.c @@ -0,0 +1,780 @@ +/* + * Cryptographic API. + * + * Support Blackfin CRC HW acceleration. + * + * Copyright 2012 Analog Devices Inc. + * + * Licensed under the GPL-2. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#define CRC_CCRYPTO_QUEUE_LENGTH 5 + +#define DRIVER_NAME "bfin-hmac-crc" +#define CHKSUM_DIGEST_SIZE 4 +#define CHKSUM_BLOCK_SIZE 1 + +#define CRC_MAX_DMA_DESC 100 + +#define CRC_CRYPTO_STATE_UPDATE 1 +#define CRC_CRYPTO_STATE_FINALUPDATE 2 +#define CRC_CRYPTO_STATE_FINISH 3 + +struct bfin_crypto_crc { + struct list_head list; + struct device *dev; + spinlock_t lock; + + int irq; + int dma_ch; + u32 poly; + volatile struct crc_register *regs; + + struct ahash_request *req; /* current request in operation */ + struct dma_desc_array *sg_cpu; /* virt addr of sg dma descriptors */ + dma_addr_t sg_dma; /* phy addr of sg dma descriptors */ + u8 *sg_mid_buf; + + struct tasklet_struct done_task; + struct crypto_queue queue; /* waiting requests */ + + u8 busy:1; /* crc device in operation flag */ +}; + +static struct bfin_crypto_crc_list { + struct list_head dev_list; 
+ spinlock_t lock; +} crc_list; + +struct bfin_crypto_crc_reqctx { + struct bfin_crypto_crc *crc; + + unsigned int total; /* total request bytes */ + size_t sg_buflen; /* bytes for this update */ + unsigned int sg_nents; + struct scatterlist *sg; /* sg list head for this update*/ + struct scatterlist bufsl[2]; /* chained sg list */ + + size_t bufnext_len; + size_t buflast_len; + u8 bufnext[CHKSUM_DIGEST_SIZE]; /* extra bytes for next update */ + u8 buflast[CHKSUM_DIGEST_SIZE]; /* extra bytes from last update */ + + u8 flag; +}; + +struct bfin_crypto_crc_ctx { + struct bfin_crypto_crc *crc; + u32 key; +}; + + +/* + * derive number of elements in scatterlist + */ +static int sg_count(struct scatterlist *sg_list) +{ + struct scatterlist *sg = sg_list; + int sg_nents = 1; + + if (sg_list == NULL) + return 0; + + while (!sg_is_last(sg)) { + sg_nents++; + sg = scatterwalk_sg_next(sg); + } + + return sg_nents; +} + +/* + * get element in scatter list by given index + */ +static struct scatterlist *sg_get(struct scatterlist *sg_list, unsigned int nents, + unsigned int index) +{ + struct scatterlist *sg = NULL; + int i; + + for_each_sg(sg_list, sg, nents, i) + if (i == index) + break; + + return sg; +} + +static int bfin_crypto_crc_init_hw(struct bfin_crypto_crc *crc, u32 key) +{ + crc->regs->datacntrld = 0; + crc->regs->control = MODE_CALC_CRC << OPMODE_OFFSET; + crc->regs->curresult = key; + + /* setup CRC interrupts */ + crc->regs->status = CMPERRI | DCNTEXPI; + crc->regs->intrenset = CMPERRI | DCNTEXPI; + SSYNC(); + + return 0; +} + +static int bfin_crypto_crc_init(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct bfin_crypto_crc_ctx *crc_ctx = crypto_ahash_ctx(tfm); + struct bfin_crypto_crc_reqctx *ctx = ahash_request_ctx(req); + struct bfin_crypto_crc *crc; + + pr_debug("crc_init\n"); /* crc is not assigned yet, so crc->dev must not be read here */ + spin_lock_bh(&crc_list.lock); + list_for_each_entry(crc, &crc_list.dev_list, list) { + crc_ctx->crc = crc; + break; + } + 
spin_unlock_bh(&crc_list.lock); + + if (sg_count(req->src) > CRC_MAX_DMA_DESC) { + dev_dbg(crc->dev, "init: requested sg list is too big > %d\n", + CRC_MAX_DMA_DESC); + return -EINVAL; + } + + ctx->crc = crc; + ctx->bufnext_len = 0; + ctx->buflast_len = 0; + ctx->sg_buflen = 0; + ctx->total = 0; + ctx->flag = 0; + + /* init crc results */ + put_unaligned_le32(crc_ctx->key, req->result); + + dev_dbg(crc->dev, "init: digest size: %d\n", + crypto_ahash_digestsize(tfm)); + + return bfin_crypto_crc_init_hw(crc, crc_ctx->key); +} + +static void bfin_crypto_crc_config_dma(struct bfin_crypto_crc *crc) +{ + struct scatterlist *sg; + struct bfin_crypto_crc_reqctx *ctx = ahash_request_ctx(crc->req); + int i = 0, j = 0; + unsigned long dma_config; + unsigned int dma_count; + unsigned int dma_addr; + unsigned int mid_dma_count = 0; + int dma_mod; + + dma_map_sg(crc->dev, ctx->sg, ctx->sg_nents, DMA_TO_DEVICE); + + for_each_sg(ctx->sg, sg, ctx->sg_nents, j) { + dma_config = DMAFLOW_ARRAY | RESTART | NDSIZE_3 | DMAEN | PSIZE_32; + dma_addr = sg_dma_address(sg); + /* deduce extra bytes in last sg */ + if (sg_is_last(sg)) + dma_count = sg_dma_len(sg) - ctx->bufnext_len; + else + dma_count = sg_dma_len(sg); + + if (mid_dma_count) { + /* Append last middle dma buffer to 4 bytes with first + bytes in current sg buffer. Move addr of current + sg and deduce the length of current sg. 
+ */ + memcpy(crc->sg_mid_buf +((i-1) << 2) + mid_dma_count, + (void *)dma_addr, + CHKSUM_DIGEST_SIZE - mid_dma_count); + dma_addr += CHKSUM_DIGEST_SIZE - mid_dma_count; + dma_count -= CHKSUM_DIGEST_SIZE - mid_dma_count; + } + /* chop current sg dma len to multiple of 32 bits */ + mid_dma_count = dma_count % 4; + dma_count &= ~0x3; + + if (dma_addr % 4 == 0) { + dma_config |= WDSIZE_32; + dma_count >>= 2; + dma_mod = 4; + } else if (dma_addr % 2 == 0) { + dma_config |= WDSIZE_16; + dma_count >>= 1; + dma_mod = 2; + } else { + dma_config |= WDSIZE_8; + dma_mod = 1; + } + + crc->sg_cpu[i].start_addr = dma_addr; + crc->sg_cpu[i].cfg = dma_config; + crc->sg_cpu[i].x_count = dma_count; + crc->sg_cpu[i].x_modify = dma_mod; + dev_dbg(crc->dev, "%d: crc_dma: start_addr:0x%lx, " + "cfg:0x%lx, x_count:0x%lx, x_modify:0x%lx\n", + i, crc->sg_cpu[i].start_addr, + crc->sg_cpu[i].cfg, crc->sg_cpu[i].x_count, + crc->sg_cpu[i].x_modify); + i++; + + if (mid_dma_count) { + /* copy extra bytes to next middle dma buffer */ + dma_config = DMAFLOW_ARRAY | RESTART | NDSIZE_3 | + DMAEN | PSIZE_32 | WDSIZE_32; + memcpy(crc->sg_mid_buf + (i << 2), + (void *)(dma_addr + (dma_count << 2)), + mid_dma_count); + /* setup new dma descriptor for next middle dma */ + crc->sg_cpu[i].start_addr = dma_map_single(crc->dev, + crc->sg_mid_buf + (i << 2), + CHKSUM_DIGEST_SIZE, DMA_TO_DEVICE); + crc->sg_cpu[i].cfg = dma_config; + crc->sg_cpu[i].x_count = 1; + crc->sg_cpu[i].x_modify = CHKSUM_DIGEST_SIZE; + dev_dbg(crc->dev, "%d: crc_dma: start_addr:0x%lx, " + "cfg:0x%lx, x_count:0x%lx, x_modify:0x%lx\n", + i, crc->sg_cpu[i].start_addr, + crc->sg_cpu[i].cfg, crc->sg_cpu[i].x_count, + crc->sg_cpu[i].x_modify); + i++; + } + } + + dma_config = DMAFLOW_ARRAY | RESTART | NDSIZE_3 | DMAEN | PSIZE_32 | WDSIZE_32; + /* For final update req, append the buffer for next update as well*/ + if (ctx->bufnext_len && (ctx->flag == CRC_CRYPTO_STATE_FINALUPDATE || + ctx->flag == CRC_CRYPTO_STATE_FINISH)) { + 
crc->sg_cpu[i].start_addr = dma_map_single(crc->dev, ctx->bufnext, + CHKSUM_DIGEST_SIZE, DMA_TO_DEVICE); + crc->sg_cpu[i].cfg = dma_config; + crc->sg_cpu[i].x_count = 1; + crc->sg_cpu[i].x_modify = CHKSUM_DIGEST_SIZE; + dev_dbg(crc->dev, "%d: crc_dma: start_addr:0x%lx, " + "cfg:0x%lx, x_count:0x%lx, x_modify:0x%lx\n", + i, crc->sg_cpu[i].start_addr, + crc->sg_cpu[i].cfg, crc->sg_cpu[i].x_count, + crc->sg_cpu[i].x_modify); + i++; + } + + if (i == 0) + return; + + flush_dcache_range((unsigned int)crc->sg_cpu, + (unsigned int)crc->sg_cpu + + i * sizeof(struct dma_desc_array)); + + /* Set the last descriptor to stop mode */ + crc->sg_cpu[i - 1].cfg &= ~(DMAFLOW | NDSIZE); + crc->sg_cpu[i - 1].cfg |= DI_EN; + set_dma_curr_desc_addr(crc->dma_ch, (unsigned long *)crc->sg_dma); + set_dma_x_count(crc->dma_ch, 0); + set_dma_x_modify(crc->dma_ch, 0); + SSYNC(); + set_dma_config(crc->dma_ch, dma_config); +} + +static int bfin_crypto_crc_handle_queue(struct bfin_crypto_crc *crc, + struct ahash_request *req) +{ + struct crypto_async_request *async_req, *backlog; + struct bfin_crypto_crc_reqctx *ctx; + struct scatterlist *sg; + int ret = 0; + int nsg, i, j; + unsigned int nextlen; + unsigned long flags; + + spin_lock_irqsave(&crc->lock, flags); + if (req) + ret = ahash_enqueue_request(&crc->queue, req); + if (crc->busy) { + spin_unlock_irqrestore(&crc->lock, flags); + return ret; + } + backlog = crypto_get_backlog(&crc->queue); + async_req = crypto_dequeue_request(&crc->queue); + if (async_req) + crc->busy = 1; + spin_unlock_irqrestore(&crc->lock, flags); + + if (!async_req) + return ret; + + if (backlog) + backlog->complete(backlog, -EINPROGRESS); + + req = ahash_request_cast(async_req); + crc->req = req; + ctx = ahash_request_ctx(req); + ctx->sg = NULL; + ctx->sg_buflen = 0; + ctx->sg_nents = 0; + + dev_dbg(crc->dev, "handling new req, flag=%u, nbytes: %d\n", + ctx->flag, req->nbytes); + + if (ctx->flag == CRC_CRYPTO_STATE_FINISH) { + if (ctx->bufnext_len == 0) { + crc->busy = 
0; + return 0; + } + + /* Pack last crc update buffer to 32bit */ + memset(ctx->bufnext + ctx->bufnext_len, 0, + CHKSUM_DIGEST_SIZE - ctx->bufnext_len); + } else { + /* Pack small data which is less than 32bit to buffer for next update. */ + if (ctx->bufnext_len + req->nbytes < CHKSUM_DIGEST_SIZE) { + memcpy(ctx->bufnext + ctx->bufnext_len, + sg_virt(req->src), req->nbytes); + ctx->bufnext_len += req->nbytes; + if (ctx->flag == CRC_CRYPTO_STATE_FINALUPDATE && + ctx->bufnext_len) { + goto finish_update; + } else { + crc->busy = 0; + return 0; + } + } + + if (ctx->bufnext_len) { + /* Chain in extra bytes of last update */ + ctx->buflast_len = ctx->bufnext_len; + memcpy(ctx->buflast, ctx->bufnext, ctx->buflast_len); + + nsg = ctx->sg_buflen ? 2 : 1; + sg_init_table(ctx->bufsl, nsg); + sg_set_buf(ctx->bufsl, ctx->buflast, ctx->buflast_len); + if (nsg > 1) + scatterwalk_sg_chain(ctx->bufsl, nsg, + req->src); + ctx->sg = ctx->bufsl; + } else + ctx->sg = req->src; + + /* Chop crc buffer size to multiple of 32 bit */ + nsg = ctx->sg_nents = sg_count(ctx->sg); + ctx->sg_buflen = ctx->buflast_len + req->nbytes; + ctx->bufnext_len = ctx->sg_buflen % 4; + ctx->sg_buflen &= ~0x3; + + if (ctx->bufnext_len) { + /* copy extra bytes to buffer for next update */ + memset(ctx->bufnext, 0, CHKSUM_DIGEST_SIZE); + nextlen = ctx->bufnext_len; + for (i = nsg - 1; i >= 0; i--) { + sg = sg_get(ctx->sg, nsg, i); + j = min(nextlen, sg_dma_len(sg)); + memcpy(ctx->bufnext + nextlen - j, + sg_virt(sg) + sg_dma_len(sg) - j, j); + if (j == sg_dma_len(sg)) + ctx->sg_nents--; + nextlen -= j; + if (nextlen == 0) + break; + } + } + } + +finish_update: + if (ctx->bufnext_len && (ctx->flag == CRC_CRYPTO_STATE_FINALUPDATE || + ctx->flag == CRC_CRYPTO_STATE_FINISH)) + ctx->sg_buflen += CHKSUM_DIGEST_SIZE; + + /* set CRC data count before start DMA */ + crc->regs->datacnt = ctx->sg_buflen >> 2; + + /* setup and enable CRC DMA */ + bfin_crypto_crc_config_dma(crc); + + /* finally kick off CRC operation */ + 
crc->regs->control |= BLKEN; + SSYNC(); + + return -EINPROGRESS; +} + +static int bfin_crypto_crc_update(struct ahash_request *req) +{ + struct bfin_crypto_crc_reqctx *ctx = ahash_request_ctx(req); + + if (!req->nbytes) + return 0; + + dev_dbg(ctx->crc->dev, "crc_update\n"); + ctx->total += req->nbytes; + ctx->flag = CRC_CRYPTO_STATE_UPDATE; + + return bfin_crypto_crc_handle_queue(ctx->crc, req); +} + +static int bfin_crypto_crc_final(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct bfin_crypto_crc_ctx *crc_ctx = crypto_ahash_ctx(tfm); + struct bfin_crypto_crc_reqctx *ctx = ahash_request_ctx(req); + + dev_dbg(ctx->crc->dev, "crc_final\n"); + ctx->flag = CRC_CRYPTO_STATE_FINISH; + crc_ctx->key = 0; + + return bfin_crypto_crc_handle_queue(ctx->crc, req); +} + +static int bfin_crypto_crc_finup(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct bfin_crypto_crc_ctx *crc_ctx = crypto_ahash_ctx(tfm); + struct bfin_crypto_crc_reqctx *ctx = ahash_request_ctx(req); + + dev_dbg(ctx->crc->dev, "crc_finishupdate\n"); + ctx->total += req->nbytes; + ctx->flag = CRC_CRYPTO_STATE_FINALUPDATE; + crc_ctx->key = 0; + + return bfin_crypto_crc_handle_queue(ctx->crc, req); +} + +static int bfin_crypto_crc_digest(struct ahash_request *req) +{ + int ret; + + ret = bfin_crypto_crc_init(req); + if (ret) + return ret; + + return bfin_crypto_crc_finup(req); +} + +static int bfin_crypto_crc_setkey(struct crypto_ahash *tfm, const u8 *key, + unsigned int keylen) +{ + struct bfin_crypto_crc_ctx *crc_ctx = crypto_ahash_ctx(tfm); + + dev_dbg(crc_ctx->crc->dev, "crc_setkey\n"); + if (keylen != CHKSUM_DIGEST_SIZE) { + crypto_ahash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + crc_ctx->key = get_unaligned_le32(key); + + return 0; +} + +static int bfin_crypto_crc_cra_init(struct crypto_tfm *tfm) +{ + struct bfin_crypto_crc_ctx *crc_ctx = crypto_tfm_ctx(tfm); + + crc_ctx->key = 0; + 
crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct bfin_crypto_crc_reqctx)); + + return 0; +} + +static void bfin_crypto_crc_cra_exit(struct crypto_tfm *tfm) +{ +} + +static struct ahash_alg algs = { + .init = bfin_crypto_crc_init, + .update = bfin_crypto_crc_update, + .final = bfin_crypto_crc_final, + .finup = bfin_crypto_crc_finup, + .digest = bfin_crypto_crc_digest, + .setkey = bfin_crypto_crc_setkey, + .halg.digestsize = CHKSUM_DIGEST_SIZE, + .halg.base = { + .cra_name = "hmac(crc32)", + .cra_driver_name = DRIVER_NAME, + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC, + .cra_blocksize = CHKSUM_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct bfin_crypto_crc_ctx), + .cra_alignmask = 3, + .cra_module = THIS_MODULE, + .cra_init = bfin_crypto_crc_cra_init, + .cra_exit = bfin_crypto_crc_cra_exit, + } +}; + +static void bfin_crypto_crc_done_task(unsigned long data) +{ + struct bfin_crypto_crc *crc = (struct bfin_crypto_crc *)data; + + bfin_crypto_crc_handle_queue(crc, NULL); +} + +static irqreturn_t bfin_crypto_crc_handler(int irq, void *dev_id) +{ + struct bfin_crypto_crc *crc = dev_id; + + if (crc->regs->status & DCNTEXP) { + crc->regs->status = DCNTEXP; + SSYNC(); + + /* prepare results */ + put_unaligned_le32(crc->regs->result, crc->req->result); + + crc->regs->control &= ~BLKEN; + crc->busy = 0; + + if (crc->req->base.complete) + crc->req->base.complete(&crc->req->base, 0); + + tasklet_schedule(&crc->done_task); + + return IRQ_HANDLED; + } else + return IRQ_NONE; +} + +#ifdef CONFIG_PM +/** + * bfin_crypto_crc_suspend - suspend crc device + * @pdev: device being suspended + * @state: requested suspend state + */ +static int bfin_crypto_crc_suspend(struct platform_device *pdev, pm_message_t state) +{ + struct bfin_crypto_crc *crc = platform_get_drvdata(pdev); + int i = 100000; + + while ((crc->regs->control & BLKEN) && --i) + cpu_relax(); + + if (i == 0) + return -EBUSY; + + return 0; +} +#else +# define 
bfin_crypto_crc_suspend NULL +#endif + +#define bfin_crypto_crc_resume NULL + +/** + * bfin_crypto_crc_probe - Initialize module + * + */ +static int __devinit bfin_crypto_crc_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct resource *res; + struct bfin_crypto_crc *crc; + unsigned int timeout = 100000; + int ret; + + crc = kzalloc(sizeof(*crc), GFP_KERNEL); + if (!crc) { + dev_err(&pdev->dev, "fail to malloc bfin_crypto_crc\n"); + return -ENOMEM; + } + + crc->dev = dev; + + INIT_LIST_HEAD(&crc->list); + spin_lock_init(&crc->lock); + tasklet_init(&crc->done_task, bfin_crypto_crc_done_task, (unsigned long)crc); + crypto_init_queue(&crc->queue, CRC_CCRYPTO_QUEUE_LENGTH); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (res == NULL) { + dev_err(&pdev->dev, "Cannot get IORESOURCE_MEM\n"); + ret = -ENOENT; + goto out_error_free_mem; + } + + crc->regs = ioremap(res->start, resource_size(res)); + if (!crc->regs) { + dev_err(&pdev->dev, "Cannot map CRC IO\n"); + ret = -ENXIO; + goto out_error_free_mem; + } + + crc->irq = platform_get_irq(pdev, 0); + if (crc->irq < 0) { + dev_err(&pdev->dev, "No CRC DCNTEXP IRQ specified\n"); + ret = -ENOENT; + goto out_error_unmap; + } + + ret = request_irq(crc->irq, bfin_crypto_crc_handler, IRQF_SHARED, dev_name(dev), crc); + if (ret) { + dev_err(&pdev->dev, "Unable to request blackfin crc irq\n"); + goto out_error_unmap; + } + + res = platform_get_resource(pdev, IORESOURCE_DMA, 0); + if (res == NULL) { + dev_err(&pdev->dev, "No CRC DMA channel specified\n"); + ret = -ENOENT; + goto out_error_irq; + } + crc->dma_ch = res->start; + + ret = request_dma(crc->dma_ch, dev_name(dev)); + if (ret) { + dev_err(&pdev->dev, "Unable to attach Blackfin CRC DMA channel\n"); + goto out_error_irq; + } + + crc->sg_cpu = dma_alloc_coherent(&pdev->dev, PAGE_SIZE, &crc->sg_dma, GFP_KERNEL); + if (crc->sg_cpu == NULL) { + ret = -ENOMEM; + goto out_error_dma; + } + /* + * need at most CRC_MAX_DMA_DESC sg + 
CRC_MAX_DMA_DESC middle + + * 1 last + 1 next dma descriptors + */ + crc->sg_mid_buf = (u8 *)(crc->sg_cpu + ((CRC_MAX_DMA_DESC + 1) << 1)); + + crc->regs->control = 0; + SSYNC(); + crc->regs->poly = crc->poly = (u32)pdev->dev.platform_data; + SSYNC(); + + while (!(crc->regs->status & LUTDONE) && (--timeout) > 0) + cpu_relax(); + + if (timeout == 0) + dev_info(&pdev->dev, "init crc poly timeout\n"); + + spin_lock(&crc_list.lock); + list_add(&crc->list, &crc_list.dev_list); + spin_unlock(&crc_list.lock); + + platform_set_drvdata(pdev, crc); + + ret = crypto_register_ahash(&algs); + if (ret) { + spin_lock(&crc_list.lock); + list_del(&crc->list); + spin_unlock(&crc_list.lock); + dev_err(&pdev->dev, "Cann't register crypto ahash device\n"); + goto out_error_dma; + } + + dev_info(&pdev->dev, "initialized\n"); + + return 0; + +out_error_dma: + if (crc->sg_cpu) + dma_free_coherent(&pdev->dev, PAGE_SIZE, crc->sg_cpu, crc->sg_dma); + free_dma(crc->dma_ch); +out_error_irq: + free_irq(crc->irq, crc); /* dev_id must match the one passed to request_irq() */ +out_error_unmap: + iounmap((void *)crc->regs); +out_error_free_mem: + kfree(crc); + + return ret; +} + +/** + * bfin_crypto_crc_remove - Unregister the algorithm and release device resources + * + */ +static int __devexit bfin_crypto_crc_remove(struct platform_device *pdev) +{ + struct bfin_crypto_crc *crc = platform_get_drvdata(pdev); + + if (!crc) + return -ENODEV; + + spin_lock(&crc_list.lock); + list_del(&crc->list); + spin_unlock(&crc_list.lock); + + crypto_unregister_ahash(&algs); + tasklet_kill(&crc->done_task); + iounmap((void *)crc->regs); + free_dma(crc->dma_ch); + if (crc->irq > 0) + free_irq(crc->irq, crc); /* dev_id must match the one passed to request_irq() */ + kfree(crc); + + return 0; +} + +static struct platform_driver bfin_crypto_crc_driver = { + .probe = bfin_crypto_crc_probe, + .remove = __devexit_p(bfin_crypto_crc_remove), + .suspend = bfin_crypto_crc_suspend, + .resume = bfin_crypto_crc_resume, + .driver = { + .name = DRIVER_NAME, + .owner = THIS_MODULE, + }, +}; + +/** + * bfin_crypto_crc_mod_init - Initialize module + * + * Checks the 
module params and registers the platform driver. + * Real work is in the platform probe function. + */ +static int __init bfin_crypto_crc_mod_init(void) +{ + int ret; + + pr_info("Blackfin hardware CRC crypto driver\n"); + + INIT_LIST_HEAD(&crc_list.dev_list); + spin_lock_init(&crc_list.lock); + + ret = platform_driver_register(&bfin_crypto_crc_driver); + if (ret) { + pr_err("unable to register driver\n"); + return ret; + } + + return 0; +} + +/** + * bfin_crypto_crc_mod_exit - Deinitialize module + */ +static void __exit bfin_crypto_crc_mod_exit(void) +{ + platform_driver_unregister(&bfin_crypto_crc_driver); +} + +module_init(bfin_crypto_crc_mod_init); +module_exit(bfin_crypto_crc_mod_exit); + +MODULE_AUTHOR("Sonic Zhang "); +MODULE_DESCRIPTION("Blackfin CRC hardware crypto driver"); +MODULE_LICENSE("GPL"); -- cgit v0.10.2 From 170dd56dfc3b13e7dafd48e27f67fddb3f17ef2a Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 25 May 2012 15:54:46 +0200 Subject: crypto: mv_cesa - add an expiry timer in case anything goes wrong The timer triggers when 500ms have gone by after triggering the engine and no completion interrupt was received. The callback then tries to sanitise things as well as possible. 
Signed-off-by: Phil Sutter Signed-off-by: Herbert Xu diff --git a/drivers/crypto/mv_cesa.c b/drivers/crypto/mv_cesa.c index 1cc6b3f..b0b2f02 100644 --- a/drivers/crypto/mv_cesa.c +++ b/drivers/crypto/mv_cesa.c @@ -24,6 +24,7 @@ #define MV_CESA "MV-CESA:" #define MAX_HW_HASH_SIZE 0xFFFF +#define MV_CESA_EXPIRE 500 /* msec */ /* * STM: @@ -87,6 +88,7 @@ struct crypto_priv { spinlock_t lock; struct crypto_queue queue; enum engine_status eng_st; + struct timer_list completion_timer; struct crypto_async_request *cur_req; struct req_progress p; int max_req_size; @@ -138,6 +140,29 @@ struct mv_req_hash_ctx { int count_add; }; +static void mv_completion_timer_callback(unsigned long unused) +{ + int active = readl(cpg->reg + SEC_ACCEL_CMD) & SEC_CMD_EN_SEC_ACCL0; + + printk(KERN_ERR MV_CESA + "completion timer expired (CESA %sactive), cleaning up.\n", + active ? "" : "in"); + + del_timer(&cpg->completion_timer); + writel(SEC_CMD_DISABLE_SEC, cpg->reg + SEC_ACCEL_CMD); + while(readl(cpg->reg + SEC_ACCEL_CMD) & SEC_CMD_DISABLE_SEC) + printk(KERN_INFO MV_CESA "%s: waiting for engine finishing\n", __func__); + cpg->eng_st = ENGINE_W_DEQUEUE; + wake_up_process(cpg->queue_th); +} + +static void mv_setup_timer(void) +{ + setup_timer(&cpg->completion_timer, &mv_completion_timer_callback, 0); + mod_timer(&cpg->completion_timer, + jiffies + msecs_to_jiffies(MV_CESA_EXPIRE)); +} + static void compute_aes_dec_key(struct mv_ctx *ctx) { struct crypto_aes_ctx gen_aes_key; @@ -273,12 +298,8 @@ static void mv_process_current_q(int first_block) sizeof(struct sec_accel_config)); /* GO */ + mv_setup_timer(); writel(SEC_CMD_EN_SEC_ACCL0, cpg->reg + SEC_ACCEL_CMD); - - /* - * XXX: add timer if the interrupt does not occur for some mystery - * reason - */ } static void mv_crypto_algo_completion(void) @@ -357,12 +378,8 @@ static void mv_process_hash_current(int first_block) memcpy(cpg->sram + SRAM_CONFIG, &op, sizeof(struct sec_accel_config)); /* GO */ + mv_setup_timer(); 
writel(SEC_CMD_EN_SEC_ACCL0, cpg->reg + SEC_ACCEL_CMD); - - /* - * XXX: add timer if the interrupt does not occur for some mystery - * reason - */ } static inline int mv_hash_import_sha1_ctx(const struct mv_req_hash_ctx *ctx, @@ -888,6 +905,10 @@ irqreturn_t crypto_int(int irq, void *priv) if (!(val & SEC_INT_ACCEL0_DONE)) return IRQ_NONE; + if (!del_timer(&cpg->completion_timer)) { + printk(KERN_WARNING MV_CESA + "got an interrupt but no pending timer?\n"); + } val &= ~SEC_INT_ACCEL0_DONE; writel(val, cpg->reg + FPGA_INT_STATUS); writel(val, cpg->reg + SEC_ACCEL_INT_STATUS); -- cgit v0.10.2 From 5741d2eeaee28b37a4711a1776f716b274f3d2e4 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Tue, 12 Jun 2012 16:41:21 +0800 Subject: crypto: mv_cesa - initialise the interrupt status field to zero Signed-off-by: Phil Sutter Signed-off-by: Herbert Xu diff --git a/drivers/crypto/mv_cesa.c b/drivers/crypto/mv_cesa.c index b0b2f02..904623d 100644 --- a/drivers/crypto/mv_cesa.c +++ b/drivers/crypto/mv_cesa.c @@ -1082,6 +1082,7 @@ static int mv_probe(struct platform_device *pdev) if (!IS_ERR(cp->clk)) clk_prepare_enable(cp->clk); + writel(0, cpg->reg + SEC_ACCEL_INT_STATUS); writel(SEC_INT_ACCEL0_DONE, cpg->reg + SEC_ACCEL_INT_MASK); writel(SEC_CFG_STOP_DIG_ERR, cpg->reg + SEC_ACCEL_CFG); writel(SRAM_CONFIG, cpg->reg + SEC_ACCEL_DESC_P0); -- cgit v0.10.2 From 4d03c5047a07a62563e1a8fa798ea258f048bfde Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 25 May 2012 15:54:49 +0200 Subject: crypto: mv_cesa - fix for hash finalisation with data Since mv_hash_final_fallback() uses ctx->state, read out the digest state register before calling it. 
Signed-off-by: Phil Sutter Signed-off-by: Herbert Xu diff --git a/drivers/crypto/mv_cesa.c b/drivers/crypto/mv_cesa.c index 904623d..0d40717 100644 --- a/drivers/crypto/mv_cesa.c +++ b/drivers/crypto/mv_cesa.c @@ -423,6 +423,15 @@ out: return rc; } +static void mv_save_digest_state(struct mv_req_hash_ctx *ctx) +{ + ctx->state[0] = readl(cpg->reg + DIGEST_INITIAL_VAL_A); + ctx->state[1] = readl(cpg->reg + DIGEST_INITIAL_VAL_B); + ctx->state[2] = readl(cpg->reg + DIGEST_INITIAL_VAL_C); + ctx->state[3] = readl(cpg->reg + DIGEST_INITIAL_VAL_D); + ctx->state[4] = readl(cpg->reg + DIGEST_INITIAL_VAL_E); +} + static void mv_hash_algo_completion(void) { struct ahash_request *req = ahash_request_cast(cpg->cur_req); @@ -437,14 +446,12 @@ static void mv_hash_algo_completion(void) memcpy(req->result, cpg->sram + SRAM_DIGEST_BUF, crypto_ahash_digestsize(crypto_ahash_reqtfm (req))); - } else + } else { + mv_save_digest_state(ctx); mv_hash_final_fallback(req); + } } else { - ctx->state[0] = readl(cpg->reg + DIGEST_INITIAL_VAL_A); - ctx->state[1] = readl(cpg->reg + DIGEST_INITIAL_VAL_B); - ctx->state[2] = readl(cpg->reg + DIGEST_INITIAL_VAL_C); - ctx->state[3] = readl(cpg->reg + DIGEST_INITIAL_VAL_D); - ctx->state[4] = readl(cpg->reg + DIGEST_INITIAL_VAL_E); + mv_save_digest_state(ctx); } } -- cgit v0.10.2 From 107778b592576c0c8e8d2ca7a2aa5415a4908223 Mon Sep 17 00:00:00 2001 From: Johannes Goetzfried Date: Mon, 28 May 2012 15:54:24 +0200 Subject: crypto: twofish - add x86_64/avx assembler implementation This patch adds a x86_64/avx assembler implementation of the Twofish block cipher. The implementation processes eight blocks in parallel (two 4 block chunk AVX operations). The table-lookups are done in general-purpose registers. For small blocksizes the 3way-parallel functions from the twofish-x86_64-3way module are called. A good performance increase is provided for blocksizes greater or equal to 128B. Patch has been tested with tcrypt and automated filesystem tests. 
Tcrypt benchmark results: Intel Core i5-2500 CPU (fam:6, model:42, step:7) twofish-avx-x86_64 vs. twofish-x86_64-3way 128bit key: (lrw:256bit) (xts:256bit) size ecb-enc ecb-dec cbc-enc cbc-dec ctr-enc ctr-dec lrw-enc lrw-dec xts-enc xts-dec 16B 0.96x 0.97x 1.00x 0.95x 0.97x 0.97x 0.96x 0.95x 0.95x 0.98x 64B 0.99x 0.99x 1.00x 0.99x 0.98x 0.98x 0.99x 0.98x 0.99x 0.98x 256B 1.20x 1.21x 1.00x 1.19x 1.15x 1.14x 1.19x 1.20x 1.18x 1.19x 1024B 1.29x 1.30x 1.00x 1.28x 1.23x 1.24x 1.26x 1.28x 1.26x 1.27x 8192B 1.31x 1.32x 1.00x 1.31x 1.25x 1.25x 1.28x 1.29x 1.28x 1.30x 256bit key: (lrw:384bit) (xts:512bit) size ecb-enc ecb-dec cbc-enc cbc-dec ctr-enc ctr-dec lrw-enc lrw-dec xts-enc xts-dec 16B 0.96x 0.96x 1.00x 0.96x 0.97x 0.98x 0.95x 0.95x 0.95x 0.96x 64B 1.00x 0.99x 1.00x 0.98x 0.98x 1.01x 0.98x 0.98x 0.98x 0.98x 256B 1.20x 1.21x 1.00x 1.21x 1.15x 1.15x 1.19x 1.20x 1.18x 1.19x 1024B 1.29x 1.30x 1.00x 1.28x 1.23x 1.23x 1.26x 1.27x 1.26x 1.27x 8192B 1.31x 1.33x 1.00x 1.31x 1.26x 1.26x 1.29x 1.29x 1.28x 1.30x twofish-avx-x86_64 vs aes-asm (8kB block): 128bit 256bit ecb-enc 1.19x 1.63x ecb-dec 1.18x 1.62x cbc-enc 0.75x 1.03x cbc-dec 1.23x 1.67x ctr-enc 1.24x 1.65x ctr-dec 1.24x 1.65x lrw-enc 1.15x 1.53x lrw-dec 1.14x 1.52x xts-enc 1.16x 1.56x xts-dec 1.16x 1.56x Signed-off-by: Johannes Goetzfried Signed-off-by: Herbert Xu diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 479f95a7..3420fee 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -12,6 +12,7 @@ obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o +obj-$(CONFIG_CRYPTO_TWOFISH_AVX_X86_64) += twofish-avx-x86_64.o obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o @@ -30,6 +31,7 
@@ camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o +twofish-avx-x86_64-y := twofish-avx-x86_64-asm_64.o twofish_avx_glue.o salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S new file mode 100644 index 0000000..fc31b89 --- /dev/null +++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S @@ -0,0 +1,301 @@ +/* + * Twofish Cipher 8-way parallel algorithm (AVX/x86_64) + * + * Copyright (C) 2012 Johannes Goetzfried + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +.file "twofish-avx-x86_64-asm_64.S" +.text + +/* structure of crypto context */ +#define s0 0 +#define s1 1024 +#define s2 2048 +#define s3 3072 +#define w 4096 +#define k 4128 + +/********************************************************************** + 8-way AVX twofish + **********************************************************************/ +#define CTX %rdi + +#define RA1 %xmm0 +#define RB1 %xmm1 +#define RC1 %xmm2 +#define RD1 %xmm3 + +#define RA2 %xmm4 +#define RB2 %xmm5 +#define RC2 %xmm6 +#define RD2 %xmm7 + +#define RX %xmm8 +#define RY %xmm9 + +#define RK1 %xmm10 +#define RK2 %xmm11 + +#define RID1 %rax +#define RID1b %al +#define RID2 %rbx +#define RID2b %bl + +#define RGI1 %rdx +#define RGI1bl %dl +#define RGI1bh %dh +#define RGI2 %rcx +#define RGI2bl %cl +#define RGI2bh %ch + +#define RGS1 %r8 +#define RGS1d %r8d +#define RGS2 %r9 +#define RGS2d %r9d +#define RGS3 %r10 +#define RGS3d %r10d + + +#define lookup_32bit(t0, t1, t2, t3, src, dst) \ + movb src ## bl, RID1b; \ + movb src ## bh, RID2b; \ + movl t0(CTX, RID1, 4), dst ## d; \ + xorl t1(CTX, RID2, 4), dst ## d; \ + shrq $16, src; \ + movb src ## bl, RID1b; \ + movb src ## bh, RID2b; \ + xorl t2(CTX, RID1, 4), dst ## d; \ + xorl t3(CTX, RID2, 4), dst ## d; + +#define G(a, x, t0, t1, t2, t3) \ + vmovq a, RGI1; \ + vpsrldq $8, a, x; \ + vmovq x, RGI2; \ + \ + lookup_32bit(t0, t1, t2, t3, RGI1, RGS1); \ + shrq $16, RGI1; \ + lookup_32bit(t0, t1, t2, t3, RGI1, RGS2); \ + shlq $32, RGS2; \ + orq RGS1, RGS2; \ + \ + lookup_32bit(t0, t1, t2, t3, RGI2, RGS1); \ + shrq $16, RGI2; \ + lookup_32bit(t0, t1, t2, t3, RGI2, RGS3); \ + shlq $32, RGS3; \ + orq RGS1, RGS3; \ + \ + vmovq RGS2, x; \ + vpinsrq $1, RGS3, x, x; + +#define encround(a, b, c, d, x, y) \ + G(a, x, s0, s1, s2, s3); \ 
+ G(b, y, s1, s2, s3, s0); \ + vpaddd x, y, x; \ + vpaddd y, x, y; \ + vpaddd x, RK1, x; \ + vpaddd y, RK2, y; \ + vpxor x, c, c; \ + vpsrld $1, c, x; \ + vpslld $(32 - 1), c, c; \ + vpor c, x, c; \ + vpslld $1, d, x; \ + vpsrld $(32 - 1), d, d; \ + vpor d, x, d; \ + vpxor d, y, d; + +#define decround(a, b, c, d, x, y) \ + G(a, x, s0, s1, s2, s3); \ + G(b, y, s1, s2, s3, s0); \ + vpaddd x, y, x; \ + vpaddd y, x, y; \ + vpaddd y, RK2, y; \ + vpxor d, y, d; \ + vpsrld $1, d, y; \ + vpslld $(32 - 1), d, d; \ + vpor d, y, d; \ + vpslld $1, c, y; \ + vpsrld $(32 - 1), c, c; \ + vpor c, y, c; \ + vpaddd x, RK1, x; \ + vpxor x, c, c; + +#define encrypt_round(n, a, b, c, d) \ + vbroadcastss (k+4*(2*(n)))(CTX), RK1; \ + vbroadcastss (k+4*(2*(n)+1))(CTX), RK2; \ + encround(a ## 1, b ## 1, c ## 1, d ## 1, RX, RY); \ + encround(a ## 2, b ## 2, c ## 2, d ## 2, RX, RY); + +#define decrypt_round(n, a, b, c, d) \ + vbroadcastss (k+4*(2*(n)))(CTX), RK1; \ + vbroadcastss (k+4*(2*(n)+1))(CTX), RK2; \ + decround(a ## 1, b ## 1, c ## 1, d ## 1, RX, RY); \ + decround(a ## 2, b ## 2, c ## 2, d ## 2, RX, RY); + +#define encrypt_cycle(n) \ + encrypt_round((2*n), RA, RB, RC, RD); \ + encrypt_round(((2*n) + 1), RC, RD, RA, RB); + +#define decrypt_cycle(n) \ + decrypt_round(((2*n) + 1), RC, RD, RA, RB); \ + decrypt_round((2*n), RA, RB, RC, RD); + + +#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ + vpunpckldq x1, x0, t0; \ + vpunpckhdq x1, x0, t2; \ + vpunpckldq x3, x2, t1; \ + vpunpckhdq x3, x2, x3; \ + \ + vpunpcklqdq t1, t0, x0; \ + vpunpckhqdq t1, t0, x1; \ + vpunpcklqdq x3, t2, x2; \ + vpunpckhqdq x3, t2, x3; + +#define inpack_blocks(in, x0, x1, x2, x3, wkey, t0, t1, t2) \ + vpxor (0*4*4)(in), wkey, x0; \ + vpxor (1*4*4)(in), wkey, x1; \ + vpxor (2*4*4)(in), wkey, x2; \ + vpxor (3*4*4)(in), wkey, x3; \ + \ + transpose_4x4(x0, x1, x2, x3, t0, t1, t2) + +#define outunpack_blocks(out, x0, x1, x2, x3, wkey, t0, t1, t2) \ + transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ + \ + vpxor x0, wkey, 
x0; \ + vmovdqu x0, (0*4*4)(out); \ + vpxor x1, wkey, x1; \ + vmovdqu x1, (1*4*4)(out); \ + vpxor x2, wkey, x2; \ + vmovdqu x2, (2*4*4)(out); \ + vpxor x3, wkey, x3; \ + vmovdqu x3, (3*4*4)(out); + +#define outunpack_xor_blocks(out, x0, x1, x2, x3, wkey, t0, t1, t2) \ + transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ + \ + vpxor x0, wkey, x0; \ + vpxor (0*4*4)(out), x0, x0; \ + vmovdqu x0, (0*4*4)(out); \ + vpxor x1, wkey, x1; \ + vpxor (1*4*4)(out), x1, x1; \ + vmovdqu x1, (1*4*4)(out); \ + vpxor x2, wkey, x2; \ + vpxor (2*4*4)(out), x2, x2; \ + vmovdqu x2, (2*4*4)(out); \ + vpxor x3, wkey, x3; \ + vpxor (3*4*4)(out), x3, x3; \ + vmovdqu x3, (3*4*4)(out); + +.align 8 +.global __twofish_enc_blk_8way +.type __twofish_enc_blk_8way,@function; + +__twofish_enc_blk_8way: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + * %rcx: bool, if true: xor output + */ + + pushq %rbx; + pushq %rcx; + + vmovdqu w(CTX), RK1; + + leaq (4*4*4)(%rdx), %rax; + inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RK1, RX, RY, RK2); + inpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX, RY, RK2); + + xorq RID1, RID1; + xorq RID2, RID2; + + encrypt_cycle(0); + encrypt_cycle(1); + encrypt_cycle(2); + encrypt_cycle(3); + encrypt_cycle(4); + encrypt_cycle(5); + encrypt_cycle(6); + encrypt_cycle(7); + + vmovdqu (w+4*4)(CTX), RK1; + + popq %rcx; + popq %rbx; + + leaq (4*4*4)(%rsi), %rax; + leaq (4*4*4)(%rax), %rdx; + + testb %cl, %cl; + jnz __enc_xor8; + + outunpack_blocks(%rsi, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2); + outunpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2); + + ret; + +__enc_xor8: + outunpack_xor_blocks(%rsi, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2); + outunpack_xor_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2); + + ret; + +.align 8 +.global twofish_dec_blk_8way +.type twofish_dec_blk_8way,@function; + +twofish_dec_blk_8way: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + + pushq %rbx; + + vmovdqu (w+4*4)(CTX), RK1; + + leaq (4*4*4)(%rdx), %rax; + 
inpack_blocks(%rdx, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2); + inpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2); + + xorq RID1, RID1; + xorq RID2, RID2; + + decrypt_cycle(7); + decrypt_cycle(6); + decrypt_cycle(5); + decrypt_cycle(4); + decrypt_cycle(3); + decrypt_cycle(2); + decrypt_cycle(1); + decrypt_cycle(0); + + vmovdqu (w)(CTX), RK1; + + popq %rbx; + + leaq (4*4*4)(%rsi), %rax; + outunpack_blocks(%rsi, RA1, RB1, RC1, RD1, RK1, RX, RY, RK2); + outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX, RY, RK2); + + ret; diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c new file mode 100644 index 0000000..599f19e --- /dev/null +++ b/arch/x86/crypto/twofish_avx_glue.c @@ -0,0 +1,1086 @@ +/* + * Glue Code for AVX assembler version of Twofish Cipher + * + * Copyright (C) 2012 Johannes Goetzfried + * + * + * Glue code based on serpent_sse2_glue.c by: + * Copyright (C) 2011 Jussi Kivilinna + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#define TWOFISH_PARALLEL_BLOCKS 8 + +/* regular block cipher functions from twofish_x86_64 module */ +asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst, + const u8 *src); +asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst, + const u8 *src); + +/* 3-way parallel cipher functions from twofish_x86_64-3way module */ +asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, + const u8 *src, bool xor); +asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst, + const u8 *src); + +static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, + const u8 *src) +{ + __twofish_enc_blk_3way(ctx, dst, src, false); +} + +static inline void twofish_enc_blk_3way_xor(struct twofish_ctx *ctx, u8 *dst, + const u8 *src) +{ + __twofish_enc_blk_3way(ctx, dst, src, true); +} + +/* 8-way parallel cipher functions */ +asmlinkage void __twofish_enc_blk_8way(struct twofish_ctx *ctx, u8 *dst, + const u8 *src, bool xor); +asmlinkage void twofish_dec_blk_8way(struct twofish_ctx *ctx, u8 *dst, + const u8 *src); + +static inline void twofish_enc_blk_xway(struct twofish_ctx *ctx, u8 *dst, + const u8 *src) +{ + __twofish_enc_blk_8way(ctx, dst, src, false); +} + +static inline void twofish_enc_blk_xway_xor(struct twofish_ctx *ctx, u8 *dst, + const u8 *src) +{ + __twofish_enc_blk_8way(ctx, dst, src, true); +} + +static inline void twofish_dec_blk_xway(struct twofish_ctx *ctx, u8 *dst, + const u8 *src) +{ + twofish_dec_blk_8way(ctx, dst, src); +} + + + +struct async_twofish_ctx { + struct cryptd_ablkcipher *cryptd_tfm; +}; + 
+static inline bool twofish_fpu_begin(bool fpu_enabled, unsigned int nbytes) +{ + if (fpu_enabled) + return true; + + /* AVX is only used when chunk to be processed is large enough, so + * do not enable FPU until it is necessary. + */ + if (nbytes < TF_BLOCK_SIZE * TWOFISH_PARALLEL_BLOCKS) + return false; + + kernel_fpu_begin(); + return true; +} + +static inline void twofish_fpu_end(bool fpu_enabled) +{ + if (fpu_enabled) + kernel_fpu_end(); +} + +static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, + bool enc) +{ + bool fpu_enabled = false; + struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + const unsigned int bsize = TF_BLOCK_SIZE; + unsigned int nbytes; + int err; + + err = blkcipher_walk_virt(desc, walk); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + while ((nbytes = walk->nbytes)) { + u8 *wsrc = walk->src.virt.addr; + u8 *wdst = walk->dst.virt.addr; + + fpu_enabled = twofish_fpu_begin(fpu_enabled, nbytes); + + /* Process multi-block batch */ + if (nbytes >= bsize * TWOFISH_PARALLEL_BLOCKS) { + do { + if (enc) + twofish_enc_blk_xway(ctx, wdst, wsrc); + else + twofish_dec_blk_xway(ctx, wdst, wsrc); + + wsrc += bsize * TWOFISH_PARALLEL_BLOCKS; + wdst += bsize * TWOFISH_PARALLEL_BLOCKS; + nbytes -= bsize * TWOFISH_PARALLEL_BLOCKS; + } while (nbytes >= bsize * TWOFISH_PARALLEL_BLOCKS); + + if (nbytes < bsize) + goto done; + } + + /* Process three block batch */ + if (nbytes >= bsize * 3) { + do { + if (enc) + twofish_enc_blk_3way(ctx, wdst, wsrc); + else + twofish_dec_blk_3way(ctx, wdst, wsrc); + + wsrc += bsize * 3; + wdst += bsize * 3; + nbytes -= bsize * 3; + } while (nbytes >= bsize * 3); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + do { + if (enc) + twofish_enc_blk(ctx, wdst, wsrc); + else + twofish_dec_blk(ctx, wdst, wsrc); + + wsrc += bsize; + wdst += bsize; + nbytes -= bsize; + } while (nbytes >= bsize); + +done: + err = blkcipher_walk_done(desc, walk, nbytes); + } + + 
twofish_fpu_end(fpu_enabled); + return err; +} + +static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_crypt(desc, &walk, true); +} + +static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_crypt(desc, &walk, false); +} + +static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + const unsigned int bsize = TF_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u128 *src = (u128 *)walk->src.virt.addr; + u128 *dst = (u128 *)walk->dst.virt.addr; + u128 *iv = (u128 *)walk->iv; + + do { + u128_xor(dst, src, iv); + twofish_enc_blk(ctx, (u8 *)dst, (u8 *)dst); + iv = dst; + + src += 1; + dst += 1; + nbytes -= bsize; + } while (nbytes >= bsize); + + u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv); + return nbytes; +} + +static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + while ((nbytes = walk.nbytes)) { + nbytes = __cbc_encrypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + return err; +} + +static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + const unsigned int bsize = TF_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u128 *src = (u128 *)walk->src.virt.addr; + u128 *dst = (u128 *)walk->dst.virt.addr; + u128 ivs[TWOFISH_PARALLEL_BLOCKS - 1]; + u128 last_iv; + int i; + + /* Start of the last block. 
*/ + src += nbytes / bsize - 1; + dst += nbytes / bsize - 1; + + last_iv = *src; + + /* Process multi-block batch */ + if (nbytes >= bsize * TWOFISH_PARALLEL_BLOCKS) { + do { + nbytes -= bsize * (TWOFISH_PARALLEL_BLOCKS - 1); + src -= TWOFISH_PARALLEL_BLOCKS - 1; + dst -= TWOFISH_PARALLEL_BLOCKS - 1; + + for (i = 0; i < TWOFISH_PARALLEL_BLOCKS - 1; i++) + ivs[i] = src[i]; + + twofish_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); + + for (i = 0; i < TWOFISH_PARALLEL_BLOCKS - 1; i++) + u128_xor(dst + (i + 1), dst + (i + 1), ivs + i); + + nbytes -= bsize; + if (nbytes < bsize) + goto done; + + u128_xor(dst, dst, src - 1); + src -= 1; + dst -= 1; + } while (nbytes >= bsize * TWOFISH_PARALLEL_BLOCKS); + + if (nbytes < bsize) + goto done; + } + + /* Process three block batch */ + if (nbytes >= bsize * 3) { + do { + nbytes -= bsize * (3 - 1); + src -= 3 - 1; + dst -= 3 - 1; + + ivs[0] = src[0]; + ivs[1] = src[1]; + + twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src); + + u128_xor(dst + 1, dst + 1, ivs + 0); + u128_xor(dst + 2, dst + 2, ivs + 1); + + nbytes -= bsize; + if (nbytes < bsize) + goto done; + + u128_xor(dst, dst, src - 1); + src -= 1; + dst -= 1; + } while (nbytes >= bsize * 3); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + for (;;) { + twofish_dec_blk(ctx, (u8 *)dst, (u8 *)src); + + nbytes -= bsize; + if (nbytes < bsize) + break; + + u128_xor(dst, dst, src - 1); + src -= 1; + dst -= 1; + } + +done: + u128_xor(dst, dst, (u128 *)walk->iv); + *(u128 *)walk->iv = last_iv; + + return nbytes; +} + +static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + bool fpu_enabled = false; + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + while ((nbytes = walk.nbytes)) { + fpu_enabled = twofish_fpu_begin(fpu_enabled, nbytes); + nbytes = __cbc_decrypt(desc, 
&walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + twofish_fpu_end(fpu_enabled); + return err; +} + +static inline void u128_to_be128(be128 *dst, const u128 *src) +{ + dst->a = cpu_to_be64(src->a); + dst->b = cpu_to_be64(src->b); +} + +static inline void be128_to_u128(u128 *dst, const be128 *src) +{ + dst->a = be64_to_cpu(src->a); + dst->b = be64_to_cpu(src->b); +} + +static inline void u128_inc(u128 *i) +{ + i->b++; + if (!i->b) + i->a++; +} + +static void ctr_crypt_final(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + u8 *ctrblk = walk->iv; + u8 keystream[TF_BLOCK_SIZE]; + u8 *src = walk->src.virt.addr; + u8 *dst = walk->dst.virt.addr; + unsigned int nbytes = walk->nbytes; + + twofish_enc_blk(ctx, keystream, ctrblk); + crypto_xor(keystream, src, nbytes); + memcpy(dst, keystream, nbytes); + + crypto_inc(ctrblk, TF_BLOCK_SIZE); +} + +static unsigned int __ctr_crypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + const unsigned int bsize = TF_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u128 *src = (u128 *)walk->src.virt.addr; + u128 *dst = (u128 *)walk->dst.virt.addr; + u128 ctrblk; + be128 ctrblocks[TWOFISH_PARALLEL_BLOCKS]; + int i; + + be128_to_u128(&ctrblk, (be128 *)walk->iv); + + /* Process multi-block batch */ + if (nbytes >= bsize * TWOFISH_PARALLEL_BLOCKS) { + do { + /* create ctrblks for parallel encrypt */ + for (i = 0; i < TWOFISH_PARALLEL_BLOCKS; i++) { + if (dst != src) + dst[i] = src[i]; + + u128_to_be128(&ctrblocks[i], &ctrblk); + u128_inc(&ctrblk); + } + + twofish_enc_blk_xway_xor(ctx, (u8 *)dst, + (u8 *)ctrblocks); + + src += TWOFISH_PARALLEL_BLOCKS; + dst += TWOFISH_PARALLEL_BLOCKS; + nbytes -= bsize * TWOFISH_PARALLEL_BLOCKS; + } while (nbytes >= bsize * TWOFISH_PARALLEL_BLOCKS); + + if (nbytes < bsize) + goto done; + } + + /* Process three block batch */ + if (nbytes >= 
bsize * 3) { + do { + if (dst != src) { + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + } + + /* create ctrblks for parallel encrypt */ + u128_to_be128(&ctrblocks[0], &ctrblk); + u128_inc(&ctrblk); + u128_to_be128(&ctrblocks[1], &ctrblk); + u128_inc(&ctrblk); + u128_to_be128(&ctrblocks[2], &ctrblk); + u128_inc(&ctrblk); + + twofish_enc_blk_3way_xor(ctx, (u8 *)dst, + (u8 *)ctrblocks); + + src += 3; + dst += 3; + nbytes -= bsize * 3; + } while (nbytes >= bsize * 3); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + do { + if (dst != src) + *dst = *src; + + u128_to_be128(&ctrblocks[0], &ctrblk); + u128_inc(&ctrblk); + + twofish_enc_blk(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks); + u128_xor(dst, dst, (u128 *)ctrblocks); + + src += 1; + dst += 1; + nbytes -= bsize; + } while (nbytes >= bsize); + +done: + u128_to_be128((be128 *)walk->iv, &ctrblk); + return nbytes; +} + +static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + bool fpu_enabled = false; + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, TF_BLOCK_SIZE); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + while ((nbytes = walk.nbytes) >= TF_BLOCK_SIZE) { + fpu_enabled = twofish_fpu_begin(fpu_enabled, nbytes); + nbytes = __ctr_crypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + twofish_fpu_end(fpu_enabled); + + if (walk.nbytes) { + ctr_crypt_final(desc, &walk); + err = blkcipher_walk_done(desc, &walk, 0); + } + + return err; +} + +struct crypt_priv { + struct twofish_ctx *ctx; + bool fpu_enabled; +}; + +static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +{ + const unsigned int bsize = TF_BLOCK_SIZE; + struct crypt_priv *ctx = priv; + int i; + + ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes); + + if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) { + 
twofish_enc_blk_xway(ctx->ctx, srcdst, srcdst); + return; + } + + for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3) + twofish_enc_blk_3way(ctx->ctx, srcdst, srcdst); + + nbytes %= bsize * 3; + + for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) + twofish_enc_blk(ctx->ctx, srcdst, srcdst); +} + +static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +{ + const unsigned int bsize = TF_BLOCK_SIZE; + struct crypt_priv *ctx = priv; + int i; + + ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes); + + if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) { + twofish_dec_blk_xway(ctx->ctx, srcdst, srcdst); + return; + } + + for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3) + twofish_dec_blk_3way(ctx->ctx, srcdst, srcdst); + + nbytes %= bsize * 3; + + for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) + twofish_dec_blk(ctx->ctx, srcdst, srcdst); +} + +struct twofish_lrw_ctx { + struct lrw_table_ctx lrw_table; + struct twofish_ctx twofish_ctx; +}; + +static int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); + int err; + + err = __twofish_setkey(&ctx->twofish_ctx, key, + keylen - TF_BLOCK_SIZE, &tfm->crt_flags); + if (err) + return err; + + return lrw_init_table(&ctx->lrw_table, key + keylen - + TF_BLOCK_SIZE); +} + +static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[TWOFISH_PARALLEL_BLOCKS]; + struct crypt_priv crypt_ctx = { + .ctx = &ctx->twofish_ctx, + .fpu_enabled = false, + }; + struct lrw_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .table_ctx = &ctx->lrw_table, + .crypt_ctx = &crypt_ctx, + .crypt_fn = encrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + ret = lrw_crypt(desc, dst, src, nbytes, &req); + 
twofish_fpu_end(crypt_ctx.fpu_enabled); + + return ret; +} + +static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[TWOFISH_PARALLEL_BLOCKS]; + struct crypt_priv crypt_ctx = { + .ctx = &ctx->twofish_ctx, + .fpu_enabled = false, + }; + struct lrw_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .table_ctx = &ctx->lrw_table, + .crypt_ctx = &crypt_ctx, + .crypt_fn = decrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + ret = lrw_crypt(desc, dst, src, nbytes, &req); + twofish_fpu_end(crypt_ctx.fpu_enabled); + + return ret; +} + +static void lrw_exit_tfm(struct crypto_tfm *tfm) +{ + struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); + + lrw_free_table(&ctx->lrw_table); +} + +struct twofish_xts_ctx { + struct twofish_ctx tweak_ctx; + struct twofish_ctx crypt_ctx; +}; + +static int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct twofish_xts_ctx *ctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; + int err; + + /* key consists of keys of equal size concatenated, therefore + * the length must be even + */ + if (keylen % 2) { + *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + + /* first half of xts-key is for crypt */ + err = __twofish_setkey(&ctx->crypt_ctx, key, keylen / 2, flags); + if (err) + return err; + + /* second half of xts-key is for tweak */ + return __twofish_setkey(&ctx->tweak_ctx, + key + keylen / 2, keylen / 2, flags); +} + +static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[TWOFISH_PARALLEL_BLOCKS]; + struct crypt_priv crypt_ctx = { + .ctx = &ctx->crypt_ctx, + .fpu_enabled = false, + }; + struct xts_crypt_req req = { + .tbuf = buf, + .tbuflen = 
sizeof(buf), + + .tweak_ctx = &ctx->tweak_ctx, + .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk), + .crypt_ctx = &crypt_ctx, + .crypt_fn = encrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + ret = xts_crypt(desc, dst, src, nbytes, &req); + twofish_fpu_end(crypt_ctx.fpu_enabled); + + return ret; +} + +static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[TWOFISH_PARALLEL_BLOCKS]; + struct crypt_priv crypt_ctx = { + .ctx = &ctx->crypt_ctx, + .fpu_enabled = false, + }; + struct xts_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .tweak_ctx = &ctx->tweak_ctx, + .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk), + .crypt_ctx = &crypt_ctx, + .crypt_fn = decrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + ret = xts_crypt(desc, dst, src, nbytes, &req); + twofish_fpu_end(crypt_ctx.fpu_enabled); + + return ret; +} + +static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, + unsigned int key_len) +{ + struct async_twofish_ctx *ctx = crypto_ablkcipher_ctx(tfm); + struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base; + int err; + + crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm) + & CRYPTO_TFM_REQ_MASK); + err = crypto_ablkcipher_setkey(child, key, key_len); + crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child) + & CRYPTO_TFM_RES_MASK); + return err; +} + +static int __ablk_encrypt(struct ablkcipher_request *req) +{ + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct async_twofish_ctx *ctx = crypto_ablkcipher_ctx(tfm); + struct blkcipher_desc desc; + + desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); + desc.info = req->info; + desc.flags = 0; + + return crypto_blkcipher_crt(desc.tfm)->encrypt( + &desc, req->dst, req->src, 
req->nbytes); +} + +static int ablk_encrypt(struct ablkcipher_request *req) +{ + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct async_twofish_ctx *ctx = crypto_ablkcipher_ctx(tfm); + + if (!irq_fpu_usable()) { + struct ablkcipher_request *cryptd_req = + ablkcipher_request_ctx(req); + + memcpy(cryptd_req, req, sizeof(*req)); + ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); + + return crypto_ablkcipher_encrypt(cryptd_req); + } else { + return __ablk_encrypt(req); + } +} + +static int ablk_decrypt(struct ablkcipher_request *req) +{ + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct async_twofish_ctx *ctx = crypto_ablkcipher_ctx(tfm); + + if (!irq_fpu_usable()) { + struct ablkcipher_request *cryptd_req = + ablkcipher_request_ctx(req); + + memcpy(cryptd_req, req, sizeof(*req)); + ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); + + return crypto_ablkcipher_decrypt(cryptd_req); + } else { + struct blkcipher_desc desc; + + desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); + desc.info = req->info; + desc.flags = 0; + + return crypto_blkcipher_crt(desc.tfm)->decrypt( + &desc, req->dst, req->src, req->nbytes); + } +} + +static void ablk_exit(struct crypto_tfm *tfm) +{ + struct async_twofish_ctx *ctx = crypto_tfm_ctx(tfm); + + cryptd_free_ablkcipher(ctx->cryptd_tfm); +} + +static int ablk_init(struct crypto_tfm *tfm) +{ + struct async_twofish_ctx *ctx = crypto_tfm_ctx(tfm); + struct cryptd_ablkcipher *cryptd_tfm; + char drv_name[CRYPTO_MAX_ALG_NAME]; + + snprintf(drv_name, sizeof(drv_name), "__driver-%s", + crypto_tfm_alg_driver_name(tfm)); + + cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + + ctx->cryptd_tfm = cryptd_tfm; + tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) + + crypto_ablkcipher_reqsize(&cryptd_tfm->base); + + return 0; +} + +static struct crypto_alg twofish_algs[10] = { { + .cra_name = 
"__ecb-twofish-avx", + .cra_driver_name = "__driver-ecb-twofish-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = TF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct twofish_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(twofish_algs[0].cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .setkey = twofish_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, + }, +}, { + .cra_name = "__cbc-twofish-avx", + .cra_driver_name = "__driver-cbc-twofish-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = TF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct twofish_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(twofish_algs[1].cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .setkey = twofish_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, + }, +}, { + .cra_name = "__ctr-twofish-avx", + .cra_driver_name = "__driver-ctr-twofish-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct twofish_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(twofish_algs[2].cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .ivsize = TF_BLOCK_SIZE, + .setkey = twofish_setkey, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, + }, +}, { + .cra_name = "__lrw-twofish-avx", + .cra_driver_name = "__driver-lrw-twofish-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = TF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct twofish_lrw_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = 
THIS_MODULE, + .cra_list = LIST_HEAD_INIT(twofish_algs[3].cra_list), + .cra_exit = lrw_exit_tfm, + .cra_u = { + .blkcipher = { + .min_keysize = TF_MIN_KEY_SIZE + + TF_BLOCK_SIZE, + .max_keysize = TF_MAX_KEY_SIZE + + TF_BLOCK_SIZE, + .ivsize = TF_BLOCK_SIZE, + .setkey = lrw_twofish_setkey, + .encrypt = lrw_encrypt, + .decrypt = lrw_decrypt, + }, + }, +}, { + .cra_name = "__xts-twofish-avx", + .cra_driver_name = "__driver-xts-twofish-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = TF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct twofish_xts_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(twofish_algs[4].cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = TF_MIN_KEY_SIZE * 2, + .max_keysize = TF_MAX_KEY_SIZE * 2, + .ivsize = TF_BLOCK_SIZE, + .setkey = xts_twofish_setkey, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, + }, + }, +}, { + .cra_name = "ecb(twofish)", + .cra_driver_name = "ecb-twofish-avx", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = TF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_twofish_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(twofish_algs[5].cra_list), + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +}, { + .cra_name = "cbc(twofish)", + .cra_driver_name = "cbc-twofish-avx", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = TF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_twofish_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(twofish_algs[6].cra_list), + .cra_init = 
ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .ivsize = TF_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = __ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +}, { + .cra_name = "ctr(twofish)", + .cra_driver_name = "ctr-twofish-avx", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct async_twofish_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(twofish_algs[7].cra_list), + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .ivsize = TF_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_encrypt, + .geniv = "chainiv", + }, + }, +}, { + .cra_name = "lrw(twofish)", + .cra_driver_name = "lrw-twofish-avx", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = TF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_twofish_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(twofish_algs[8].cra_list), + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = TF_MIN_KEY_SIZE + + TF_BLOCK_SIZE, + .max_keysize = TF_MAX_KEY_SIZE + + TF_BLOCK_SIZE, + .ivsize = TF_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +}, { + .cra_name = "xts(twofish)", + .cra_driver_name = "xts-twofish-avx", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = TF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_twofish_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = 
LIST_HEAD_INIT(twofish_algs[9].cra_list), + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = TF_MIN_KEY_SIZE * 2, + .max_keysize = TF_MAX_KEY_SIZE * 2, + .ivsize = TF_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +} }; + +static int __init twofish_init(void) +{ + u64 xcr0; + + if (!cpu_has_avx || !cpu_has_osxsave) { + printk(KERN_INFO "AVX instructions are not detected.\n"); + return -ENODEV; + } + + xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); + if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { + printk(KERN_INFO "AVX detected but unusable.\n"); + return -ENODEV; + } + + return crypto_register_algs(twofish_algs, ARRAY_SIZE(twofish_algs)); +} + +static void __exit twofish_exit(void) +{ + crypto_unregister_algs(twofish_algs, ARRAY_SIZE(twofish_algs)); +} + +module_init(twofish_init); +module_exit(twofish_exit); + +MODULE_DESCRIPTION("Twofish Cipher Algorithm, AVX optimized"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("twofish"); diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c index 922ab24..77e4e55 100644 --- a/arch/x86/crypto/twofish_glue_3way.c +++ b/arch/x86/crypto/twofish_glue_3way.c @@ -45,8 +45,10 @@ asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst, /* 3-way parallel cipher functions */ asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, const u8 *src, bool xor); +EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way); asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst, const u8 *src); +EXPORT_SYMBOL_GPL(twofish_dec_blk_3way); static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, const u8 *src) diff --git a/crypto/Kconfig b/crypto/Kconfig index 8e84225..e00a4e4 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -913,6 +913,30 @@ config CRYPTO_TWOFISH_X86_64_3WAY See also: +config CRYPTO_TWOFISH_AVX_X86_64 + tristate "Twofish cipher algorithm 
(x86_64/AVX)" + depends on X86 && 64BIT + select CRYPTO_ALGAPI + select CRYPTO_CRYPTD + select CRYPTO_TWOFISH_COMMON + select CRYPTO_TWOFISH_X86_64 + select CRYPTO_TWOFISH_X86_64_3WAY + select CRYPTO_LRW + select CRYPTO_XTS + help + Twofish cipher algorithm (x86_64/AVX). + + Twofish was submitted as an AES (Advanced Encryption Standard) + candidate cipher by researchers at CounterPane Systems. It is a + 16 round block cipher supporting key sizes of 128, 192, and 256 + bits. + + This module provides the Twofish cipher algorithm that processes + eight blocks parallel using the AVX Instruction Set. + + See also: + + comment "Compression" config CRYPTO_DEFLATE diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 750cce4..2af8797 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -1566,6 +1566,29 @@ static int do_test(int m) speed_template_32_64); break; + case 504: + test_acipher_speed("ecb(twofish)", ENCRYPT, sec, NULL, 0, + speed_template_16_24_32); + test_acipher_speed("ecb(twofish)", DECRYPT, sec, NULL, 0, + speed_template_16_24_32); + test_acipher_speed("cbc(twofish)", ENCRYPT, sec, NULL, 0, + speed_template_16_24_32); + test_acipher_speed("cbc(twofish)", DECRYPT, sec, NULL, 0, + speed_template_16_24_32); + test_acipher_speed("ctr(twofish)", ENCRYPT, sec, NULL, 0, + speed_template_16_24_32); + test_acipher_speed("ctr(twofish)", DECRYPT, sec, NULL, 0, + speed_template_16_24_32); + test_acipher_speed("lrw(twofish)", ENCRYPT, sec, NULL, 0, + speed_template_32_40_48); + test_acipher_speed("lrw(twofish)", DECRYPT, sec, NULL, 0, + speed_template_32_40_48); + test_acipher_speed("xts(twofish)", ENCRYPT, sec, NULL, 0, + speed_template_32_48_64); + test_acipher_speed("xts(twofish)", DECRYPT, sec, NULL, 0, + speed_template_32_48_64); + break; + case 1000: test_available(); break; diff --git a/crypto/testmgr.c b/crypto/testmgr.c index eb6d20f..73b3ec6 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -1549,6 +1549,21 @@ static const struct alg_test_desc 
alg_test_descs[] = { } } }, { + .alg = "__cbc-twofish-avx", + .test = alg_test_null, + .suite = { + .cipher = { + .enc = { + .vecs = NULL, + .count = 0 + }, + .dec = { + .vecs = NULL, + .count = 0 + } + } + } + }, { .alg = "__driver-cbc-aes-aesni", .test = alg_test_null, .suite = { @@ -1579,6 +1594,21 @@ static const struct alg_test_desc alg_test_descs[] = { } } }, { + .alg = "__driver-cbc-twofish-avx", + .test = alg_test_null, + .suite = { + .cipher = { + .enc = { + .vecs = NULL, + .count = 0 + }, + .dec = { + .vecs = NULL, + .count = 0 + } + } + } + }, { .alg = "__driver-ecb-aes-aesni", .test = alg_test_null, .suite = { @@ -1609,6 +1639,21 @@ static const struct alg_test_desc alg_test_descs[] = { } } }, { + .alg = "__driver-ecb-twofish-avx", + .test = alg_test_null, + .suite = { + .cipher = { + .enc = { + .vecs = NULL, + .count = 0 + }, + .dec = { + .vecs = NULL, + .count = 0 + } + } + } + }, { .alg = "__ghash-pclmulqdqni", .test = alg_test_null, .suite = { @@ -1806,6 +1851,21 @@ static const struct alg_test_desc alg_test_descs[] = { } } }, { + .alg = "cryptd(__driver-ecb-twofish-avx)", + .test = alg_test_null, + .suite = { + .cipher = { + .enc = { + .vecs = NULL, + .count = 0 + }, + .dec = { + .vecs = NULL, + .count = 0 + } + } + } + }, { .alg = "cryptd(__ghash-pclmulqdqni)", .test = alg_test_null, .suite = { -- cgit v0.10.2 From 4da7de4d8be7d18559c56bca446b1161a3b63acc Mon Sep 17 00:00:00 2001 From: Johannes Goetzfried Date: Mon, 28 May 2012 15:55:38 +0200 Subject: crypto: testmgr - expand twofish test vectors The AVX implementation of the twofish cipher processes 8 blocks parallel, so we need to make test vectors larger to check parallel code paths. Test vectors are also large enough to deal with 16 block parallel implementations which may occur in the future. 
Signed-off-by: Johannes Goetzfried Signed-off-by: Jussi Kivilinna Signed-off-by: Herbert Xu diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 34a9d51..4d84fe4 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -2765,8 +2765,62 @@ static struct cipher_testvec tf_enc_tv_template[] = { "\x1E\x92\x29\xC0\x34\xCB\x62\xF9" "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48" "\xDF\x76\x0D\x81\x18\xAF\x23\xBA" - "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C", - .ilen = 64, + "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C" + "\xC3\x37\xCE\x65\xFC\x70\x07\x9E" + "\x12\xA9\x40\xD7\x4B\xE2\x79\x10" + "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F" + "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1" + "\x68\xFF\x73\x0A\xA1\x15\xAC\x43" + "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5" + "\x29\xC0\x57\xEE\x62\xF9\x90\x04" + "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76" + "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8" + "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A" + "\xF1\x65\xFC\x93\x07\x9E\x35\xCC" + "\x40\xD7\x6E\x05\x79\x10\xA7\x1B" + "\xB2\x49\xE0\x54\xEB\x82\x19\x8D" + "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF" + "\x96\x0A\xA1\x38\xCF\x43\xDA\x71" + "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3" + "\x57\xEE\x85\x1C\x90\x27\xBE\x32" + "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4" + "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16" + "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88" + "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA" + "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49" + "\xE0\x77\x0E\x82\x19\xB0\x24\xBB" + "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D" + "\xC4\x38\xCF\x66\xFD\x71\x08\x9F" + "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11" + "\x85\x1C\xB3\x27\xBE\x55\xEC\x60" + "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2" + "\x69\x00\x74\x0B\xA2\x16\xAD\x44" + "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6" + "\x2A\xC1\x58\xEF\x63\xFA\x91\x05" + "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77" + "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9" + "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B" + "\xF2\x66\xFD\x94\x08\x9F\x36\xCD" + "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C" + "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E" + "\x25\xBC\x30\xC7\x5E\xF5\x69\x00" + "\x97\x0B\xA2\x39\xD0\x44\xDB\x72" + "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4" + 
"\x58\xEF\x86\x1D\x91\x28\xBF\x33" + "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5" + "\x3C\xD3\x47\xDE\x75\x0C\x80\x17" + "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89" + "\x20\x94\x2B\xC2\x36\xCD\x64\xFB" + "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A" + "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC" + "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E" + "\xC5\x39\xD0\x67\xFE\x72\x09\xA0" + "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12" + "\x86\x1D\xB4\x28\xBF\x56\xED\x61" + "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3" + "\x6A\x01\x75\x0C\xA3\x17\xAE\x45" + "\xDC\x50\xE7\x7E\x15\x89\x20\xB7", + .ilen = 496, .result = "\x88\xCB\x1E\xC2\xAF\x8A\x97\xFF" "\xF6\x90\x46\x9C\x4A\x0F\x08\xDC" "\xDE\xAB\xAD\xFA\xFC\xA8\xC2\x3D" @@ -2774,8 +2828,62 @@ static struct cipher_testvec tf_enc_tv_template[] = { "\x34\x9E\xB6\x08\xB2\xDD\xA8\xF5" "\xDF\xFA\xC7\xE8\x09\x50\x76\x08" "\xA2\xB6\x6A\x59\xC0\x2B\x6D\x05" - "\x89\xF6\x82\xF0\xD3\xDB\x06\x02", - .rlen = 64, + "\x89\xF6\x82\xF0\xD3\xDB\x06\x02" + "\xB5\x11\x5C\x5E\x79\x1A\xAC\x43" + "\x5C\xC0\x30\x4B\x6B\x16\xA1\x40" + "\x80\x27\x88\xBA\x2C\x74\x42\xE0" + "\x1B\xA5\x85\x08\xB9\xE6\x22\x7A" + "\x36\x3B\x0D\x9F\xA0\x22\x6C\x2A" + "\x91\x75\x47\xBC\x67\x21\x4E\xF9" + "\xEA\xFF\xD9\xD5\xC0\xFC\x9E\x2C" + "\x3E\xAD\xC6\x61\x0E\x93\x7A\x22" + "\x09\xC8\x8D\xC1\x8E\xB4\x8B\x5C" + "\xC6\x24\x42\xB8\x23\x66\x80\xA9" + "\x32\x0B\x7A\x29\xBF\xB3\x0B\x63" + "\x43\x27\x13\xA9\xBE\xEB\xBD\xF3" + "\x33\x62\x70\xE2\x1B\x86\x7A\xA1" + "\x51\x4A\x16\xFE\x29\x63\x7E\xD0" + "\x7A\xA4\x6E\x2C\xF8\xC1\xDB\xE8" + "\xCB\x4D\xD2\x8C\x04\x14\xB4\x66" + "\x41\xB7\x3A\x96\x16\x7C\x1D\x5B" + "\xB6\x41\x42\x64\x43\xEE\x6E\x7C" + "\x8B\xAF\x01\x9C\xA4\x6E\x75\x8F" + "\xDE\x10\x9F\xA6\xE7\xD6\x44\x97" + "\x66\xA3\x96\x0F\x1C\x25\x60\xF5" + "\x3C\x2E\x32\x69\x0E\x82\xFF\x27" + "\x0F\xB5\x06\xDA\xD8\x31\x15\x6C" + "\xDF\x18\x6C\x87\xF5\x3B\x11\x9A" + "\x1B\x42\x1F\x5B\x29\x19\x96\x13" + "\x68\x2E\x5E\x08\x1C\x8F\x32\x4B" + "\x81\x77\x6D\xF4\xA0\x01\x42\xEC" + "\xDD\x5B\xFD\x3A\x8E\x6A\x14\xFB" + "\x83\x54\xDF\x0F\x86\xB7\xEA\x40" + 
"\x46\x39\xF7\x2A\x89\x8D\x4E\x96" + "\x5F\x5F\x6D\x76\xC6\x13\x9D\x3D" + "\x1D\x5F\x0C\x7D\xE2\xBC\xC2\x16" + "\x16\xBE\x89\x3E\xB0\x61\xA2\x5D" + "\xAF\xD1\x40\x5F\x1A\xB8\x26\x41" + "\xC6\xBD\x36\xEF\xED\x29\x50\x6D" + "\x10\xEF\x26\xE8\xA8\x93\x11\x3F" + "\x2D\x1F\x88\x20\x77\x45\xF5\x66" + "\x08\xB9\xF1\xEF\xB1\x93\xA8\x81" + "\x65\xC5\xCD\x3E\x8C\x06\x60\x2C" + "\xB2\x10\x7A\xCA\x05\x25\x59\xDB" + "\xC7\x28\xF5\x20\x35\x52\x9E\x62" + "\xF8\x88\x24\x1C\x4D\x84\x12\x39" + "\x39\xE4\x2E\xF4\xD4\x9D\x2B\xBC" + "\x87\x66\xE6\xC0\x6B\x31\x9A\x66" + "\x03\xDC\x95\xD8\x6B\xD0\x30\x8F" + "\xDF\x8F\x8D\xFA\xEC\x1F\x08\xBD" + "\xA3\x63\xE2\x71\x4F\x03\x94\x87" + "\x50\xDF\x15\x1F\xED\x3A\xA3\x7F" + "\x1F\x2A\xB5\xA1\x69\xAC\x4B\x0D" + "\x84\x9B\x2A\xE9\x55\xDD\x46\x91" + "\x15\x33\xF3\x2B\x9B\x46\x97\x00" + "\xF0\x29\xD8\x59\x5D\x33\x37\xF9" + "\x58\x33\x9B\x78\xC7\x58\x48\x6B" + "\x2C\x75\x64\xC4\xCA\xC1\x7E\xD5", + .rlen = 496, }, }; @@ -2822,8 +2930,62 @@ static struct cipher_testvec tf_dec_tv_template[] = { "\x34\x9E\xB6\x08\xB2\xDD\xA8\xF5" "\xDF\xFA\xC7\xE8\x09\x50\x76\x08" "\xA2\xB6\x6A\x59\xC0\x2B\x6D\x05" - "\x89\xF6\x82\xF0\xD3\xDB\x06\x02", - .ilen = 64, + "\x89\xF6\x82\xF0\xD3\xDB\x06\x02" + "\xB5\x11\x5C\x5E\x79\x1A\xAC\x43" + "\x5C\xC0\x30\x4B\x6B\x16\xA1\x40" + "\x80\x27\x88\xBA\x2C\x74\x42\xE0" + "\x1B\xA5\x85\x08\xB9\xE6\x22\x7A" + "\x36\x3B\x0D\x9F\xA0\x22\x6C\x2A" + "\x91\x75\x47\xBC\x67\x21\x4E\xF9" + "\xEA\xFF\xD9\xD5\xC0\xFC\x9E\x2C" + "\x3E\xAD\xC6\x61\x0E\x93\x7A\x22" + "\x09\xC8\x8D\xC1\x8E\xB4\x8B\x5C" + "\xC6\x24\x42\xB8\x23\x66\x80\xA9" + "\x32\x0B\x7A\x29\xBF\xB3\x0B\x63" + "\x43\x27\x13\xA9\xBE\xEB\xBD\xF3" + "\x33\x62\x70\xE2\x1B\x86\x7A\xA1" + "\x51\x4A\x16\xFE\x29\x63\x7E\xD0" + "\x7A\xA4\x6E\x2C\xF8\xC1\xDB\xE8" + "\xCB\x4D\xD2\x8C\x04\x14\xB4\x66" + "\x41\xB7\x3A\x96\x16\x7C\x1D\x5B" + "\xB6\x41\x42\x64\x43\xEE\x6E\x7C" + "\x8B\xAF\x01\x9C\xA4\x6E\x75\x8F" + "\xDE\x10\x9F\xA6\xE7\xD6\x44\x97" + "\x66\xA3\x96\x0F\x1C\x25\x60\xF5" + 
"\x3C\x2E\x32\x69\x0E\x82\xFF\x27" + "\x0F\xB5\x06\xDA\xD8\x31\x15\x6C" + "\xDF\x18\x6C\x87\xF5\x3B\x11\x9A" + "\x1B\x42\x1F\x5B\x29\x19\x96\x13" + "\x68\x2E\x5E\x08\x1C\x8F\x32\x4B" + "\x81\x77\x6D\xF4\xA0\x01\x42\xEC" + "\xDD\x5B\xFD\x3A\x8E\x6A\x14\xFB" + "\x83\x54\xDF\x0F\x86\xB7\xEA\x40" + "\x46\x39\xF7\x2A\x89\x8D\x4E\x96" + "\x5F\x5F\x6D\x76\xC6\x13\x9D\x3D" + "\x1D\x5F\x0C\x7D\xE2\xBC\xC2\x16" + "\x16\xBE\x89\x3E\xB0\x61\xA2\x5D" + "\xAF\xD1\x40\x5F\x1A\xB8\x26\x41" + "\xC6\xBD\x36\xEF\xED\x29\x50\x6D" + "\x10\xEF\x26\xE8\xA8\x93\x11\x3F" + "\x2D\x1F\x88\x20\x77\x45\xF5\x66" + "\x08\xB9\xF1\xEF\xB1\x93\xA8\x81" + "\x65\xC5\xCD\x3E\x8C\x06\x60\x2C" + "\xB2\x10\x7A\xCA\x05\x25\x59\xDB" + "\xC7\x28\xF5\x20\x35\x52\x9E\x62" + "\xF8\x88\x24\x1C\x4D\x84\x12\x39" + "\x39\xE4\x2E\xF4\xD4\x9D\x2B\xBC" + "\x87\x66\xE6\xC0\x6B\x31\x9A\x66" + "\x03\xDC\x95\xD8\x6B\xD0\x30\x8F" + "\xDF\x8F\x8D\xFA\xEC\x1F\x08\xBD" + "\xA3\x63\xE2\x71\x4F\x03\x94\x87" + "\x50\xDF\x15\x1F\xED\x3A\xA3\x7F" + "\x1F\x2A\xB5\xA1\x69\xAC\x4B\x0D" + "\x84\x9B\x2A\xE9\x55\xDD\x46\x91" + "\x15\x33\xF3\x2B\x9B\x46\x97\x00" + "\xF0\x29\xD8\x59\x5D\x33\x37\xF9" + "\x58\x33\x9B\x78\xC7\x58\x48\x6B" + "\x2C\x75\x64\xC4\xCA\xC1\x7E\xD5", + .ilen = 496, .result = "\x56\xED\x84\x1B\x8F\x26\xBD\x31" "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3" "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15" @@ -2831,8 +2993,62 @@ static struct cipher_testvec tf_dec_tv_template[] = { "\x1E\x92\x29\xC0\x34\xCB\x62\xF9" "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48" "\xDF\x76\x0D\x81\x18\xAF\x23\xBA" - "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C", - .rlen = 64, + "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C" + "\xC3\x37\xCE\x65\xFC\x70\x07\x9E" + "\x12\xA9\x40\xD7\x4B\xE2\x79\x10" + "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F" + "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1" + "\x68\xFF\x73\x0A\xA1\x15\xAC\x43" + "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5" + "\x29\xC0\x57\xEE\x62\xF9\x90\x04" + "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76" + "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8" + "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A" + 
"\xF1\x65\xFC\x93\x07\x9E\x35\xCC" + "\x40\xD7\x6E\x05\x79\x10\xA7\x1B" + "\xB2\x49\xE0\x54\xEB\x82\x19\x8D" + "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF" + "\x96\x0A\xA1\x38\xCF\x43\xDA\x71" + "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3" + "\x57\xEE\x85\x1C\x90\x27\xBE\x32" + "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4" + "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16" + "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88" + "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA" + "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49" + "\xE0\x77\x0E\x82\x19\xB0\x24\xBB" + "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D" + "\xC4\x38\xCF\x66\xFD\x71\x08\x9F" + "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11" + "\x85\x1C\xB3\x27\xBE\x55\xEC\x60" + "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2" + "\x69\x00\x74\x0B\xA2\x16\xAD\x44" + "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6" + "\x2A\xC1\x58\xEF\x63\xFA\x91\x05" + "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77" + "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9" + "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B" + "\xF2\x66\xFD\x94\x08\x9F\x36\xCD" + "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C" + "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E" + "\x25\xBC\x30\xC7\x5E\xF5\x69\x00" + "\x97\x0B\xA2\x39\xD0\x44\xDB\x72" + "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4" + "\x58\xEF\x86\x1D\x91\x28\xBF\x33" + "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5" + "\x3C\xD3\x47\xDE\x75\x0C\x80\x17" + "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89" + "\x20\x94\x2B\xC2\x36\xCD\x64\xFB" + "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A" + "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC" + "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E" + "\xC5\x39\xD0\x67\xFE\x72\x09\xA0" + "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12" + "\x86\x1D\xB4\x28\xBF\x56\xED\x61" + "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3" + "\x6A\x01\x75\x0C\xA3\x17\xAE\x45" + "\xDC\x50\xE7\x7E\x15\x89\x20\xB7", + .rlen = 496, }, }; @@ -2894,8 +3110,62 @@ static struct cipher_testvec tf_cbc_enc_tv_template[] = { "\x1E\x92\x29\xC0\x34\xCB\x62\xF9" "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48" "\xDF\x76\x0D\x81\x18\xAF\x23\xBA" - "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C", - .ilen = 64, + "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C" + "\xC3\x37\xCE\x65\xFC\x70\x07\x9E" + "\x12\xA9\x40\xD7\x4B\xE2\x79\x10" + 
"\x84\x1B\xB2\x26\xBD\x54\xEB\x5F" + "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1" + "\x68\xFF\x73\x0A\xA1\x15\xAC\x43" + "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5" + "\x29\xC0\x57\xEE\x62\xF9\x90\x04" + "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76" + "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8" + "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A" + "\xF1\x65\xFC\x93\x07\x9E\x35\xCC" + "\x40\xD7\x6E\x05\x79\x10\xA7\x1B" + "\xB2\x49\xE0\x54\xEB\x82\x19\x8D" + "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF" + "\x96\x0A\xA1\x38\xCF\x43\xDA\x71" + "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3" + "\x57\xEE\x85\x1C\x90\x27\xBE\x32" + "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4" + "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16" + "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88" + "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA" + "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49" + "\xE0\x77\x0E\x82\x19\xB0\x24\xBB" + "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D" + "\xC4\x38\xCF\x66\xFD\x71\x08\x9F" + "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11" + "\x85\x1C\xB3\x27\xBE\x55\xEC\x60" + "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2" + "\x69\x00\x74\x0B\xA2\x16\xAD\x44" + "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6" + "\x2A\xC1\x58\xEF\x63\xFA\x91\x05" + "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77" + "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9" + "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B" + "\xF2\x66\xFD\x94\x08\x9F\x36\xCD" + "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C" + "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E" + "\x25\xBC\x30\xC7\x5E\xF5\x69\x00" + "\x97\x0B\xA2\x39\xD0\x44\xDB\x72" + "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4" + "\x58\xEF\x86\x1D\x91\x28\xBF\x33" + "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5" + "\x3C\xD3\x47\xDE\x75\x0C\x80\x17" + "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89" + "\x20\x94\x2B\xC2\x36\xCD\x64\xFB" + "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A" + "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC" + "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E" + "\xC5\x39\xD0\x67\xFE\x72\x09\xA0" + "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12" + "\x86\x1D\xB4\x28\xBF\x56\xED\x61" + "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3" + "\x6A\x01\x75\x0C\xA3\x17\xAE\x45" + "\xDC\x50\xE7\x7E\x15\x89\x20\xB7", + .ilen = 496, .result = "\xC8\xFF\xF2\x53\xA6\x27\x09\xD1" 
"\x33\x38\xC2\xC0\x0C\x14\x7E\xB5" "\x26\x1B\x05\x0C\x05\x12\x3F\xC0" @@ -2903,8 +3173,62 @@ static struct cipher_testvec tf_cbc_enc_tv_template[] = { "\x3D\x32\xDF\xDA\x56\x00\x6E\xEE" "\x5B\x2A\x72\x9D\xC2\x4D\x19\xBC" "\x8C\x53\xFA\x87\x6F\xDD\x81\xA3" - "\xB1\xD3\x44\x65\xDF\xE7\x63\x38", - .rlen = 64, + "\xB1\xD3\x44\x65\xDF\xE7\x63\x38" + "\x4A\xFC\xDC\xEC\x3F\x26\x8E\xB8" + "\x43\xFC\xFE\x18\xB5\x11\x6D\x31" + "\x81\x8B\x0D\x75\xF6\x80\xEC\x84" + "\x04\xB9\xE6\x09\x63\xED\x39\xDB" + "\xC3\xF6\x14\xD6\x6E\x5E\x8B\xBD" + "\x3E\xFA\xD7\x98\x50\x6F\xD9\x63" + "\x02\xCD\x0D\x39\x4B\x0D\xEC\x80" + "\xE3\x6A\x17\xF4\xCC\xAD\xFF\x68" + "\x45\xDD\xC8\x83\x1D\x41\x96\x0D" + "\x91\x2E\x05\xD3\x59\x82\xE0\x43" + "\x90\x4F\xB9\xF7\xAD\x6B\x2E\xAF" + "\xA7\x84\x00\x53\xCD\x6F\xD1\x0C" + "\x4E\xF9\x5A\x23\xFB\xCA\xC7\xD3" + "\xA9\xAA\x9D\xB2\x3F\x66\xF1\xAC" + "\x25\x21\x8F\xF7\xEF\xF2\x6A\xDF" + "\xE8\xDA\x75\x1A\x8A\xF1\xDD\x38" + "\x1F\xF9\x3D\x68\x4A\xBB\x9E\x34" + "\x1F\x66\x1F\x9C\x2B\x54\xFF\x60" + "\x7F\x29\x4B\x55\x80\x8F\x4E\xA7" + "\xA6\x9A\x0A\xD9\x0D\x19\x00\xF8" + "\x1F\xBC\x0C\x40\x6B\xEC\x99\x25" + "\x94\x70\x74\x0E\x1D\xC5\xBC\x12" + "\xF3\x42\xBE\x95\xBF\xFB\x4E\x55" + "\x9A\xB9\xCE\x14\x16\x5B\xDC\xD3" + "\x75\x42\x62\x04\x31\x1F\x95\x7C" + "\x66\x1A\x97\xDC\x2F\x40\x5C\x39" + "\x78\xE6\x02\xDB\x49\xE1\xC6\x47" + "\xC2\x78\x9A\xBB\xF3\xBE\xCB\x93" + "\xD8\xB8\xE8\xBB\x8C\xB3\x9B\xA7" + "\xC2\x89\xF3\x91\x88\x83\x3D\xF0" + "\x29\xA2\xCD\xB5\x79\x16\xC2\x40" + "\x11\x03\x8E\x9C\xFD\xC9\x43\xC4" + "\xC2\x19\xF0\x4A\x32\xEF\x0C\x2B" + "\xD3\x2B\xE9\xD4\x4C\xDE\x95\xCF" + "\x04\x03\xD3\x2C\x7F\x82\xC8\xFA" + "\x0F\xD8\x7A\x39\x7B\x01\x41\x9C" + "\x78\xB6\xC9\xBF\xF9\x78\x57\x88" + "\xB1\xA5\xE1\xE0\xD9\x16\xD4\xC8" + "\xEE\xC4\xBE\x7B\x55\x59\x00\x48" + "\x1B\xBC\x14\xFA\x2A\x9D\xC9\x1C" + "\xFB\x28\x3F\x95\xDD\xB7\xD6\xCE" + "\x3A\x7F\x09\x0C\x0E\x69\x30\x7D" + "\xBC\x68\x9C\x91\x2A\x59\x57\x04" + "\xED\x1A\x1E\x00\xB1\x85\x92\x04" + 
"\x28\x8C\x0C\x3C\xC1\xD5\x12\xF7" + "\x4C\x3E\xB0\xE7\x86\x62\x68\x91" + "\xFC\xC4\xE2\xCE\xA6\xDC\x5E\x93" + "\x5D\x8D\x8C\x68\xB3\xB2\xB9\x64" + "\x16\xB8\xC8\x6F\xD8\xEE\x21\xBD" + "\xAC\x18\x0C\x7D\x0D\x05\xAB\xF1" + "\xFA\xDD\xE2\x48\xDF\x4C\x02\x39" + "\x69\xA1\x62\xBD\x49\x3A\x9D\x91" + "\x30\x70\x56\xA4\x37\xDD\x7C\xC0" + "\x0A\xA3\x30\x10\x26\x25\x41\x2C", + .rlen = 496, }, }; @@ -2966,8 +3290,62 @@ static struct cipher_testvec tf_cbc_dec_tv_template[] = { "\x3D\x32\xDF\xDA\x56\x00\x6E\xEE" "\x5B\x2A\x72\x9D\xC2\x4D\x19\xBC" "\x8C\x53\xFA\x87\x6F\xDD\x81\xA3" - "\xB1\xD3\x44\x65\xDF\xE7\x63\x38", - .ilen = 64, + "\xB1\xD3\x44\x65\xDF\xE7\x63\x38" + "\x4A\xFC\xDC\xEC\x3F\x26\x8E\xB8" + "\x43\xFC\xFE\x18\xB5\x11\x6D\x31" + "\x81\x8B\x0D\x75\xF6\x80\xEC\x84" + "\x04\xB9\xE6\x09\x63\xED\x39\xDB" + "\xC3\xF6\x14\xD6\x6E\x5E\x8B\xBD" + "\x3E\xFA\xD7\x98\x50\x6F\xD9\x63" + "\x02\xCD\x0D\x39\x4B\x0D\xEC\x80" + "\xE3\x6A\x17\xF4\xCC\xAD\xFF\x68" + "\x45\xDD\xC8\x83\x1D\x41\x96\x0D" + "\x91\x2E\x05\xD3\x59\x82\xE0\x43" + "\x90\x4F\xB9\xF7\xAD\x6B\x2E\xAF" + "\xA7\x84\x00\x53\xCD\x6F\xD1\x0C" + "\x4E\xF9\x5A\x23\xFB\xCA\xC7\xD3" + "\xA9\xAA\x9D\xB2\x3F\x66\xF1\xAC" + "\x25\x21\x8F\xF7\xEF\xF2\x6A\xDF" + "\xE8\xDA\x75\x1A\x8A\xF1\xDD\x38" + "\x1F\xF9\x3D\x68\x4A\xBB\x9E\x34" + "\x1F\x66\x1F\x9C\x2B\x54\xFF\x60" + "\x7F\x29\x4B\x55\x80\x8F\x4E\xA7" + "\xA6\x9A\x0A\xD9\x0D\x19\x00\xF8" + "\x1F\xBC\x0C\x40\x6B\xEC\x99\x25" + "\x94\x70\x74\x0E\x1D\xC5\xBC\x12" + "\xF3\x42\xBE\x95\xBF\xFB\x4E\x55" + "\x9A\xB9\xCE\x14\x16\x5B\xDC\xD3" + "\x75\x42\x62\x04\x31\x1F\x95\x7C" + "\x66\x1A\x97\xDC\x2F\x40\x5C\x39" + "\x78\xE6\x02\xDB\x49\xE1\xC6\x47" + "\xC2\x78\x9A\xBB\xF3\xBE\xCB\x93" + "\xD8\xB8\xE8\xBB\x8C\xB3\x9B\xA7" + "\xC2\x89\xF3\x91\x88\x83\x3D\xF0" + "\x29\xA2\xCD\xB5\x79\x16\xC2\x40" + "\x11\x03\x8E\x9C\xFD\xC9\x43\xC4" + "\xC2\x19\xF0\x4A\x32\xEF\x0C\x2B" + "\xD3\x2B\xE9\xD4\x4C\xDE\x95\xCF" + "\x04\x03\xD3\x2C\x7F\x82\xC8\xFA" + "\x0F\xD8\x7A\x39\x7B\x01\x41\x9C" + 
"\x78\xB6\xC9\xBF\xF9\x78\x57\x88" + "\xB1\xA5\xE1\xE0\xD9\x16\xD4\xC8" + "\xEE\xC4\xBE\x7B\x55\x59\x00\x48" + "\x1B\xBC\x14\xFA\x2A\x9D\xC9\x1C" + "\xFB\x28\x3F\x95\xDD\xB7\xD6\xCE" + "\x3A\x7F\x09\x0C\x0E\x69\x30\x7D" + "\xBC\x68\x9C\x91\x2A\x59\x57\x04" + "\xED\x1A\x1E\x00\xB1\x85\x92\x04" + "\x28\x8C\x0C\x3C\xC1\xD5\x12\xF7" + "\x4C\x3E\xB0\xE7\x86\x62\x68\x91" + "\xFC\xC4\xE2\xCE\xA6\xDC\x5E\x93" + "\x5D\x8D\x8C\x68\xB3\xB2\xB9\x64" + "\x16\xB8\xC8\x6F\xD8\xEE\x21\xBD" + "\xAC\x18\x0C\x7D\x0D\x05\xAB\xF1" + "\xFA\xDD\xE2\x48\xDF\x4C\x02\x39" + "\x69\xA1\x62\xBD\x49\x3A\x9D\x91" + "\x30\x70\x56\xA4\x37\xDD\x7C\xC0" + "\x0A\xA3\x30\x10\x26\x25\x41\x2C", + .ilen = 496, .result = "\x56\xED\x84\x1B\x8F\x26\xBD\x31" "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3" "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15" @@ -2975,8 +3353,62 @@ static struct cipher_testvec tf_cbc_dec_tv_template[] = { "\x1E\x92\x29\xC0\x34\xCB\x62\xF9" "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48" "\xDF\x76\x0D\x81\x18\xAF\x23\xBA" - "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C", - .rlen = 64, + "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C" + "\xC3\x37\xCE\x65\xFC\x70\x07\x9E" + "\x12\xA9\x40\xD7\x4B\xE2\x79\x10" + "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F" + "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1" + "\x68\xFF\x73\x0A\xA1\x15\xAC\x43" + "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5" + "\x29\xC0\x57\xEE\x62\xF9\x90\x04" + "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76" + "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8" + "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A" + "\xF1\x65\xFC\x93\x07\x9E\x35\xCC" + "\x40\xD7\x6E\x05\x79\x10\xA7\x1B" + "\xB2\x49\xE0\x54\xEB\x82\x19\x8D" + "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF" + "\x96\x0A\xA1\x38\xCF\x43\xDA\x71" + "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3" + "\x57\xEE\x85\x1C\x90\x27\xBE\x32" + "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4" + "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16" + "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88" + "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA" + "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49" + "\xE0\x77\x0E\x82\x19\xB0\x24\xBB" + "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D" + "\xC4\x38\xCF\x66\xFD\x71\x08\x9F" + 
"\x13\xAA\x41\xD8\x4C\xE3\x7A\x11" + "\x85\x1C\xB3\x27\xBE\x55\xEC\x60" + "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2" + "\x69\x00\x74\x0B\xA2\x16\xAD\x44" + "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6" + "\x2A\xC1\x58\xEF\x63\xFA\x91\x05" + "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77" + "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9" + "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B" + "\xF2\x66\xFD\x94\x08\x9F\x36\xCD" + "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C" + "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E" + "\x25\xBC\x30\xC7\x5E\xF5\x69\x00" + "\x97\x0B\xA2\x39\xD0\x44\xDB\x72" + "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4" + "\x58\xEF\x86\x1D\x91\x28\xBF\x33" + "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5" + "\x3C\xD3\x47\xDE\x75\x0C\x80\x17" + "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89" + "\x20\x94\x2B\xC2\x36\xCD\x64\xFB" + "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A" + "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC" + "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E" + "\xC5\x39\xD0\x67\xFE\x72\x09\xA0" + "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12" + "\x86\x1D\xB4\x28\xBF\x56\xED\x61" + "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3" + "\x6A\x01\x75\x0C\xA3\x17\xAE\x45" + "\xDC\x50\xE7\x7E\x15\x89\x20\xB7", + .rlen = 496, }, }; @@ -2996,8 +3428,62 @@ static struct cipher_testvec tf_ctr_enc_tv_template[] = { "\x1E\x92\x29\xC0\x34\xCB\x62\xF9" "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48" "\xDF\x76\x0D\x81\x18\xAF\x23\xBA" - "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C", - .ilen = 64, + "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C" + "\xC3\x37\xCE\x65\xFC\x70\x07\x9E" + "\x12\xA9\x40\xD7\x4B\xE2\x79\x10" + "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F" + "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1" + "\x68\xFF\x73\x0A\xA1\x15\xAC\x43" + "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5" + "\x29\xC0\x57\xEE\x62\xF9\x90\x04" + "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76" + "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8" + "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A" + "\xF1\x65\xFC\x93\x07\x9E\x35\xCC" + "\x40\xD7\x6E\x05\x79\x10\xA7\x1B" + "\xB2\x49\xE0\x54\xEB\x82\x19\x8D" + "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF" + "\x96\x0A\xA1\x38\xCF\x43\xDA\x71" + "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3" + "\x57\xEE\x85\x1C\x90\x27\xBE\x32" + 
"\xC9\x60\xF7\x6B\x02\x99\x0D\xA4" + "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16" + "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88" + "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA" + "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49" + "\xE0\x77\x0E\x82\x19\xB0\x24\xBB" + "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D" + "\xC4\x38\xCF\x66\xFD\x71\x08\x9F" + "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11" + "\x85\x1C\xB3\x27\xBE\x55\xEC\x60" + "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2" + "\x69\x00\x74\x0B\xA2\x16\xAD\x44" + "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6" + "\x2A\xC1\x58\xEF\x63\xFA\x91\x05" + "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77" + "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9" + "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B" + "\xF2\x66\xFD\x94\x08\x9F\x36\xCD" + "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C" + "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E" + "\x25\xBC\x30\xC7\x5E\xF5\x69\x00" + "\x97\x0B\xA2\x39\xD0\x44\xDB\x72" + "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4" + "\x58\xEF\x86\x1D\x91\x28\xBF\x33" + "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5" + "\x3C\xD3\x47\xDE\x75\x0C\x80\x17" + "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89" + "\x20\x94\x2B\xC2\x36\xCD\x64\xFB" + "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A" + "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC" + "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E" + "\xC5\x39\xD0\x67\xFE\x72\x09\xA0" + "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12" + "\x86\x1D\xB4\x28\xBF\x56\xED\x61" + "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3" + "\x6A\x01\x75\x0C\xA3\x17\xAE\x45" + "\xDC\x50\xE7\x7E\x15\x89\x20\xB7", + .ilen = 496, .result = "\xDF\xDD\x69\xFA\xB0\x2E\xFD\xFE" "\x70\x9E\xC5\x4B\xC9\xD4\xA1\x30" "\x26\x9B\x89\xA1\xEE\x43\xE0\x52" @@ -3005,8 +3491,62 @@ static struct cipher_testvec tf_ctr_enc_tv_template[] = { "\x9F\x8D\x40\x9F\x24\xFD\x92\xA0" "\xBC\x8F\x35\xDD\x67\x38\xD8\xAA" "\xCF\xF8\x48\xCA\xFB\xE4\x5C\x60" - "\x01\x41\x21\x12\x38\xAB\x52\x4F", - .rlen = 64, + "\x01\x41\x21\x12\x38\xAB\x52\x4F" + "\xA8\x57\x20\xE0\x21\x6A\x17\x0D" + "\x0E\xF9\x8E\x49\x42\x00\x3C\x94" + "\x14\xC0\xD0\x8D\x8A\x98\xEB\x29" + "\xEC\xAE\x96\x44\xC0\x3C\x48\xDC" + "\x29\x35\x25\x2F\xE7\x11\x6C\x68" + "\xC8\x67\x0A\x2F\xF4\x07\xBE\xF9" + 
"\x2C\x31\x87\x40\xAB\xB2\xB6\xFA" + "\xD2\xC9\x6D\x5C\x50\xE9\xE6\x7E" + "\xE3\x0A\xD2\xD5\x6D\x8D\x64\x9E" + "\x70\xCE\x03\x76\xDD\xE0\xF0\x8C" + "\x84\x86\x8B\x6A\xFE\xC7\xF9\x69" + "\x2E\xFE\xFC\xC2\xC4\x1A\x55\x58" + "\xB3\xBE\xE2\x7E\xED\x39\x42\x6C" + "\xB4\x42\x97\x9A\xEC\xE1\x0A\x06" + "\x02\xC5\x03\x9D\xC4\x48\x15\x66" + "\x35\x6A\xC2\xC9\xA2\x26\x30\xBB" + "\xDB\x2D\xC8\x08\x2B\xA0\x29\x1A" + "\x23\x61\x48\xEA\x80\x04\x27\xAA" + "\x69\x49\xE8\xE8\x4A\x83\x6B\x5A" + "\xCA\x7C\xD3\xB1\xB5\x0B\xCC\x23" + "\x74\x1F\xA9\x87\xCD\xED\xC0\x2D" + "\xBF\xEB\xCF\x16\x2D\x2A\x2E\x1D" + "\x96\xBA\x36\x11\x45\x41\xDA\xCE" + "\xA4\x48\x80\x8B\x06\xF4\x98\x89" + "\x8B\x23\x08\x53\xF4\xD4\x5A\x24" + "\x8B\xF8\x43\x73\xD1\xEE\xC4\xB0" + "\xF8\xFE\x09\x0C\x75\x05\x38\x0B" + "\x7C\x81\xDE\x9D\xE4\x61\x37\x63" + "\x63\xAD\x12\xD2\x04\xB9\xCE\x45" + "\x5A\x1A\x6E\xB3\x78\x2A\xA4\x74" + "\x86\xD0\xE3\xFF\xDA\x38\x9C\xB5" + "\xB8\xB1\xDB\x38\x2F\xC5\x6A\xB4" + "\xEB\x6E\x96\xE8\x43\x80\xB5\x51" + "\x61\x2D\x48\xAA\x07\x65\x11\x8C" + "\x48\xE3\x90\x7E\x78\x3A\xEC\x97" + "\x05\x3D\x84\xE7\x90\x2B\xAA\xBD" + "\x83\x29\x0E\x1A\x81\x73\x7B\xE0" + "\x7A\x01\x4A\x37\x3B\x77\x7F\x8D" + "\x49\xA4\x2F\x6E\xBE\x68\x99\x08" + "\x99\xAA\x4C\x12\x04\xAE\x1F\x77" + "\x35\x88\xF1\x65\x06\x0A\x0B\x4D" + "\x47\xF9\x50\x38\x5D\x71\xF9\x6E" + "\xDE\xEC\x61\x35\x2C\x4C\x96\x50" + "\xE8\x28\x93\x9C\x7E\x01\xC6\x04" + "\xB2\xD6\xBC\x6C\x17\xEB\xC1\x7D" + "\x11\xE9\x43\x83\x76\xAA\x53\x37" + "\x0C\x1D\x39\x89\x53\x72\x09\x7E" + "\xD9\x85\x16\x04\xA5\x2C\x05\x6F" + "\x17\x0C\x6E\x66\xAA\x84\xA7\xD9" + "\xE2\xD9\xC4\xEB\x43\x3E\xB1\x8D" + "\x7C\x36\xC7\x71\x70\x9C\x10\xD8" + "\xE8\x47\x2A\x4D\xFD\xA1\xBC\xE3" + "\xB9\x32\xE2\xC1\x82\xAC\xFE\xCC" + "\xC5\xC9\x7F\x9E\xCF\x33\x7A\xDF", + .rlen = 496, }, { /* Generated with Crypto++ */ .key = "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9" "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A" @@ -3023,8 +3563,62 @@ static struct cipher_testvec tf_ctr_enc_tv_template[] = { 
"\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48" "\xDF\x76\x0D\x81\x18\xAF\x23\xBA" "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C" - "\xC3\x37\xCE", - .ilen = 67, + "\xC3\x37\xCE\x65\xFC\x70\x07\x9E" + "\x12\xA9\x40\xD7\x4B\xE2\x79\x10" + "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F" + "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1" + "\x68\xFF\x73\x0A\xA1\x15\xAC\x43" + "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5" + "\x29\xC0\x57\xEE\x62\xF9\x90\x04" + "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76" + "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8" + "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A" + "\xF1\x65\xFC\x93\x07\x9E\x35\xCC" + "\x40\xD7\x6E\x05\x79\x10\xA7\x1B" + "\xB2\x49\xE0\x54\xEB\x82\x19\x8D" + "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF" + "\x96\x0A\xA1\x38\xCF\x43\xDA\x71" + "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3" + "\x57\xEE\x85\x1C\x90\x27\xBE\x32" + "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4" + "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16" + "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88" + "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA" + "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49" + "\xE0\x77\x0E\x82\x19\xB0\x24\xBB" + "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D" + "\xC4\x38\xCF\x66\xFD\x71\x08\x9F" + "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11" + "\x85\x1C\xB3\x27\xBE\x55\xEC\x60" + "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2" + "\x69\x00\x74\x0B\xA2\x16\xAD\x44" + "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6" + "\x2A\xC1\x58\xEF\x63\xFA\x91\x05" + "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77" + "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9" + "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B" + "\xF2\x66\xFD\x94\x08\x9F\x36\xCD" + "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C" + "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E" + "\x25\xBC\x30\xC7\x5E\xF5\x69\x00" + "\x97\x0B\xA2\x39\xD0\x44\xDB\x72" + "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4" + "\x58\xEF\x86\x1D\x91\x28\xBF\x33" + "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5" + "\x3C\xD3\x47\xDE\x75\x0C\x80\x17" + "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89" + "\x20\x94\x2B\xC2\x36\xCD\x64\xFB" + "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A" + "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC" + "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E" + "\xC5\x39\xD0\x67\xFE\x72\x09\xA0" + "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12" + 
"\x86\x1D\xB4\x28\xBF\x56\xED\x61" + "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3" + "\x6A\x01\x75\x0C\xA3\x17\xAE\x45" + "\xDC\x50\xE7\x7E\x15\x89\x20\xB7" + "\x2B\xC2\x59", + .ilen = 499, .result = "\xDF\xDD\x69\xFA\xB0\x2E\xFD\xFE" "\x70\x9E\xC5\x4B\xC9\xD4\xA1\x30" "\x26\x9B\x89\xA1\xEE\x43\xE0\x52" @@ -3033,8 +3627,62 @@ static struct cipher_testvec tf_ctr_enc_tv_template[] = { "\xBC\x8F\x35\xDD\x67\x38\xD8\xAA" "\xCF\xF8\x48\xCA\xFB\xE4\x5C\x60" "\x01\x41\x21\x12\x38\xAB\x52\x4F" - "\xA8\x57\x20", - .rlen = 67, + "\xA8\x57\x20\xE0\x21\x6A\x17\x0D" + "\x0E\xF9\x8E\x49\x42\x00\x3C\x94" + "\x14\xC0\xD0\x8D\x8A\x98\xEB\x29" + "\xEC\xAE\x96\x44\xC0\x3C\x48\xDC" + "\x29\x35\x25\x2F\xE7\x11\x6C\x68" + "\xC8\x67\x0A\x2F\xF4\x07\xBE\xF9" + "\x2C\x31\x87\x40\xAB\xB2\xB6\xFA" + "\xD2\xC9\x6D\x5C\x50\xE9\xE6\x7E" + "\xE3\x0A\xD2\xD5\x6D\x8D\x64\x9E" + "\x70\xCE\x03\x76\xDD\xE0\xF0\x8C" + "\x84\x86\x8B\x6A\xFE\xC7\xF9\x69" + "\x2E\xFE\xFC\xC2\xC4\x1A\x55\x58" + "\xB3\xBE\xE2\x7E\xED\x39\x42\x6C" + "\xB4\x42\x97\x9A\xEC\xE1\x0A\x06" + "\x02\xC5\x03\x9D\xC4\x48\x15\x66" + "\x35\x6A\xC2\xC9\xA2\x26\x30\xBB" + "\xDB\x2D\xC8\x08\x2B\xA0\x29\x1A" + "\x23\x61\x48\xEA\x80\x04\x27\xAA" + "\x69\x49\xE8\xE8\x4A\x83\x6B\x5A" + "\xCA\x7C\xD3\xB1\xB5\x0B\xCC\x23" + "\x74\x1F\xA9\x87\xCD\xED\xC0\x2D" + "\xBF\xEB\xCF\x16\x2D\x2A\x2E\x1D" + "\x96\xBA\x36\x11\x45\x41\xDA\xCE" + "\xA4\x48\x80\x8B\x06\xF4\x98\x89" + "\x8B\x23\x08\x53\xF4\xD4\x5A\x24" + "\x8B\xF8\x43\x73\xD1\xEE\xC4\xB0" + "\xF8\xFE\x09\x0C\x75\x05\x38\x0B" + "\x7C\x81\xDE\x9D\xE4\x61\x37\x63" + "\x63\xAD\x12\xD2\x04\xB9\xCE\x45" + "\x5A\x1A\x6E\xB3\x78\x2A\xA4\x74" + "\x86\xD0\xE3\xFF\xDA\x38\x9C\xB5" + "\xB8\xB1\xDB\x38\x2F\xC5\x6A\xB4" + "\xEB\x6E\x96\xE8\x43\x80\xB5\x51" + "\x61\x2D\x48\xAA\x07\x65\x11\x8C" + "\x48\xE3\x90\x7E\x78\x3A\xEC\x97" + "\x05\x3D\x84\xE7\x90\x2B\xAA\xBD" + "\x83\x29\x0E\x1A\x81\x73\x7B\xE0" + "\x7A\x01\x4A\x37\x3B\x77\x7F\x8D" + "\x49\xA4\x2F\x6E\xBE\x68\x99\x08" + "\x99\xAA\x4C\x12\x04\xAE\x1F\x77" + 
"\x35\x88\xF1\x65\x06\x0A\x0B\x4D" + "\x47\xF9\x50\x38\x5D\x71\xF9\x6E" + "\xDE\xEC\x61\x35\x2C\x4C\x96\x50" + "\xE8\x28\x93\x9C\x7E\x01\xC6\x04" + "\xB2\xD6\xBC\x6C\x17\xEB\xC1\x7D" + "\x11\xE9\x43\x83\x76\xAA\x53\x37" + "\x0C\x1D\x39\x89\x53\x72\x09\x7E" + "\xD9\x85\x16\x04\xA5\x2C\x05\x6F" + "\x17\x0C\x6E\x66\xAA\x84\xA7\xD9" + "\xE2\xD9\xC4\xEB\x43\x3E\xB1\x8D" + "\x7C\x36\xC7\x71\x70\x9C\x10\xD8" + "\xE8\x47\x2A\x4D\xFD\xA1\xBC\xE3" + "\xB9\x32\xE2\xC1\x82\xAC\xFE\xCC" + "\xC5\xC9\x7F\x9E\xCF\x33\x7A\xDF" + "\x6C\x82\x9D", + .rlen = 499, }, }; @@ -3054,8 +3702,62 @@ static struct cipher_testvec tf_ctr_dec_tv_template[] = { "\x9F\x8D\x40\x9F\x24\xFD\x92\xA0" "\xBC\x8F\x35\xDD\x67\x38\xD8\xAA" "\xCF\xF8\x48\xCA\xFB\xE4\x5C\x60" - "\x01\x41\x21\x12\x38\xAB\x52\x4F", - .ilen = 64, + "\x01\x41\x21\x12\x38\xAB\x52\x4F" + "\xA8\x57\x20\xE0\x21\x6A\x17\x0D" + "\x0E\xF9\x8E\x49\x42\x00\x3C\x94" + "\x14\xC0\xD0\x8D\x8A\x98\xEB\x29" + "\xEC\xAE\x96\x44\xC0\x3C\x48\xDC" + "\x29\x35\x25\x2F\xE7\x11\x6C\x68" + "\xC8\x67\x0A\x2F\xF4\x07\xBE\xF9" + "\x2C\x31\x87\x40\xAB\xB2\xB6\xFA" + "\xD2\xC9\x6D\x5C\x50\xE9\xE6\x7E" + "\xE3\x0A\xD2\xD5\x6D\x8D\x64\x9E" + "\x70\xCE\x03\x76\xDD\xE0\xF0\x8C" + "\x84\x86\x8B\x6A\xFE\xC7\xF9\x69" + "\x2E\xFE\xFC\xC2\xC4\x1A\x55\x58" + "\xB3\xBE\xE2\x7E\xED\x39\x42\x6C" + "\xB4\x42\x97\x9A\xEC\xE1\x0A\x06" + "\x02\xC5\x03\x9D\xC4\x48\x15\x66" + "\x35\x6A\xC2\xC9\xA2\x26\x30\xBB" + "\xDB\x2D\xC8\x08\x2B\xA0\x29\x1A" + "\x23\x61\x48\xEA\x80\x04\x27\xAA" + "\x69\x49\xE8\xE8\x4A\x83\x6B\x5A" + "\xCA\x7C\xD3\xB1\xB5\x0B\xCC\x23" + "\x74\x1F\xA9\x87\xCD\xED\xC0\x2D" + "\xBF\xEB\xCF\x16\x2D\x2A\x2E\x1D" + "\x96\xBA\x36\x11\x45\x41\xDA\xCE" + "\xA4\x48\x80\x8B\x06\xF4\x98\x89" + "\x8B\x23\x08\x53\xF4\xD4\x5A\x24" + "\x8B\xF8\x43\x73\xD1\xEE\xC4\xB0" + "\xF8\xFE\x09\x0C\x75\x05\x38\x0B" + "\x7C\x81\xDE\x9D\xE4\x61\x37\x63" + "\x63\xAD\x12\xD2\x04\xB9\xCE\x45" + "\x5A\x1A\x6E\xB3\x78\x2A\xA4\x74" + "\x86\xD0\xE3\xFF\xDA\x38\x9C\xB5" + 
"\xB8\xB1\xDB\x38\x2F\xC5\x6A\xB4" + "\xEB\x6E\x96\xE8\x43\x80\xB5\x51" + "\x61\x2D\x48\xAA\x07\x65\x11\x8C" + "\x48\xE3\x90\x7E\x78\x3A\xEC\x97" + "\x05\x3D\x84\xE7\x90\x2B\xAA\xBD" + "\x83\x29\x0E\x1A\x81\x73\x7B\xE0" + "\x7A\x01\x4A\x37\x3B\x77\x7F\x8D" + "\x49\xA4\x2F\x6E\xBE\x68\x99\x08" + "\x99\xAA\x4C\x12\x04\xAE\x1F\x77" + "\x35\x88\xF1\x65\x06\x0A\x0B\x4D" + "\x47\xF9\x50\x38\x5D\x71\xF9\x6E" + "\xDE\xEC\x61\x35\x2C\x4C\x96\x50" + "\xE8\x28\x93\x9C\x7E\x01\xC6\x04" + "\xB2\xD6\xBC\x6C\x17\xEB\xC1\x7D" + "\x11\xE9\x43\x83\x76\xAA\x53\x37" + "\x0C\x1D\x39\x89\x53\x72\x09\x7E" + "\xD9\x85\x16\x04\xA5\x2C\x05\x6F" + "\x17\x0C\x6E\x66\xAA\x84\xA7\xD9" + "\xE2\xD9\xC4\xEB\x43\x3E\xB1\x8D" + "\x7C\x36\xC7\x71\x70\x9C\x10\xD8" + "\xE8\x47\x2A\x4D\xFD\xA1\xBC\xE3" + "\xB9\x32\xE2\xC1\x82\xAC\xFE\xCC" + "\xC5\xC9\x7F\x9E\xCF\x33\x7A\xDF", + .ilen = 496, .result = "\x56\xED\x84\x1B\x8F\x26\xBD\x31" "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3" "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15" @@ -3063,8 +3765,62 @@ static struct cipher_testvec tf_ctr_dec_tv_template[] = { "\x1E\x92\x29\xC0\x34\xCB\x62\xF9" "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48" "\xDF\x76\x0D\x81\x18\xAF\x23\xBA" - "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C", - .rlen = 64, + "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C" + "\xC3\x37\xCE\x65\xFC\x70\x07\x9E" + "\x12\xA9\x40\xD7\x4B\xE2\x79\x10" + "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F" + "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1" + "\x68\xFF\x73\x0A\xA1\x15\xAC\x43" + "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5" + "\x29\xC0\x57\xEE\x62\xF9\x90\x04" + "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76" + "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8" + "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A" + "\xF1\x65\xFC\x93\x07\x9E\x35\xCC" + "\x40\xD7\x6E\x05\x79\x10\xA7\x1B" + "\xB2\x49\xE0\x54\xEB\x82\x19\x8D" + "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF" + "\x96\x0A\xA1\x38\xCF\x43\xDA\x71" + "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3" + "\x57\xEE\x85\x1C\x90\x27\xBE\x32" + "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4" + "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16" + "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88" + 
"\x1F\x93\x2A\xC1\x35\xCC\x63\xFA" + "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49" + "\xE0\x77\x0E\x82\x19\xB0\x24\xBB" + "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D" + "\xC4\x38\xCF\x66\xFD\x71\x08\x9F" + "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11" + "\x85\x1C\xB3\x27\xBE\x55\xEC\x60" + "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2" + "\x69\x00\x74\x0B\xA2\x16\xAD\x44" + "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6" + "\x2A\xC1\x58\xEF\x63\xFA\x91\x05" + "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77" + "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9" + "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B" + "\xF2\x66\xFD\x94\x08\x9F\x36\xCD" + "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C" + "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E" + "\x25\xBC\x30\xC7\x5E\xF5\x69\x00" + "\x97\x0B\xA2\x39\xD0\x44\xDB\x72" + "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4" + "\x58\xEF\x86\x1D\x91\x28\xBF\x33" + "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5" + "\x3C\xD3\x47\xDE\x75\x0C\x80\x17" + "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89" + "\x20\x94\x2B\xC2\x36\xCD\x64\xFB" + "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A" + "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC" + "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E" + "\xC5\x39\xD0\x67\xFE\x72\x09\xA0" + "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12" + "\x86\x1D\xB4\x28\xBF\x56\xED\x61" + "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3" + "\x6A\x01\x75\x0C\xA3\x17\xAE\x45" + "\xDC\x50\xE7\x7E\x15\x89\x20\xB7", + .rlen = 496, }, { /* Generated with Crypto++ */ .key = "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9" "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A" @@ -3081,8 +3837,62 @@ static struct cipher_testvec tf_ctr_dec_tv_template[] = { "\xBC\x8F\x35\xDD\x67\x38\xD8\xAA" "\xCF\xF8\x48\xCA\xFB\xE4\x5C\x60" "\x01\x41\x21\x12\x38\xAB\x52\x4F" - "\xA8\x57\x20", - .ilen = 67, + "\xA8\x57\x20\xE0\x21\x6A\x17\x0D" + "\x0E\xF9\x8E\x49\x42\x00\x3C\x94" + "\x14\xC0\xD0\x8D\x8A\x98\xEB\x29" + "\xEC\xAE\x96\x44\xC0\x3C\x48\xDC" + "\x29\x35\x25\x2F\xE7\x11\x6C\x68" + "\xC8\x67\x0A\x2F\xF4\x07\xBE\xF9" + "\x2C\x31\x87\x40\xAB\xB2\xB6\xFA" + "\xD2\xC9\x6D\x5C\x50\xE9\xE6\x7E" + "\xE3\x0A\xD2\xD5\x6D\x8D\x64\x9E" + "\x70\xCE\x03\x76\xDD\xE0\xF0\x8C" + 
"\x84\x86\x8B\x6A\xFE\xC7\xF9\x69" + "\x2E\xFE\xFC\xC2\xC4\x1A\x55\x58" + "\xB3\xBE\xE2\x7E\xED\x39\x42\x6C" + "\xB4\x42\x97\x9A\xEC\xE1\x0A\x06" + "\x02\xC5\x03\x9D\xC4\x48\x15\x66" + "\x35\x6A\xC2\xC9\xA2\x26\x30\xBB" + "\xDB\x2D\xC8\x08\x2B\xA0\x29\x1A" + "\x23\x61\x48\xEA\x80\x04\x27\xAA" + "\x69\x49\xE8\xE8\x4A\x83\x6B\x5A" + "\xCA\x7C\xD3\xB1\xB5\x0B\xCC\x23" + "\x74\x1F\xA9\x87\xCD\xED\xC0\x2D" + "\xBF\xEB\xCF\x16\x2D\x2A\x2E\x1D" + "\x96\xBA\x36\x11\x45\x41\xDA\xCE" + "\xA4\x48\x80\x8B\x06\xF4\x98\x89" + "\x8B\x23\x08\x53\xF4\xD4\x5A\x24" + "\x8B\xF8\x43\x73\xD1\xEE\xC4\xB0" + "\xF8\xFE\x09\x0C\x75\x05\x38\x0B" + "\x7C\x81\xDE\x9D\xE4\x61\x37\x63" + "\x63\xAD\x12\xD2\x04\xB9\xCE\x45" + "\x5A\x1A\x6E\xB3\x78\x2A\xA4\x74" + "\x86\xD0\xE3\xFF\xDA\x38\x9C\xB5" + "\xB8\xB1\xDB\x38\x2F\xC5\x6A\xB4" + "\xEB\x6E\x96\xE8\x43\x80\xB5\x51" + "\x61\x2D\x48\xAA\x07\x65\x11\x8C" + "\x48\xE3\x90\x7E\x78\x3A\xEC\x97" + "\x05\x3D\x84\xE7\x90\x2B\xAA\xBD" + "\x83\x29\x0E\x1A\x81\x73\x7B\xE0" + "\x7A\x01\x4A\x37\x3B\x77\x7F\x8D" + "\x49\xA4\x2F\x6E\xBE\x68\x99\x08" + "\x99\xAA\x4C\x12\x04\xAE\x1F\x77" + "\x35\x88\xF1\x65\x06\x0A\x0B\x4D" + "\x47\xF9\x50\x38\x5D\x71\xF9\x6E" + "\xDE\xEC\x61\x35\x2C\x4C\x96\x50" + "\xE8\x28\x93\x9C\x7E\x01\xC6\x04" + "\xB2\xD6\xBC\x6C\x17\xEB\xC1\x7D" + "\x11\xE9\x43\x83\x76\xAA\x53\x37" + "\x0C\x1D\x39\x89\x53\x72\x09\x7E" + "\xD9\x85\x16\x04\xA5\x2C\x05\x6F" + "\x17\x0C\x6E\x66\xAA\x84\xA7\xD9" + "\xE2\xD9\xC4\xEB\x43\x3E\xB1\x8D" + "\x7C\x36\xC7\x71\x70\x9C\x10\xD8" + "\xE8\x47\x2A\x4D\xFD\xA1\xBC\xE3" + "\xB9\x32\xE2\xC1\x82\xAC\xFE\xCC" + "\xC5\xC9\x7F\x9E\xCF\x33\x7A\xDF" + "\x6C\x82\x9D", + .ilen = 499, .result = "\x56\xED\x84\x1B\x8F\x26\xBD\x31" "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3" "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15" @@ -3091,8 +3901,62 @@ static struct cipher_testvec tf_ctr_dec_tv_template[] = { "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48" "\xDF\x76\x0D\x81\x18\xAF\x23\xBA" "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C" - "\xC3\x37\xCE", - .rlen = 67, + 
"\xC3\x37\xCE\x65\xFC\x70\x07\x9E" + "\x12\xA9\x40\xD7\x4B\xE2\x79\x10" + "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F" + "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1" + "\x68\xFF\x73\x0A\xA1\x15\xAC\x43" + "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5" + "\x29\xC0\x57\xEE\x62\xF9\x90\x04" + "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76" + "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8" + "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A" + "\xF1\x65\xFC\x93\x07\x9E\x35\xCC" + "\x40\xD7\x6E\x05\x79\x10\xA7\x1B" + "\xB2\x49\xE0\x54\xEB\x82\x19\x8D" + "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF" + "\x96\x0A\xA1\x38\xCF\x43\xDA\x71" + "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3" + "\x57\xEE\x85\x1C\x90\x27\xBE\x32" + "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4" + "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16" + "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88" + "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA" + "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49" + "\xE0\x77\x0E\x82\x19\xB0\x24\xBB" + "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D" + "\xC4\x38\xCF\x66\xFD\x71\x08\x9F" + "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11" + "\x85\x1C\xB3\x27\xBE\x55\xEC\x60" + "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2" + "\x69\x00\x74\x0B\xA2\x16\xAD\x44" + "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6" + "\x2A\xC1\x58\xEF\x63\xFA\x91\x05" + "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77" + "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9" + "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B" + "\xF2\x66\xFD\x94\x08\x9F\x36\xCD" + "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C" + "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E" + "\x25\xBC\x30\xC7\x5E\xF5\x69\x00" + "\x97\x0B\xA2\x39\xD0\x44\xDB\x72" + "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4" + "\x58\xEF\x86\x1D\x91\x28\xBF\x33" + "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5" + "\x3C\xD3\x47\xDE\x75\x0C\x80\x17" + "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89" + "\x20\x94\x2B\xC2\x36\xCD\x64\xFB" + "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A" + "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC" + "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E" + "\xC5\x39\xD0\x67\xFE\x72\x09\xA0" + "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12" + "\x86\x1D\xB4\x28\xBF\x56\xED\x61" + "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3" + "\x6A\x01\x75\x0C\xA3\x17\xAE\x45" + "\xDC\x50\xE7\x7E\x15\x89\x20\xB7" + 
"\x2B\xC2\x59", + .rlen = 499, }, }; -- cgit v0.10.2 From 7efe4076725aeb01722445b56613681aa492c8d6 Mon Sep 17 00:00:00 2001 From: Johannes Goetzfried Date: Tue, 12 Jun 2012 16:47:43 +0800 Subject: crypto: serpent - add x86_64/avx assembler implementation This patch adds a x86_64/avx assembler implementation of the Serpent block cipher. The implementation is very similar to the sse2 implementation and processes eight blocks in parallel. Because of the new non-destructive three operand syntax all move-instructions can be removed and therefore a little performance increase is provided. Patch has been tested with tcrypt and automated filesystem tests. Tcrypt benchmark results: Intel Core i5-2500 CPU (fam:6, model:42, step:7) serpent-avx-x86_64 vs. serpent-sse2-x86_64 128bit key: (lrw:256bit) (xts:256bit) size ecb-enc ecb-dec cbc-enc cbc-dec ctr-enc ctr-dec lrw-enc lrw-dec xts-enc xts-dec 16B 1.03x 1.01x 1.01x 1.01x 1.00x 1.00x 1.00x 1.00x 1.00x 1.01x 64B 1.00x 1.00x 1.00x 1.00x 1.00x 0.99x 1.00x 1.01x 1.00x 1.00x 256B 1.05x 1.03x 1.00x 1.02x 1.05x 1.06x 1.05x 1.02x 1.05x 1.02x 1024B 1.05x 1.02x 1.00x 1.02x 1.05x 1.06x 1.05x 1.03x 1.05x 1.02x 8192B 1.05x 1.02x 1.00x 1.02x 1.06x 1.06x 1.04x 1.03x 1.04x 1.02x 256bit key: (lrw:384bit) (xts:512bit) size ecb-enc ecb-dec cbc-enc cbc-dec ctr-enc ctr-dec lrw-enc lrw-dec xts-enc xts-dec 16B 1.01x 1.00x 1.01x 1.01x 1.00x 1.00x 0.99x 1.03x 1.01x 1.01x 64B 1.00x 1.00x 1.00x 1.00x 1.00x 1.00x 1.00x 1.01x 1.00x 1.02x 256B 1.05x 1.02x 1.00x 1.02x 1.05x 1.02x 1.04x 1.05x 1.05x 1.02x 1024B 1.06x 1.02x 1.00x 1.02x 1.07x 1.06x 1.05x 1.04x 1.05x 1.02x 8192B 1.05x 1.02x 1.00x 1.02x 1.06x 1.06x 1.04x 1.05x 1.05x 1.02x serpent-avx-x86_64 vs aes-asm (8kB block): 128bit 256bit ecb-enc 1.26x 1.73x ecb-dec 1.20x 1.64x cbc-enc 0.33x 0.45x cbc-dec 1.24x 1.67x ctr-enc 1.32x 1.76x ctr-dec 1.32x 1.76x lrw-enc 1.20x 1.60x lrw-dec 1.15x 1.54x xts-enc 1.22x 1.64x xts-dec 1.17x 1.57x Signed-off-by: Johannes Goetzfried Signed-off-by: Herbert Xu diff --git 
a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 3420fee..83caa4b 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -15,6 +15,7 @@ obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o obj-$(CONFIG_CRYPTO_TWOFISH_AVX_X86_64) += twofish-avx-x86_64.o obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o +obj-$(CONFIG_CRYPTO_SERPENT_AVX_X86_64) += serpent-avx-x86_64.o obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o @@ -34,6 +35,7 @@ twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o twofish-avx-x86_64-y := twofish-avx-x86_64-asm_64.o twofish_avx_glue.o salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o +serpent-avx-x86_64-y := serpent-avx-x86_64-asm_64.o serpent_avx_glue.o aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S new file mode 100644 index 0000000..0ed47a1 --- /dev/null +++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S @@ -0,0 +1,704 @@ +/* + * Serpent Cipher 8-way parallel algorithm (x86_64/AVX) + * + * Copyright (C) 2012 Johannes Goetzfried + * + * + * Based on arch/x86/crypto/serpent-sse2-x86_64-asm_64.S by + * Copyright (C) 2011 Jussi Kivilinna + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +.file "serpent-avx-x86_64-asm_64.S" +.text + +#define CTX %rdi + +/********************************************************************** + 8-way AVX serpent + **********************************************************************/ +#define RA1 %xmm0 +#define RB1 %xmm1 +#define RC1 %xmm2 +#define RD1 %xmm3 +#define RE1 %xmm4 + +#define tp %xmm5 + +#define RA2 %xmm6 +#define RB2 %xmm7 +#define RC2 %xmm8 +#define RD2 %xmm9 +#define RE2 %xmm10 + +#define RNOT %xmm11 + +#define RK0 %xmm12 +#define RK1 %xmm13 +#define RK2 %xmm14 +#define RK3 %xmm15 + + +#define S0_1(x0, x1, x2, x3, x4) \ + vpor x0, x3, tp; \ + vpxor x3, x0, x0; \ + vpxor x2, x3, x4; \ + vpxor RNOT, x4, x4; \ + vpxor x1, tp, x3; \ + vpand x0, x1, x1; \ + vpxor x4, x1, x1; \ + vpxor x0, x2, x2; +#define S0_2(x0, x1, x2, x3, x4) \ + vpxor x3, x0, x0; \ + vpor x0, x4, x4; \ + vpxor x2, x0, x0; \ + vpand x1, x2, x2; \ + vpxor x2, x3, x3; \ + vpxor RNOT, x1, x1; \ + vpxor x4, x2, x2; \ + vpxor x2, x1, x1; + +#define S1_1(x0, x1, x2, x3, x4) \ + vpxor x0, x1, tp; \ + vpxor x3, x0, x0; \ + vpxor RNOT, x3, x3; \ + vpand tp, x1, x4; \ + vpor tp, x0, x0; \ + vpxor x2, x3, x3; \ + vpxor x3, x0, x0; \ + vpxor x3, tp, x1; +#define S1_2(x0, x1, x2, x3, x4) \ + vpxor x4, x3, x3; \ + vpor x4, x1, x1; \ + vpxor x2, x4, x4; \ + vpand x0, x2, x2; \ + vpxor x1, x2, x2; \ + vpor x0, x1, x1; \ + vpxor RNOT, x0, x0; \ + vpxor x2, x0, x0; \ + vpxor x1, x4, x4; + +#define S2_1(x0, x1, x2, x3, x4) \ + vpxor RNOT, x3, x3; \ + vpxor x0, x1, x1; \ + vpand x2, 
x0, tp; \ + vpxor x3, tp, tp; \ + vpor x0, x3, x3; \ + vpxor x1, x2, x2; \ + vpxor x1, x3, x3; \ + vpand tp, x1, x1; +#define S2_2(x0, x1, x2, x3, x4) \ + vpxor x2, tp, tp; \ + vpand x3, x2, x2; \ + vpor x1, x3, x3; \ + vpxor RNOT, tp, tp; \ + vpxor tp, x3, x3; \ + vpxor tp, x0, x4; \ + vpxor x2, tp, x0; \ + vpor x2, x1, x1; + +#define S3_1(x0, x1, x2, x3, x4) \ + vpxor x3, x1, tp; \ + vpor x0, x3, x3; \ + vpand x0, x1, x4; \ + vpxor x2, x0, x0; \ + vpxor tp, x2, x2; \ + vpand x3, tp, x1; \ + vpxor x3, x2, x2; \ + vpor x4, x0, x0; \ + vpxor x3, x4, x4; +#define S3_2(x0, x1, x2, x3, x4) \ + vpxor x0, x1, x1; \ + vpand x3, x0, x0; \ + vpand x4, x3, x3; \ + vpxor x2, x3, x3; \ + vpor x1, x4, x4; \ + vpand x1, x2, x2; \ + vpxor x3, x4, x4; \ + vpxor x3, x0, x0; \ + vpxor x2, x3, x3; + +#define S4_1(x0, x1, x2, x3, x4) \ + vpand x0, x3, tp; \ + vpxor x3, x0, x0; \ + vpxor x2, tp, tp; \ + vpor x3, x2, x2; \ + vpxor x1, x0, x0; \ + vpxor tp, x3, x4; \ + vpor x0, x2, x2; \ + vpxor x1, x2, x2; +#define S4_2(x0, x1, x2, x3, x4) \ + vpand x0, x1, x1; \ + vpxor x4, x1, x1; \ + vpand x2, x4, x4; \ + vpxor tp, x2, x2; \ + vpxor x0, x4, x4; \ + vpor x1, tp, x3; \ + vpxor RNOT, x1, x1; \ + vpxor x0, x3, x3; + +#define S5_1(x0, x1, x2, x3, x4) \ + vpor x0, x1, tp; \ + vpxor tp, x2, x2; \ + vpxor RNOT, x3, x3; \ + vpxor x0, x1, x4; \ + vpxor x2, x0, x0; \ + vpand x4, tp, x1; \ + vpor x3, x4, x4; \ + vpxor x0, x4, x4; +#define S5_2(x0, x1, x2, x3, x4) \ + vpand x3, x0, x0; \ + vpxor x3, x1, x1; \ + vpxor x2, x3, x3; \ + vpxor x1, x0, x0; \ + vpand x4, x2, x2; \ + vpxor x2, x1, x1; \ + vpand x0, x2, x2; \ + vpxor x2, x3, x3; + +#define S6_1(x0, x1, x2, x3, x4) \ + vpxor x0, x3, x3; \ + vpxor x2, x1, tp; \ + vpxor x0, x2, x2; \ + vpand x3, x0, x0; \ + vpor x3, tp, tp; \ + vpxor RNOT, x1, x4; \ + vpxor tp, x0, x0; \ + vpxor x2, tp, x1; +#define S6_2(x0, x1, x2, x3, x4) \ + vpxor x4, x3, x3; \ + vpxor x0, x4, x4; \ + vpand x0, x2, x2; \ + vpxor x1, x4, x4; \ + vpxor x3, x2, x2; \ + vpand 
x1, x3, x3; \ + vpxor x0, x3, x3; \ + vpxor x2, x1, x1; + +#define S7_1(x0, x1, x2, x3, x4) \ + vpxor RNOT, x1, tp; \ + vpxor RNOT, x0, x0; \ + vpand x2, tp, x1; \ + vpxor x3, x1, x1; \ + vpor tp, x3, x3; \ + vpxor x2, tp, x4; \ + vpxor x3, x2, x2; \ + vpxor x0, x3, x3; \ + vpor x1, x0, x0; +#define S7_2(x0, x1, x2, x3, x4) \ + vpand x0, x2, x2; \ + vpxor x4, x0, x0; \ + vpxor x3, x4, x4; \ + vpand x0, x3, x3; \ + vpxor x1, x4, x4; \ + vpxor x4, x2, x2; \ + vpxor x1, x3, x3; \ + vpor x0, x4, x4; \ + vpxor x1, x4, x4; + +#define SI0_1(x0, x1, x2, x3, x4) \ + vpxor x0, x1, x1; \ + vpor x1, x3, tp; \ + vpxor x1, x3, x4; \ + vpxor RNOT, x0, x0; \ + vpxor tp, x2, x2; \ + vpxor x0, tp, x3; \ + vpand x1, x0, x0; \ + vpxor x2, x0, x0; +#define SI0_2(x0, x1, x2, x3, x4) \ + vpand x3, x2, x2; \ + vpxor x4, x3, x3; \ + vpxor x3, x2, x2; \ + vpxor x3, x1, x1; \ + vpand x0, x3, x3; \ + vpxor x0, x1, x1; \ + vpxor x2, x0, x0; \ + vpxor x3, x4, x4; + +#define SI1_1(x0, x1, x2, x3, x4) \ + vpxor x3, x1, x1; \ + vpxor x2, x0, tp; \ + vpxor RNOT, x2, x2; \ + vpor x1, x0, x4; \ + vpxor x3, x4, x4; \ + vpand x1, x3, x3; \ + vpxor x2, x1, x1; \ + vpand x4, x2, x2; +#define SI1_2(x0, x1, x2, x3, x4) \ + vpxor x1, x4, x4; \ + vpor x3, x1, x1; \ + vpxor tp, x3, x3; \ + vpxor tp, x2, x2; \ + vpor x4, tp, x0; \ + vpxor x4, x2, x2; \ + vpxor x0, x1, x1; \ + vpxor x1, x4, x4; + +#define SI2_1(x0, x1, x2, x3, x4) \ + vpxor x1, x2, x2; \ + vpxor RNOT, x3, tp; \ + vpor x2, tp, tp; \ + vpxor x3, x2, x2; \ + vpxor x0, x3, x4; \ + vpxor x1, tp, x3; \ + vpor x2, x1, x1; \ + vpxor x0, x2, x2; +#define SI2_2(x0, x1, x2, x3, x4) \ + vpxor x4, x1, x1; \ + vpor x3, x4, x4; \ + vpxor x3, x2, x2; \ + vpxor x2, x4, x4; \ + vpand x1, x2, x2; \ + vpxor x3, x2, x2; \ + vpxor x4, x3, x3; \ + vpxor x0, x4, x4; + +#define SI3_1(x0, x1, x2, x3, x4) \ + vpxor x1, x2, x2; \ + vpand x2, x1, tp; \ + vpxor x0, tp, tp; \ + vpor x1, x0, x0; \ + vpxor x3, x1, x4; \ + vpxor x3, x0, x0; \ + vpor tp, x3, x3; \ + vpxor x2, 
tp, x1; +#define SI3_2(x0, x1, x2, x3, x4) \ + vpxor x3, x1, x1; \ + vpxor x2, x0, x0; \ + vpxor x3, x2, x2; \ + vpand x1, x3, x3; \ + vpxor x0, x1, x1; \ + vpand x2, x0, x0; \ + vpxor x3, x4, x4; \ + vpxor x0, x3, x3; \ + vpxor x1, x0, x0; + +#define SI4_1(x0, x1, x2, x3, x4) \ + vpxor x3, x2, x2; \ + vpand x1, x0, tp; \ + vpxor x2, tp, tp; \ + vpor x3, x2, x2; \ + vpxor RNOT, x0, x4; \ + vpxor tp, x1, x1; \ + vpxor x2, tp, x0; \ + vpand x4, x2, x2; +#define SI4_2(x0, x1, x2, x3, x4) \ + vpxor x0, x2, x2; \ + vpor x4, x0, x0; \ + vpxor x3, x0, x0; \ + vpand x2, x3, x3; \ + vpxor x3, x4, x4; \ + vpxor x1, x3, x3; \ + vpand x0, x1, x1; \ + vpxor x1, x4, x4; \ + vpxor x3, x0, x0; + +#define SI5_1(x0, x1, x2, x3, x4) \ + vpor x2, x1, tp; \ + vpxor x1, x2, x2; \ + vpxor x3, tp, tp; \ + vpand x1, x3, x3; \ + vpxor x3, x2, x2; \ + vpor x0, x3, x3; \ + vpxor RNOT, x0, x0; \ + vpxor x2, x3, x3; \ + vpor x0, x2, x2; +#define SI5_2(x0, x1, x2, x3, x4) \ + vpxor tp, x1, x4; \ + vpxor x4, x2, x2; \ + vpand x0, x4, x4; \ + vpxor tp, x0, x0; \ + vpxor x3, tp, x1; \ + vpand x2, x0, x0; \ + vpxor x3, x2, x2; \ + vpxor x2, x0, x0; \ + vpxor x4, x2, x2; \ + vpxor x3, x4, x4; + +#define SI6_1(x0, x1, x2, x3, x4) \ + vpxor x2, x0, x0; \ + vpand x3, x0, tp; \ + vpxor x3, x2, x2; \ + vpxor x2, tp, tp; \ + vpxor x1, x3, x3; \ + vpor x0, x2, x2; \ + vpxor x3, x2, x2; \ + vpand tp, x3, x3; +#define SI6_2(x0, x1, x2, x3, x4) \ + vpxor RNOT, tp, tp; \ + vpxor x1, x3, x3; \ + vpand x2, x1, x1; \ + vpxor tp, x0, x4; \ + vpxor x4, x3, x3; \ + vpxor x2, x4, x4; \ + vpxor x1, tp, x0; \ + vpxor x0, x2, x2; + +#define SI7_1(x0, x1, x2, x3, x4) \ + vpand x0, x3, tp; \ + vpxor x2, x0, x0; \ + vpor x3, x2, x2; \ + vpxor x1, x3, x4; \ + vpxor RNOT, x0, x0; \ + vpor tp, x1, x1; \ + vpxor x0, x4, x4; \ + vpand x2, x0, x0; \ + vpxor x1, x0, x0; +#define SI7_2(x0, x1, x2, x3, x4) \ + vpand x2, x1, x1; \ + vpxor x2, tp, x3; \ + vpxor x3, x4, x4; \ + vpand x3, x2, x2; \ + vpor x0, x3, x3; \ + vpxor x4, x1, 
x1; \ + vpxor x4, x3, x3; \ + vpand x0, x4, x4; \ + vpxor x2, x4, x4; + +#define get_key(i, j, t) \ + vbroadcastss (4*(i)+(j))*4(CTX), t; + +#define K2(x0, x1, x2, x3, x4, i) \ + get_key(i, 0, RK0); \ + get_key(i, 1, RK1); \ + get_key(i, 2, RK2); \ + get_key(i, 3, RK3); \ + vpxor RK0, x0 ## 1, x0 ## 1; \ + vpxor RK1, x1 ## 1, x1 ## 1; \ + vpxor RK2, x2 ## 1, x2 ## 1; \ + vpxor RK3, x3 ## 1, x3 ## 1; \ + vpxor RK0, x0 ## 2, x0 ## 2; \ + vpxor RK1, x1 ## 2, x1 ## 2; \ + vpxor RK2, x2 ## 2, x2 ## 2; \ + vpxor RK3, x3 ## 2, x3 ## 2; + +#define LK2(x0, x1, x2, x3, x4, i) \ + vpslld $13, x0 ## 1, x4 ## 1; \ + vpsrld $(32 - 13), x0 ## 1, x0 ## 1; \ + vpor x4 ## 1, x0 ## 1, x0 ## 1; \ + vpxor x0 ## 1, x1 ## 1, x1 ## 1; \ + vpslld $3, x2 ## 1, x4 ## 1; \ + vpsrld $(32 - 3), x2 ## 1, x2 ## 1; \ + vpor x4 ## 1, x2 ## 1, x2 ## 1; \ + vpxor x2 ## 1, x1 ## 1, x1 ## 1; \ + vpslld $13, x0 ## 2, x4 ## 2; \ + vpsrld $(32 - 13), x0 ## 2, x0 ## 2; \ + vpor x4 ## 2, x0 ## 2, x0 ## 2; \ + vpxor x0 ## 2, x1 ## 2, x1 ## 2; \ + vpslld $3, x2 ## 2, x4 ## 2; \ + vpsrld $(32 - 3), x2 ## 2, x2 ## 2; \ + vpor x4 ## 2, x2 ## 2, x2 ## 2; \ + vpxor x2 ## 2, x1 ## 2, x1 ## 2; \ + vpslld $1, x1 ## 1, x4 ## 1; \ + vpsrld $(32 - 1), x1 ## 1, x1 ## 1; \ + vpor x4 ## 1, x1 ## 1, x1 ## 1; \ + vpslld $3, x0 ## 1, x4 ## 1; \ + vpxor x2 ## 1, x3 ## 1, x3 ## 1; \ + vpxor x4 ## 1, x3 ## 1, x3 ## 1; \ + get_key(i, 1, RK1); \ + vpslld $1, x1 ## 2, x4 ## 2; \ + vpsrld $(32 - 1), x1 ## 2, x1 ## 2; \ + vpor x4 ## 2, x1 ## 2, x1 ## 2; \ + vpslld $3, x0 ## 2, x4 ## 2; \ + vpxor x2 ## 2, x3 ## 2, x3 ## 2; \ + vpxor x4 ## 2, x3 ## 2, x3 ## 2; \ + get_key(i, 3, RK3); \ + vpslld $7, x3 ## 1, x4 ## 1; \ + vpsrld $(32 - 7), x3 ## 1, x3 ## 1; \ + vpor x4 ## 1, x3 ## 1, x3 ## 1; \ + vpslld $7, x1 ## 1, x4 ## 1; \ + vpxor x1 ## 1, x0 ## 1, x0 ## 1; \ + vpxor x3 ## 1, x0 ## 1, x0 ## 1; \ + vpxor x3 ## 1, x2 ## 1, x2 ## 1; \ + vpxor x4 ## 1, x2 ## 1, x2 ## 1; \ + get_key(i, 0, RK0); \ + vpslld $7, x3 ## 2, x4 ## 2; \ + vpsrld 
$(32 - 7), x3 ## 2, x3 ## 2; \ + vpor x4 ## 2, x3 ## 2, x3 ## 2; \ + vpslld $7, x1 ## 2, x4 ## 2; \ + vpxor x1 ## 2, x0 ## 2, x0 ## 2; \ + vpxor x3 ## 2, x0 ## 2, x0 ## 2; \ + vpxor x3 ## 2, x2 ## 2, x2 ## 2; \ + vpxor x4 ## 2, x2 ## 2, x2 ## 2; \ + get_key(i, 2, RK2); \ + vpxor RK1, x1 ## 1, x1 ## 1; \ + vpxor RK3, x3 ## 1, x3 ## 1; \ + vpslld $5, x0 ## 1, x4 ## 1; \ + vpsrld $(32 - 5), x0 ## 1, x0 ## 1; \ + vpor x4 ## 1, x0 ## 1, x0 ## 1; \ + vpslld $22, x2 ## 1, x4 ## 1; \ + vpsrld $(32 - 22), x2 ## 1, x2 ## 1; \ + vpor x4 ## 1, x2 ## 1, x2 ## 1; \ + vpxor RK0, x0 ## 1, x0 ## 1; \ + vpxor RK2, x2 ## 1, x2 ## 1; \ + vpxor RK1, x1 ## 2, x1 ## 2; \ + vpxor RK3, x3 ## 2, x3 ## 2; \ + vpslld $5, x0 ## 2, x4 ## 2; \ + vpsrld $(32 - 5), x0 ## 2, x0 ## 2; \ + vpor x4 ## 2, x0 ## 2, x0 ## 2; \ + vpslld $22, x2 ## 2, x4 ## 2; \ + vpsrld $(32 - 22), x2 ## 2, x2 ## 2; \ + vpor x4 ## 2, x2 ## 2, x2 ## 2; \ + vpxor RK0, x0 ## 2, x0 ## 2; \ + vpxor RK2, x2 ## 2, x2 ## 2; + +#define KL2(x0, x1, x2, x3, x4, i) \ + vpxor RK0, x0 ## 1, x0 ## 1; \ + vpxor RK2, x2 ## 1, x2 ## 1; \ + vpsrld $5, x0 ## 1, x4 ## 1; \ + vpslld $(32 - 5), x0 ## 1, x0 ## 1; \ + vpor x4 ## 1, x0 ## 1, x0 ## 1; \ + vpxor RK3, x3 ## 1, x3 ## 1; \ + vpxor RK1, x1 ## 1, x1 ## 1; \ + vpsrld $22, x2 ## 1, x4 ## 1; \ + vpslld $(32 - 22), x2 ## 1, x2 ## 1; \ + vpor x4 ## 1, x2 ## 1, x2 ## 1; \ + vpxor x3 ## 1, x2 ## 1, x2 ## 1; \ + vpxor RK0, x0 ## 2, x0 ## 2; \ + vpxor RK2, x2 ## 2, x2 ## 2; \ + vpsrld $5, x0 ## 2, x4 ## 2; \ + vpslld $(32 - 5), x0 ## 2, x0 ## 2; \ + vpor x4 ## 2, x0 ## 2, x0 ## 2; \ + vpxor RK3, x3 ## 2, x3 ## 2; \ + vpxor RK1, x1 ## 2, x1 ## 2; \ + vpsrld $22, x2 ## 2, x4 ## 2; \ + vpslld $(32 - 22), x2 ## 2, x2 ## 2; \ + vpor x4 ## 2, x2 ## 2, x2 ## 2; \ + vpxor x3 ## 2, x2 ## 2, x2 ## 2; \ + vpxor x3 ## 1, x0 ## 1, x0 ## 1; \ + vpslld $7, x1 ## 1, x4 ## 1; \ + vpxor x1 ## 1, x0 ## 1, x0 ## 1; \ + vpxor x4 ## 1, x2 ## 1, x2 ## 1; \ + vpsrld $1, x1 ## 1, x4 ## 1; \ + vpslld $(32 - 1), x1 ## 1, 
x1 ## 1; \ + vpor x4 ## 1, x1 ## 1, x1 ## 1; \ + vpxor x3 ## 2, x0 ## 2, x0 ## 2; \ + vpslld $7, x1 ## 2, x4 ## 2; \ + vpxor x1 ## 2, x0 ## 2, x0 ## 2; \ + vpxor x4 ## 2, x2 ## 2, x2 ## 2; \ + vpsrld $1, x1 ## 2, x4 ## 2; \ + vpslld $(32 - 1), x1 ## 2, x1 ## 2; \ + vpor x4 ## 2, x1 ## 2, x1 ## 2; \ + vpsrld $7, x3 ## 1, x4 ## 1; \ + vpslld $(32 - 7), x3 ## 1, x3 ## 1; \ + vpor x4 ## 1, x3 ## 1, x3 ## 1; \ + vpxor x0 ## 1, x1 ## 1, x1 ## 1; \ + vpslld $3, x0 ## 1, x4 ## 1; \ + vpxor x4 ## 1, x3 ## 1, x3 ## 1; \ + vpsrld $7, x3 ## 2, x4 ## 2; \ + vpslld $(32 - 7), x3 ## 2, x3 ## 2; \ + vpor x4 ## 2, x3 ## 2, x3 ## 2; \ + vpxor x0 ## 2, x1 ## 2, x1 ## 2; \ + vpslld $3, x0 ## 2, x4 ## 2; \ + vpxor x4 ## 2, x3 ## 2, x3 ## 2; \ + vpsrld $13, x0 ## 1, x4 ## 1; \ + vpslld $(32 - 13), x0 ## 1, x0 ## 1; \ + vpor x4 ## 1, x0 ## 1, x0 ## 1; \ + vpxor x2 ## 1, x1 ## 1, x1 ## 1; \ + vpxor x2 ## 1, x3 ## 1, x3 ## 1; \ + vpsrld $3, x2 ## 1, x4 ## 1; \ + vpslld $(32 - 3), x2 ## 1, x2 ## 1; \ + vpor x4 ## 1, x2 ## 1, x2 ## 1; \ + vpsrld $13, x0 ## 2, x4 ## 2; \ + vpslld $(32 - 13), x0 ## 2, x0 ## 2; \ + vpor x4 ## 2, x0 ## 2, x0 ## 2; \ + vpxor x2 ## 2, x1 ## 2, x1 ## 2; \ + vpxor x2 ## 2, x3 ## 2, x3 ## 2; \ + vpsrld $3, x2 ## 2, x4 ## 2; \ + vpslld $(32 - 3), x2 ## 2, x2 ## 2; \ + vpor x4 ## 2, x2 ## 2, x2 ## 2; + +#define S(SBOX, x0, x1, x2, x3, x4) \ + SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ + SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ + SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ + SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); + +#define SP(SBOX, x0, x1, x2, x3, x4, i) \ + get_key(i, 0, RK0); \ + SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ + get_key(i, 2, RK2); \ + SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ + get_key(i, 3, RK3); \ + SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ + get_key(i, 1, RK1); \ + SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ + +#define 
transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ + vpunpckldq x1, x0, t0; \ + vpunpckhdq x1, x0, t2; \ + vpunpckldq x3, x2, t1; \ + vpunpckhdq x3, x2, x3; \ + \ + vpunpcklqdq t1, t0, x0; \ + vpunpckhqdq t1, t0, x1; \ + vpunpcklqdq x3, t2, x2; \ + vpunpckhqdq x3, t2, x3; + +#define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \ + vmovdqu (0*4*4)(in), x0; \ + vmovdqu (1*4*4)(in), x1; \ + vmovdqu (2*4*4)(in), x2; \ + vmovdqu (3*4*4)(in), x3; \ + \ + transpose_4x4(x0, x1, x2, x3, t0, t1, t2) + +#define write_blocks(out, x0, x1, x2, x3, t0, t1, t2) \ + transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ + \ + vmovdqu x0, (0*4*4)(out); \ + vmovdqu x1, (1*4*4)(out); \ + vmovdqu x2, (2*4*4)(out); \ + vmovdqu x3, (3*4*4)(out); + +#define xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \ + transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ + \ + vpxor (0*4*4)(out), x0, x0; \ + vmovdqu x0, (0*4*4)(out); \ + vpxor (1*4*4)(out), x1, x1; \ + vmovdqu x1, (1*4*4)(out); \ + vpxor (2*4*4)(out), x2, x2; \ + vmovdqu x2, (2*4*4)(out); \ + vpxor (3*4*4)(out), x3, x3; \ + vmovdqu x3, (3*4*4)(out); + +.align 8 +.global __serpent_enc_blk_8way +.type __serpent_enc_blk_8way,@function; + +__serpent_enc_blk_8way: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + * %rcx: bool, if true: xor output + */ + + vpcmpeqd RNOT, RNOT, RNOT; + + leaq (4*4*4)(%rdx), %rax; + read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2); + read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); + + K2(RA, RB, RC, RD, RE, 0); + S(S0, RA, RB, RC, RD, RE); LK2(RC, RB, RD, RA, RE, 1); + S(S1, RC, RB, RD, RA, RE); LK2(RE, RD, RA, RC, RB, 2); + S(S2, RE, RD, RA, RC, RB); LK2(RB, RD, RE, RC, RA, 3); + S(S3, RB, RD, RE, RC, RA); LK2(RC, RA, RD, RB, RE, 4); + S(S4, RC, RA, RD, RB, RE); LK2(RA, RD, RB, RE, RC, 5); + S(S5, RA, RD, RB, RE, RC); LK2(RC, RA, RD, RE, RB, 6); + S(S6, RC, RA, RD, RE, RB); LK2(RD, RB, RA, RE, RC, 7); + S(S7, RD, RB, RA, RE, RC); LK2(RC, RA, RE, RD, RB, 8); + S(S0, RC, RA, RE, RD, RB); LK2(RE, RA, RD, RC, RB, 9); + 
S(S1, RE, RA, RD, RC, RB); LK2(RB, RD, RC, RE, RA, 10); + S(S2, RB, RD, RC, RE, RA); LK2(RA, RD, RB, RE, RC, 11); + S(S3, RA, RD, RB, RE, RC); LK2(RE, RC, RD, RA, RB, 12); + S(S4, RE, RC, RD, RA, RB); LK2(RC, RD, RA, RB, RE, 13); + S(S5, RC, RD, RA, RB, RE); LK2(RE, RC, RD, RB, RA, 14); + S(S6, RE, RC, RD, RB, RA); LK2(RD, RA, RC, RB, RE, 15); + S(S7, RD, RA, RC, RB, RE); LK2(RE, RC, RB, RD, RA, 16); + S(S0, RE, RC, RB, RD, RA); LK2(RB, RC, RD, RE, RA, 17); + S(S1, RB, RC, RD, RE, RA); LK2(RA, RD, RE, RB, RC, 18); + S(S2, RA, RD, RE, RB, RC); LK2(RC, RD, RA, RB, RE, 19); + S(S3, RC, RD, RA, RB, RE); LK2(RB, RE, RD, RC, RA, 20); + S(S4, RB, RE, RD, RC, RA); LK2(RE, RD, RC, RA, RB, 21); + S(S5, RE, RD, RC, RA, RB); LK2(RB, RE, RD, RA, RC, 22); + S(S6, RB, RE, RD, RA, RC); LK2(RD, RC, RE, RA, RB, 23); + S(S7, RD, RC, RE, RA, RB); LK2(RB, RE, RA, RD, RC, 24); + S(S0, RB, RE, RA, RD, RC); LK2(RA, RE, RD, RB, RC, 25); + S(S1, RA, RE, RD, RB, RC); LK2(RC, RD, RB, RA, RE, 26); + S(S2, RC, RD, RB, RA, RE); LK2(RE, RD, RC, RA, RB, 27); + S(S3, RE, RD, RC, RA, RB); LK2(RA, RB, RD, RE, RC, 28); + S(S4, RA, RB, RD, RE, RC); LK2(RB, RD, RE, RC, RA, 29); + S(S5, RB, RD, RE, RC, RA); LK2(RA, RB, RD, RC, RE, 30); + S(S6, RA, RB, RD, RC, RE); LK2(RD, RE, RB, RC, RA, 31); + S(S7, RD, RE, RB, RC, RA); K2(RA, RB, RC, RD, RE, 32); + + leaq (4*4*4)(%rsi), %rax; + + testb %cl, %cl; + jnz __enc_xor8; + + write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); + write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); + + ret; + +__enc_xor8: + xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); + xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); + + ret; + +.align 8 +.global serpent_dec_blk_8way +.type serpent_dec_blk_8way,@function; + +serpent_dec_blk_8way: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + + vpcmpeqd RNOT, RNOT, RNOT; + + leaq (4*4*4)(%rdx), %rax; + read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2); + read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, 
RK2); + + K2(RA, RB, RC, RD, RE, 32); + SP(SI7, RA, RB, RC, RD, RE, 31); KL2(RB, RD, RA, RE, RC, 31); + SP(SI6, RB, RD, RA, RE, RC, 30); KL2(RA, RC, RE, RB, RD, 30); + SP(SI5, RA, RC, RE, RB, RD, 29); KL2(RC, RD, RA, RE, RB, 29); + SP(SI4, RC, RD, RA, RE, RB, 28); KL2(RC, RA, RB, RE, RD, 28); + SP(SI3, RC, RA, RB, RE, RD, 27); KL2(RB, RC, RD, RE, RA, 27); + SP(SI2, RB, RC, RD, RE, RA, 26); KL2(RC, RA, RE, RD, RB, 26); + SP(SI1, RC, RA, RE, RD, RB, 25); KL2(RB, RA, RE, RD, RC, 25); + SP(SI0, RB, RA, RE, RD, RC, 24); KL2(RE, RC, RA, RB, RD, 24); + SP(SI7, RE, RC, RA, RB, RD, 23); KL2(RC, RB, RE, RD, RA, 23); + SP(SI6, RC, RB, RE, RD, RA, 22); KL2(RE, RA, RD, RC, RB, 22); + SP(SI5, RE, RA, RD, RC, RB, 21); KL2(RA, RB, RE, RD, RC, 21); + SP(SI4, RA, RB, RE, RD, RC, 20); KL2(RA, RE, RC, RD, RB, 20); + SP(SI3, RA, RE, RC, RD, RB, 19); KL2(RC, RA, RB, RD, RE, 19); + SP(SI2, RC, RA, RB, RD, RE, 18); KL2(RA, RE, RD, RB, RC, 18); + SP(SI1, RA, RE, RD, RB, RC, 17); KL2(RC, RE, RD, RB, RA, 17); + SP(SI0, RC, RE, RD, RB, RA, 16); KL2(RD, RA, RE, RC, RB, 16); + SP(SI7, RD, RA, RE, RC, RB, 15); KL2(RA, RC, RD, RB, RE, 15); + SP(SI6, RA, RC, RD, RB, RE, 14); KL2(RD, RE, RB, RA, RC, 14); + SP(SI5, RD, RE, RB, RA, RC, 13); KL2(RE, RC, RD, RB, RA, 13); + SP(SI4, RE, RC, RD, RB, RA, 12); KL2(RE, RD, RA, RB, RC, 12); + SP(SI3, RE, RD, RA, RB, RC, 11); KL2(RA, RE, RC, RB, RD, 11); + SP(SI2, RA, RE, RC, RB, RD, 10); KL2(RE, RD, RB, RC, RA, 10); + SP(SI1, RE, RD, RB, RC, RA, 9); KL2(RA, RD, RB, RC, RE, 9); + SP(SI0, RA, RD, RB, RC, RE, 8); KL2(RB, RE, RD, RA, RC, 8); + SP(SI7, RB, RE, RD, RA, RC, 7); KL2(RE, RA, RB, RC, RD, 7); + SP(SI6, RE, RA, RB, RC, RD, 6); KL2(RB, RD, RC, RE, RA, 6); + SP(SI5, RB, RD, RC, RE, RA, 5); KL2(RD, RA, RB, RC, RE, 5); + SP(SI4, RD, RA, RB, RC, RE, 4); KL2(RD, RB, RE, RC, RA, 4); + SP(SI3, RD, RB, RE, RC, RA, 3); KL2(RE, RD, RA, RC, RB, 3); + SP(SI2, RE, RD, RA, RC, RB, 2); KL2(RD, RB, RC, RA, RE, 2); + SP(SI1, RD, RB, RC, RA, RE, 1); KL2(RE, RB, RC, RA, RD, 
1); + S(SI0, RE, RB, RC, RA, RD); K2(RC, RD, RB, RE, RA, 0); + + leaq (4*4*4)(%rsi), %rax; + write_blocks(%rsi, RC1, RD1, RB1, RE1, RK0, RK1, RK2); + write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2); + + ret; diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c new file mode 100644 index 0000000..0dc7a26 --- /dev/null +++ b/arch/x86/crypto/serpent_avx_glue.c @@ -0,0 +1,949 @@ +/* + * Glue Code for AVX assembler versions of Serpent Cipher + * + * Copyright (C) 2012 Johannes Goetzfried + * + * + * Glue code based on serpent_sse2_glue.c by: + * Copyright (C) 2011 Jussi Kivilinna + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct async_serpent_ctx { + struct cryptd_ablkcipher *cryptd_tfm; +}; + +static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes) +{ + if (fpu_enabled) + return true; + + /* AVX is only used when chunk to be processed is large enough, so + * do not enable FPU until it is necessary. 
+	 */
+	if (nbytes < SERPENT_BLOCK_SIZE * SERPENT_PARALLEL_BLOCKS)
+		return false;
+
+	kernel_fpu_begin();
+	return true;
+}
+
+/*
+ * Calls kernel_fpu_end() when @fpu_enabled is set, i.e. when an earlier
+ * serpent_fpu_begin() returned true; otherwise does nothing.
+ */
+static inline void serpent_fpu_end(bool fpu_enabled)
+{
+	if (fpu_enabled)
+		kernel_fpu_end();
+}
+
+/*
+ * ECB-process every chunk the walk hands out, encrypting when @enc is true
+ * and decrypting otherwise.
+ *
+ * Within a chunk, groups of SERPENT_PARALLEL_BLOCKS blocks go through
+ * serpent_enc_blk_xway()/serpent_dec_blk_xway(); whatever whole blocks
+ * remain are handled one at a time by __serpent_encrypt()/
+ * __serpent_decrypt().  The trailing byte count (< SERPENT_BLOCK_SIZE) is
+ * passed to blkcipher_walk_done().
+ *
+ * Returns the status of the last blkcipher_walk_virt()/
+ * blkcipher_walk_done() call.
+ */
+static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
+		     bool enc)
+{
+	bool fpu_enabled = false;
+	struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	const unsigned int bsize = SERPENT_BLOCK_SIZE;
+	unsigned int nbytes;
+	int err;
+
+	err = blkcipher_walk_virt(desc, walk);
+	/*
+	 * NOTE(review): presumably cleared so that the walk never sleeps
+	 * while the FPU section opened below is still held - confirm.
+	 */
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	while ((nbytes = walk->nbytes)) {
+		u8 *wsrc = walk->src.virt.addr;
+		u8 *wdst = walk->dst.virt.addr;
+
+		/*
+		 * Opens the FPU section at most once; it is only closed
+		 * after the loop, by serpent_fpu_end().
+		 */
+		fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes);
+
+		/* Process multi-block batch */
+		if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) {
+			do {
+				if (enc)
+					serpent_enc_blk_xway(ctx, wdst, wsrc);
+				else
+					serpent_dec_blk_xway(ctx, wdst, wsrc);
+
+				wsrc += bsize * SERPENT_PARALLEL_BLOCKS;
+				wdst += bsize * SERPENT_PARALLEL_BLOCKS;
+				nbytes -= bsize * SERPENT_PARALLEL_BLOCKS;
+			} while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS);
+
+			/*
+			 * No whole block left: skip the single-block loop,
+			 * which would otherwise run once unconditionally.
+			 */
+			if (nbytes < bsize)
+				goto done;
+		}
+
+		/* Handle leftovers */
+		do {
+			if (enc)
+				__serpent_encrypt(ctx, wdst, wsrc);
+			else
+				__serpent_decrypt(ctx, wdst, wsrc);
+
+			wsrc += bsize;
+			wdst += bsize;
+			nbytes -= bsize;
+		} while (nbytes >= bsize);
+
+done:
+		/* nbytes is now the unprocessed remainder of this chunk. */
+		err = blkcipher_walk_done(desc, walk, nbytes);
+	}
+
+	serpent_fpu_end(fpu_enabled);
+	return err;
+}
+
+/* blkcipher .encrypt handler: set up the walk and run ecb_crypt() encrypting. */
+static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ecb_crypt(desc, &walk, true);
+}
+
+/* blkcipher .decrypt handler: set up the walk and run ecb_crypt() decrypting. */
+static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ecb_crypt(desc, &walk, false);
+}
+
+/*
+ * CBC-encrypt all whole blocks of the current walk chunk.
+ *
+ * Each plaintext block is XORed with the previous ciphertext block (the
+ * block at walk->iv for the first one) and then encrypted in place in the
+ * destination buffer, one block at a time, with __serpent_encrypt().
+ *
+ * On return walk->iv holds the last ciphertext block produced, so that the
+ * next chunk of the same request continues the chain correctly.
+ *
+ * The do/while always processes one block, so this assumes
+ * walk->nbytes >= SERPENT_BLOCK_SIZE on entry - presumably guaranteed by
+ * the walk for this block size; confirm against the walk code.
+ *
+ * Returns the number of trailing bytes (< SERPENT_BLOCK_SIZE) that were not
+ * processed, for the caller to pass to blkcipher_walk_done().
+ */
+static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
+				  struct blkcipher_walk *walk)
+{
+	struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	const unsigned int bsize = SERPENT_BLOCK_SIZE;
+	unsigned int nbytes = walk->nbytes;
+	u128 *src = (u128 *)walk->src.virt.addr;
+	u128 *dst = (u128 *)walk->dst.virt.addr;
+	u128 *iv = (u128 *)walk->iv;
+
+	do {
+		u128_xor(dst, src, iv);
+		__serpent_encrypt(ctx, (u8 *)dst, (u8 *)dst);
+		/* The block just written is the chaining value for the next. */
+		iv = dst;
+
+		src += 1;
+		dst += 1;
+		nbytes -= bsize;
+	} while (nbytes >= bsize);
+
+	/*
+	 * Store the last ciphertext block as the new IV.  This must be a
+	 * plain copy: XORing it into the old IV would leave
+	 * old_iv ^ last_ciphertext behind and break chaining across chunks.
+	 */
+	*(u128 *)walk->iv = *iv;
+	return nbytes;
+}
+
+/*
+ * blkcipher .encrypt handler for CBC: walk the scatterlists and encrypt
+ * chunk by chunk with __cbc_encrypt().  No FPU section is entered on this
+ * path.  Returns the status of the last walk call.
+ */
+static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	while ((nbytes = walk.nbytes)) {
+		nbytes = __cbc_encrypt(desc, &walk);
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+
+	return err;
+}
+
+static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
+				  struct blkcipher_walk *walk)
+{
+	struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	const unsigned int bsize = SERPENT_BLOCK_SIZE;
+	unsigned int nbytes = walk->nbytes;
+	u128 *src = (u128 *)walk->src.virt.addr;
+	u128 *dst = (u128 *)walk->dst.virt.addr;
+	u128 ivs[SERPENT_PARALLEL_BLOCKS - 1];
+	u128 last_iv;
+	int i;
+
+	/* Start of the last block.
*/ + src += nbytes / bsize - 1; + dst += nbytes / bsize - 1; + + last_iv = *src; + + /* Process multi-block batch */ + if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) { + do { + nbytes -= bsize * (SERPENT_PARALLEL_BLOCKS - 1); + src -= SERPENT_PARALLEL_BLOCKS - 1; + dst -= SERPENT_PARALLEL_BLOCKS - 1; + + for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++) + ivs[i] = src[i]; + + serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); + + for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++) + u128_xor(dst + (i + 1), dst + (i + 1), ivs + i); + + nbytes -= bsize; + if (nbytes < bsize) + goto done; + + u128_xor(dst, dst, src - 1); + src -= 1; + dst -= 1; + } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + for (;;) { + __serpent_decrypt(ctx, (u8 *)dst, (u8 *)src); + + nbytes -= bsize; + if (nbytes < bsize) + break; + + u128_xor(dst, dst, src - 1); + src -= 1; + dst -= 1; + } + +done: + u128_xor(dst, dst, (u128 *)walk->iv); + *(u128 *)walk->iv = last_iv; + + return nbytes; +} + +static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + bool fpu_enabled = false; + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + while ((nbytes = walk.nbytes)) { + fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes); + nbytes = __cbc_decrypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + serpent_fpu_end(fpu_enabled); + return err; +} + +static inline void u128_to_be128(be128 *dst, const u128 *src) +{ + dst->a = cpu_to_be64(src->a); + dst->b = cpu_to_be64(src->b); +} + +static inline void be128_to_u128(u128 *dst, const be128 *src) +{ + dst->a = be64_to_cpu(src->a); + dst->b = be64_to_cpu(src->b); +} + +static inline void u128_inc(u128 *i) +{ + i->b++; + if (!i->b) + i->a++; +} + +static void 
ctr_crypt_final(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + u8 *ctrblk = walk->iv; + u8 keystream[SERPENT_BLOCK_SIZE]; + u8 *src = walk->src.virt.addr; + u8 *dst = walk->dst.virt.addr; + unsigned int nbytes = walk->nbytes; + + __serpent_encrypt(ctx, keystream, ctrblk); + crypto_xor(keystream, src, nbytes); + memcpy(dst, keystream, nbytes); + + crypto_inc(ctrblk, SERPENT_BLOCK_SIZE); +} + +static unsigned int __ctr_crypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + const unsigned int bsize = SERPENT_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u128 *src = (u128 *)walk->src.virt.addr; + u128 *dst = (u128 *)walk->dst.virt.addr; + u128 ctrblk; + be128 ctrblocks[SERPENT_PARALLEL_BLOCKS]; + int i; + + be128_to_u128(&ctrblk, (be128 *)walk->iv); + + /* Process multi-block batch */ + if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) { + do { + /* create ctrblks for parallel encrypt */ + for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) { + if (dst != src) + dst[i] = src[i]; + + u128_to_be128(&ctrblocks[i], &ctrblk); + u128_inc(&ctrblk); + } + + serpent_enc_blk_xway_xor(ctx, (u8 *)dst, + (u8 *)ctrblocks); + + src += SERPENT_PARALLEL_BLOCKS; + dst += SERPENT_PARALLEL_BLOCKS; + nbytes -= bsize * SERPENT_PARALLEL_BLOCKS; + } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + do { + if (dst != src) + *dst = *src; + + u128_to_be128(&ctrblocks[0], &ctrblk); + u128_inc(&ctrblk); + + __serpent_encrypt(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks); + u128_xor(dst, dst, (u128 *)ctrblocks); + + src += 1; + dst += 1; + nbytes -= bsize; + } while (nbytes >= bsize); + +done: + u128_to_be128((be128 *)walk->iv, &ctrblk); + return nbytes; +} + +static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + 
bool fpu_enabled = false; + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, SERPENT_BLOCK_SIZE); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + while ((nbytes = walk.nbytes) >= SERPENT_BLOCK_SIZE) { + fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes); + nbytes = __ctr_crypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + serpent_fpu_end(fpu_enabled); + + if (walk.nbytes) { + ctr_crypt_final(desc, &walk); + err = blkcipher_walk_done(desc, &walk, 0); + } + + return err; +} + +struct crypt_priv { + struct serpent_ctx *ctx; + bool fpu_enabled; +}; + +static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +{ + const unsigned int bsize = SERPENT_BLOCK_SIZE; + struct crypt_priv *ctx = priv; + int i; + + ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); + + if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { + serpent_enc_blk_xway(ctx->ctx, srcdst, srcdst); + return; + } + + for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) + __serpent_encrypt(ctx->ctx, srcdst, srcdst); +} + +static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +{ + const unsigned int bsize = SERPENT_BLOCK_SIZE; + struct crypt_priv *ctx = priv; + int i; + + ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); + + if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { + serpent_dec_blk_xway(ctx->ctx, srcdst, srcdst); + return; + } + + for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) + __serpent_decrypt(ctx->ctx, srcdst, srcdst); +} + +struct serpent_lrw_ctx { + struct lrw_table_ctx lrw_table; + struct serpent_ctx serpent_ctx; +}; + +static int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); + int err; + + err = __serpent_setkey(&ctx->serpent_ctx, key, keylen - + SERPENT_BLOCK_SIZE); + if (err) + return err; + + return 
lrw_init_table(&ctx->lrw_table, key + keylen - + SERPENT_BLOCK_SIZE); +} + +static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[SERPENT_PARALLEL_BLOCKS]; + struct crypt_priv crypt_ctx = { + .ctx = &ctx->serpent_ctx, + .fpu_enabled = false, + }; + struct lrw_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .table_ctx = &ctx->lrw_table, + .crypt_ctx = &crypt_ctx, + .crypt_fn = encrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + ret = lrw_crypt(desc, dst, src, nbytes, &req); + serpent_fpu_end(crypt_ctx.fpu_enabled); + + return ret; +} + +static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[SERPENT_PARALLEL_BLOCKS]; + struct crypt_priv crypt_ctx = { + .ctx = &ctx->serpent_ctx, + .fpu_enabled = false, + }; + struct lrw_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .table_ctx = &ctx->lrw_table, + .crypt_ctx = &crypt_ctx, + .crypt_fn = decrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + ret = lrw_crypt(desc, dst, src, nbytes, &req); + serpent_fpu_end(crypt_ctx.fpu_enabled); + + return ret; +} + +static void lrw_exit_tfm(struct crypto_tfm *tfm) +{ + struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); + + lrw_free_table(&ctx->lrw_table); +} + +struct serpent_xts_ctx { + struct serpent_ctx tweak_ctx; + struct serpent_ctx crypt_ctx; +}; + +static int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; + int err; + + /* key consists of keys of equal size concatenated, therefore + * the length must be even + */ + if (keylen % 2) { + *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + 
return -EINVAL; + } + + /* first half of xts-key is for crypt */ + err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2); + if (err) + return err; + + /* second half of xts-key is for tweak */ + return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2); +} + +static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[SERPENT_PARALLEL_BLOCKS]; + struct crypt_priv crypt_ctx = { + .ctx = &ctx->crypt_ctx, + .fpu_enabled = false, + }; + struct xts_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .tweak_ctx = &ctx->tweak_ctx, + .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt), + .crypt_ctx = &crypt_ctx, + .crypt_fn = encrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + ret = xts_crypt(desc, dst, src, nbytes, &req); + serpent_fpu_end(crypt_ctx.fpu_enabled); + + return ret; +} + +static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[SERPENT_PARALLEL_BLOCKS]; + struct crypt_priv crypt_ctx = { + .ctx = &ctx->crypt_ctx, + .fpu_enabled = false, + }; + struct xts_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .tweak_ctx = &ctx->tweak_ctx, + .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt), + .crypt_ctx = &crypt_ctx, + .crypt_fn = decrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + ret = xts_crypt(desc, dst, src, nbytes, &req); + serpent_fpu_end(crypt_ctx.fpu_enabled); + + return ret; +} + +static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, + unsigned int key_len) +{ + struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); + struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base; + int err; + + crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); + 
crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm) + & CRYPTO_TFM_REQ_MASK); + err = crypto_ablkcipher_setkey(child, key, key_len); + crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child) + & CRYPTO_TFM_RES_MASK); + return err; +} + +static int __ablk_encrypt(struct ablkcipher_request *req) +{ + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); + struct blkcipher_desc desc; + + desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); + desc.info = req->info; + desc.flags = 0; + + return crypto_blkcipher_crt(desc.tfm)->encrypt( + &desc, req->dst, req->src, req->nbytes); +} + +static int ablk_encrypt(struct ablkcipher_request *req) +{ + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); + + if (!irq_fpu_usable()) { + struct ablkcipher_request *cryptd_req = + ablkcipher_request_ctx(req); + + memcpy(cryptd_req, req, sizeof(*req)); + ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); + + return crypto_ablkcipher_encrypt(cryptd_req); + } else { + return __ablk_encrypt(req); + } +} + +static int ablk_decrypt(struct ablkcipher_request *req) +{ + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); + + if (!irq_fpu_usable()) { + struct ablkcipher_request *cryptd_req = + ablkcipher_request_ctx(req); + + memcpy(cryptd_req, req, sizeof(*req)); + ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); + + return crypto_ablkcipher_decrypt(cryptd_req); + } else { + struct blkcipher_desc desc; + + desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); + desc.info = req->info; + desc.flags = 0; + + return crypto_blkcipher_crt(desc.tfm)->decrypt( + &desc, req->dst, req->src, req->nbytes); + } +} + +static void ablk_exit(struct crypto_tfm *tfm) +{ + struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm); + + 
cryptd_free_ablkcipher(ctx->cryptd_tfm); +} + +static int ablk_init(struct crypto_tfm *tfm) +{ + struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm); + struct cryptd_ablkcipher *cryptd_tfm; + char drv_name[CRYPTO_MAX_ALG_NAME]; + + snprintf(drv_name, sizeof(drv_name), "__driver-%s", + crypto_tfm_alg_driver_name(tfm)); + + cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + + ctx->cryptd_tfm = cryptd_tfm; + tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) + + crypto_ablkcipher_reqsize(&cryptd_tfm->base); + + return 0; +} + +static struct crypto_alg serpent_algs[10] = { { + .cra_name = "__ecb-serpent-avx", + .cra_driver_name = "__driver-ecb-serpent-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = SERPENT_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct serpent_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(serpent_algs[0].cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .setkey = serpent_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, + }, +}, { + .cra_name = "__cbc-serpent-avx", + .cra_driver_name = "__driver-cbc-serpent-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = SERPENT_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct serpent_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(serpent_algs[1].cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .setkey = serpent_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, + }, +}, { + .cra_name = "__ctr-serpent-avx", + .cra_driver_name = "__driver-ctr-serpent-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = 
sizeof(struct serpent_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(serpent_algs[2].cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .setkey = serpent_setkey, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, + }, +}, { + .cra_name = "__lrw-serpent-avx", + .cra_driver_name = "__driver-lrw-serpent-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = SERPENT_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct serpent_lrw_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(serpent_algs[3].cra_list), + .cra_exit = lrw_exit_tfm, + .cra_u = { + .blkcipher = { + .min_keysize = SERPENT_MIN_KEY_SIZE + + SERPENT_BLOCK_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE + + SERPENT_BLOCK_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .setkey = lrw_serpent_setkey, + .encrypt = lrw_encrypt, + .decrypt = lrw_decrypt, + }, + }, +}, { + .cra_name = "__xts-serpent-avx", + .cra_driver_name = "__driver-xts-serpent-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = SERPENT_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct serpent_xts_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(serpent_algs[4].cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = SERPENT_MIN_KEY_SIZE * 2, + .max_keysize = SERPENT_MAX_KEY_SIZE * 2, + .ivsize = SERPENT_BLOCK_SIZE, + .setkey = xts_serpent_setkey, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, + }, + }, +}, { + .cra_name = "ecb(serpent)", + .cra_driver_name = "ecb-serpent-avx", + .cra_priority = 500, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = SERPENT_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_serpent_ctx), + .cra_alignmask = 0, + 
.cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(serpent_algs[5].cra_list), + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +}, { + .cra_name = "cbc(serpent)", + .cra_driver_name = "cbc-serpent-avx", + .cra_priority = 500, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = SERPENT_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_serpent_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(serpent_algs[6].cra_list), + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = __ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +}, { + .cra_name = "ctr(serpent)", + .cra_driver_name = "ctr-serpent-avx", + .cra_priority = 500, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct async_serpent_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(serpent_algs[7].cra_list), + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_encrypt, + .geniv = "chainiv", + }, + }, +}, { + .cra_name = "lrw(serpent)", + .cra_driver_name = "lrw-serpent-avx", + .cra_priority = 500, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = SERPENT_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_serpent_ctx), + 
.cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(serpent_algs[8].cra_list), + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = SERPENT_MIN_KEY_SIZE + + SERPENT_BLOCK_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE + + SERPENT_BLOCK_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +}, { + .cra_name = "xts(serpent)", + .cra_driver_name = "xts-serpent-avx", + .cra_priority = 500, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = SERPENT_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_serpent_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(serpent_algs[9].cra_list), + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = SERPENT_MIN_KEY_SIZE * 2, + .max_keysize = SERPENT_MAX_KEY_SIZE * 2, + .ivsize = SERPENT_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +} }; + +static int __init serpent_init(void) +{ + u64 xcr0; + + if (!cpu_has_avx || !cpu_has_osxsave) { + printk(KERN_INFO "AVX instructions are not detected.\n"); + return -ENODEV; + } + + xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); + if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { + printk(KERN_INFO "AVX detected but unusable.\n"); + return -ENODEV; + } + + return crypto_register_algs(serpent_algs, ARRAY_SIZE(serpent_algs)); +} + +static void __exit serpent_exit(void) +{ + crypto_unregister_algs(serpent_algs, ARRAY_SIZE(serpent_algs)); +} + +module_init(serpent_init); +module_exit(serpent_exit); + +MODULE_DESCRIPTION("Serpent Cipher Algorithm, AVX optimized"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("serpent"); diff --git a/crypto/Kconfig b/crypto/Kconfig index e00a4e4..2c1c2df 100644 --- a/crypto/Kconfig +++ 
b/crypto/Kconfig @@ -821,6 +821,26 @@ config CRYPTO_SERPENT_SSE2_586 See also: +config CRYPTO_SERPENT_AVX_X86_64 + tristate "Serpent cipher algorithm (x86_64/AVX)" + depends on X86 && 64BIT + select CRYPTO_ALGAPI + select CRYPTO_CRYPTD + select CRYPTO_SERPENT + select CRYPTO_LRW + select CRYPTO_XTS + help + Serpent cipher algorithm, by Anderson, Biham & Knudsen. + + Keys are allowed to be from 0 to 256 bits in length, in steps + of 8 bits. + + This module provides the Serpent cipher algorithm that processes + eight blocks parallel using the AVX instruction set. + + See also: + + config CRYPTO_TEA tristate "TEA, XTEA and XETA cipher algorithms" select CRYPTO_ALGAPI diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 73b3ec6..36748a5 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -1534,6 +1534,21 @@ static int alg_test_null(const struct alg_test_desc *desc, /* Please keep this list sorted by algorithm name. */ static const struct alg_test_desc alg_test_descs[] = { { + .alg = "__cbc-serpent-avx", + .test = alg_test_null, + .suite = { + .cipher = { + .enc = { + .vecs = NULL, + .count = 0 + }, + .dec = { + .vecs = NULL, + .count = 0 + } + } + } + }, { .alg = "__cbc-serpent-sse2", .test = alg_test_null, .suite = { @@ -1579,6 +1594,21 @@ static const struct alg_test_desc alg_test_descs[] = { } } }, { + .alg = "__driver-cbc-serpent-avx", + .test = alg_test_null, + .suite = { + .cipher = { + .enc = { + .vecs = NULL, + .count = 0 + }, + .dec = { + .vecs = NULL, + .count = 0 + } + } + } + }, { .alg = "__driver-cbc-serpent-sse2", .test = alg_test_null, .suite = { @@ -1624,6 +1654,21 @@ static const struct alg_test_desc alg_test_descs[] = { } } }, { + .alg = "__driver-ecb-serpent-avx", + .test = alg_test_null, + .suite = { + .cipher = { + .enc = { + .vecs = NULL, + .count = 0 + }, + .dec = { + .vecs = NULL, + .count = 0 + } + } + } + }, { .alg = "__driver-ecb-serpent-sse2", .test = alg_test_null, .suite = { @@ -1836,6 +1881,21 @@ static const struct alg_test_desc 
alg_test_descs[] = { } } }, { + .alg = "cryptd(__driver-ecb-serpent-avx)", + .test = alg_test_null, + .suite = { + .cipher = { + .enc = { + .vecs = NULL, + .count = 0 + }, + .dec = { + .vecs = NULL, + .count = 0 + } + } + } + }, { .alg = "cryptd(__driver-ecb-serpent-sse2)", .test = alg_test_null, .suite = { -- cgit v0.10.2 From d691af000274ae9658695c2a63a76b30890a8983 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Tue, 12 Jun 2012 16:50:55 +0800 Subject: crypto: s390 - clean up DES code a bit more Commit 98971f8439b1bb9a61682fe24a865ddd25167a6b ("crypto: s390 - cleanup DES code") should have also removed crypto_des.h. That file is unused and unneeded since that commit. So let's clean up that file too. Signed-off-by: Paul Bolle Acked-by: Jan Glauber Signed-off-by: Herbert Xu diff --git a/arch/s390/crypto/crypto_des.h b/arch/s390/crypto/crypto_des.h deleted file mode 100644 index 6210457..0000000 --- a/arch/s390/crypto/crypto_des.h +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Cryptographic API. - * - * Function for checking keys for the DES and Tripple DES Encryption - * algorithms. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - */ -#ifndef __CRYPTO_DES_H__ -#define __CRYPTO_DES_H__ - -extern int crypto_des_check_key(const u8*, unsigned int, u32*); - -#endif /*__CRYPTO_DES_H__*/ -- cgit v0.10.2 From 31b4cd2907322bd56e4090168e4ce8f8dcd17820 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Tue, 12 Jun 2012 16:52:04 +0800 Subject: crypto: testmgr - add ecb(arc4) speed tests Signed-off-by: Jussi Kivilinna Signed-off-by: Herbert Xu diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 2af8797..58ad4fe 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -1342,6 +1342,11 @@ static int do_test(int m) speed_template_32_64); break; + case 208: + test_cipher_speed("ecb(arc4)", ENCRYPT, sec, NULL, 0, + speed_template_8); + break; + case 300: /* fall through */ @@ -1589,6 +1594,11 @@ static int do_test(int m) speed_template_32_48_64); break; + case 505: + test_acipher_speed("ecb(arc4)", ENCRYPT, sec, NULL, 0, + speed_template_8); + break; + case 1000: test_available(); break; -- cgit v0.10.2 From ce6dd368984068c8938e8d3fba8e292ef63cee97 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sat, 9 Jun 2012 18:25:40 +0300 Subject: crypto: arc4 - improve performance by adding ecb(arc4) Currently arc4.c provides simple one-byte blocksize cipher which is wrapped by ecb() module, giving function call overhead on every encrypted byte. This patch adds ecb(arc4) directly into arc4.c for higher performance. tcrypt results (speed ratios: new/old): AMD Phenom II, x86-64 : x2.7 Intel Core 2, x86-64 : x1.9 Intel Atom N260, i386 : x1.4 Cc: Jon Oberheide Signed-off-by: Jussi Kivilinna Signed-off-by: Herbert Xu diff --git a/crypto/arc4.c b/crypto/arc4.c index 0d12a96..07913fc 100644 --- a/crypto/arc4.c +++ b/crypto/arc4.c @@ -11,9 +11,11 @@ * (at your option) any later version. 
* */ + #include #include #include +#include #define ARC4_MIN_KEY_SIZE 1 #define ARC4_MAX_KEY_SIZE 256 @@ -48,51 +50,114 @@ static int arc4_set_key(struct crypto_tfm *tfm, const u8 *in_key, return 0; } -static void arc4_crypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +static void arc4_crypt(struct arc4_ctx *ctx, u8 *out, const u8 *in, + unsigned int len) { - struct arc4_ctx *ctx = crypto_tfm_ctx(tfm); - u8 *const S = ctx->S; - u8 x = ctx->x; - u8 y = ctx->y; - u8 a, b; + u8 x, y, a, b; + u8 ty, ta, tb; + + if (len == 0) + return; + + x = ctx->x; + y = ctx->y; a = S[x]; y = (y + a) & 0xff; b = S[y]; - S[x] = b; - S[y] = a; - x = (x + 1) & 0xff; - *out++ = *in ^ S[(a + b) & 0xff]; + + do { + S[y] = a; + a = (a + b) & 0xff; + S[x] = b; + x = (x + 1) & 0xff; + ta = S[x]; + ty = (y + ta) & 0xff; + tb = S[ty]; + *out++ = *in++ ^ S[a]; + if (--len == 0) + break; + y = ty; + a = ta; + b = tb; + } while (true); ctx->x = x; ctx->y = y; } -static struct crypto_alg arc4_alg = { +static void arc4_crypt_one(struct crypto_tfm *tfm, u8 *out, const u8 *in) +{ + arc4_crypt(crypto_tfm_ctx(tfm), out, in, 1); +} + +static int ecb_arc4_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct arc4_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + + err = blkcipher_walk_virt(desc, &walk); + + while (walk.nbytes > 0) { + u8 *wsrc = walk.src.virt.addr; + u8 *wdst = walk.dst.virt.addr; + + arc4_crypt(ctx, wdst, wsrc, walk.nbytes); + + err = blkcipher_walk_done(desc, &walk, 0); + } + + return err; +} + +static struct crypto_alg arc4_algs[2] = { { .cra_name = "arc4", .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = ARC4_BLOCK_SIZE, .cra_ctxsize = sizeof(struct arc4_ctx), .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(arc4_alg.cra_list), - .cra_u = { .cipher = { - .cia_min_keysize = ARC4_MIN_KEY_SIZE, - .cia_max_keysize = 
ARC4_MAX_KEY_SIZE, - .cia_setkey = arc4_set_key, - .cia_encrypt = arc4_crypt, - .cia_decrypt = arc4_crypt } } -}; + .cra_u = { + .cipher = { + .cia_min_keysize = ARC4_MIN_KEY_SIZE, + .cia_max_keysize = ARC4_MAX_KEY_SIZE, + .cia_setkey = arc4_set_key, + .cia_encrypt = arc4_crypt_one, + .cia_decrypt = arc4_crypt_one, + }, + }, +}, { + .cra_name = "ecb(arc4)", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = ARC4_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct arc4_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = ARC4_MIN_KEY_SIZE, + .max_keysize = ARC4_MAX_KEY_SIZE, + .setkey = arc4_set_key, + .encrypt = ecb_arc4_crypt, + .decrypt = ecb_arc4_crypt, + }, + }, +} }; static int __init arc4_init(void) { - return crypto_register_alg(&arc4_alg); + return crypto_register_algs(arc4_algs, ARRAY_SIZE(arc4_algs)); } - static void __exit arc4_exit(void) { - crypto_unregister_alg(&arc4_alg); + crypto_unregister_algs(arc4_algs, ARRAY_SIZE(arc4_algs)); } module_init(arc4_init); -- cgit v0.10.2 From d366db605c8c4a9878589bc4a87e55f6063184ac Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sat, 9 Jun 2012 18:25:46 +0300 Subject: crypto: arc4 - improve performance by using u32 for ctx and variables This patch changes u8 in struct arc4_ctx and variables to u32 (as AMD seems to have problem with u8 array). Below are tcrypt results of old 1-byte block cipher versus ecb(arc4) with u8 and ecb(arc4) with u32. 
tcrypt results, x86-64 (speed ratios: new-u32/old, new-u8/old): u32 u8 AMD Phenom II : x3.6 x2.7 Intel Core 2 : x2.0 x1.9 tcrypt results, i386 (speed ratios: new-u32/old, new-u8/old): u32 u8 Intel Atom N260 : x1.5 x1.4 Cc: Jon Oberheide Signed-off-by: Jussi Kivilinna Signed-off-by: Herbert Xu diff --git a/crypto/arc4.c b/crypto/arc4.c index 07913fc..5a772c3 100644 --- a/crypto/arc4.c +++ b/crypto/arc4.c @@ -22,8 +22,8 @@ #define ARC4_BLOCK_SIZE 1 struct arc4_ctx { - u8 S[256]; - u8 x, y; + u32 S[256]; + u32 x, y; }; static int arc4_set_key(struct crypto_tfm *tfm, const u8 *in_key, @@ -39,7 +39,7 @@ static int arc4_set_key(struct crypto_tfm *tfm, const u8 *in_key, ctx->S[i] = i; for (i = 0; i < 256; i++) { - u8 a = ctx->S[i]; + u32 a = ctx->S[i]; j = (j + in_key[k] + a) & 0xff; ctx->S[i] = ctx->S[j]; ctx->S[j] = a; @@ -53,9 +53,9 @@ static int arc4_set_key(struct crypto_tfm *tfm, const u8 *in_key, static void arc4_crypt(struct arc4_ctx *ctx, u8 *out, const u8 *in, unsigned int len) { - u8 *const S = ctx->S; - u8 x, y, a, b; - u8 ty, ta, tb; + u32 *const S = ctx->S; + u32 x, y, a, b; + u32 ty, ta, tb; if (len == 0) return; -- cgit v0.10.2 From 3387e7d69048f5ab02729825f9611754850d9a87 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Thu, 14 Jun 2012 10:09:03 +0800 Subject: crypto: serpent-sse2/avx - allow both to be built into kernel Rename serpent-avx assembler functions so that they do not collide with serpent-sse2 assembler functions when linking both versions in to same kernel image. 
Reported-by: Randy Dunlap Cc: Johannes Goetzfried Signed-off-by: Jussi Kivilinna Signed-off-by: Herbert Xu diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S index 0ed47a1..504106b 100644 --- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S @@ -579,10 +579,10 @@ vmovdqu x3, (3*4*4)(out); .align 8 -.global __serpent_enc_blk_8way -.type __serpent_enc_blk_8way,@function; +.global __serpent_enc_blk_8way_avx +.type __serpent_enc_blk_8way_avx,@function; -__serpent_enc_blk_8way: +__serpent_enc_blk_8way_avx: /* input: * %rdi: ctx, CTX * %rsi: dst @@ -647,10 +647,10 @@ __enc_xor8: ret; .align 8 -.global serpent_dec_blk_8way -.type serpent_dec_blk_8way,@function; +.global serpent_dec_blk_8way_avx +.type serpent_dec_blk_8way_avx,@function; -serpent_dec_blk_8way: +serpent_dec_blk_8way_avx: /* input: * %rdi: ctx, CTX * %rsi: dst diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c index 0dc7a26..dd81bab 100644 --- a/arch/x86/crypto/serpent_avx_glue.c +++ b/arch/x86/crypto/serpent_avx_glue.c @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c index 4b21be8..deecd25 100644 --- a/arch/x86/crypto/serpent_sse2_glue.c +++ b/arch/x86/crypto/serpent_sse2_glue.c @@ -42,7 +42,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/include/asm/serpent-avx.h b/arch/x86/include/asm/serpent-avx.h new file mode 100644 index 0000000..432deed --- /dev/null +++ b/arch/x86/include/asm/serpent-avx.h @@ -0,0 +1,32 @@ +#ifndef ASM_X86_SERPENT_AVX_H +#define ASM_X86_SERPENT_AVX_H + +#include +#include + +#define SERPENT_PARALLEL_BLOCKS 8 + +asmlinkage void __serpent_enc_blk_8way_avx(struct serpent_ctx *ctx, u8 *dst, + const u8 *src, bool xor); +asmlinkage void serpent_dec_blk_8way_avx(struct serpent_ctx 
*ctx, u8 *dst, + const u8 *src); + +static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst, + const u8 *src) +{ + __serpent_enc_blk_8way_avx(ctx, dst, src, false); +} + +static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst, + const u8 *src) +{ + __serpent_enc_blk_8way_avx(ctx, dst, src, true); +} + +static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst, + const u8 *src) +{ + serpent_dec_blk_8way_avx(ctx, dst, src); +} + +#endif diff --git a/arch/x86/include/asm/serpent-sse2.h b/arch/x86/include/asm/serpent-sse2.h new file mode 100644 index 0000000..e6e77df --- /dev/null +++ b/arch/x86/include/asm/serpent-sse2.h @@ -0,0 +1,63 @@ +#ifndef ASM_X86_SERPENT_SSE2_H +#define ASM_X86_SERPENT_SSE2_H + +#include +#include + +#ifdef CONFIG_X86_32 + +#define SERPENT_PARALLEL_BLOCKS 4 + +asmlinkage void __serpent_enc_blk_4way(struct serpent_ctx *ctx, u8 *dst, + const u8 *src, bool xor); +asmlinkage void serpent_dec_blk_4way(struct serpent_ctx *ctx, u8 *dst, + const u8 *src); + +static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst, + const u8 *src) +{ + __serpent_enc_blk_4way(ctx, dst, src, false); +} + +static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst, + const u8 *src) +{ + __serpent_enc_blk_4way(ctx, dst, src, true); +} + +static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst, + const u8 *src) +{ + serpent_dec_blk_4way(ctx, dst, src); +} + +#else + +#define SERPENT_PARALLEL_BLOCKS 8 + +asmlinkage void __serpent_enc_blk_8way(struct serpent_ctx *ctx, u8 *dst, + const u8 *src, bool xor); +asmlinkage void serpent_dec_blk_8way(struct serpent_ctx *ctx, u8 *dst, + const u8 *src); + +static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst, + const u8 *src) +{ + __serpent_enc_blk_8way(ctx, dst, src, false); +} + +static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst, + const u8 *src) +{ + __serpent_enc_blk_8way(ctx, dst, 
src, true); +} + +static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst, + const u8 *src) +{ + serpent_dec_blk_8way(ctx, dst, src); +} + +#endif + +#endif diff --git a/arch/x86/include/asm/serpent.h b/arch/x86/include/asm/serpent.h deleted file mode 100644 index d3ef63f..0000000 --- a/arch/x86/include/asm/serpent.h +++ /dev/null @@ -1,63 +0,0 @@ -#ifndef ASM_X86_SERPENT_H -#define ASM_X86_SERPENT_H - -#include -#include - -#ifdef CONFIG_X86_32 - -#define SERPENT_PARALLEL_BLOCKS 4 - -asmlinkage void __serpent_enc_blk_4way(struct serpent_ctx *ctx, u8 *dst, - const u8 *src, bool xor); -asmlinkage void serpent_dec_blk_4way(struct serpent_ctx *ctx, u8 *dst, - const u8 *src); - -static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) -{ - __serpent_enc_blk_4way(ctx, dst, src, false); -} - -static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) -{ - __serpent_enc_blk_4way(ctx, dst, src, true); -} - -static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) -{ - serpent_dec_blk_4way(ctx, dst, src); -} - -#else - -#define SERPENT_PARALLEL_BLOCKS 8 - -asmlinkage void __serpent_enc_blk_8way(struct serpent_ctx *ctx, u8 *dst, - const u8 *src, bool xor); -asmlinkage void serpent_dec_blk_8way(struct serpent_ctx *ctx, u8 *dst, - const u8 *src); - -static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) -{ - __serpent_enc_blk_8way(ctx, dst, src, false); -} - -static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) -{ - __serpent_enc_blk_8way(ctx, dst, src, true); -} - -static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) -{ - serpent_dec_blk_8way(ctx, dst, src); -} - -#endif - -#endif -- cgit v0.10.2 From 398710379f516012c52d2ae396a9ba919bd6a7ab Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 22 Jun 2012 20:08:29 +0800 Subject: crypto: algapi 
- Move larval completion into algboss It has been observed that sometimes the crypto allocation code will get stuck for 60 seconds or multiples thereof. This is usually caused by an algorithm failing to pass the self-test. If an algorithm fails to be constructed, we will immediately notify all larval waiters. However, if it succeeds in construction, but then fails the self-test, we won't notify anyone at all. This patch fixes this by merging the notification in the case where the algorithm fails to be constructed with that of the case where it passes the self-test. This way regardless of what happens, we'll give the larval waiters an answer. Signed-off-by: Herbert Xu diff --git a/crypto/algapi.c b/crypto/algapi.c index 056571b..c3b9bfe 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -24,22 +24,6 @@ static LIST_HEAD(crypto_template_list); -void crypto_larval_error(const char *name, u32 type, u32 mask) -{ - struct crypto_alg *alg; - - alg = crypto_alg_lookup(name, type, mask); - - if (alg) { - if (crypto_is_larval(alg)) { - struct crypto_larval *larval = (void *)alg; - complete_all(&larval->completion); - } - crypto_mod_put(alg); - } -} -EXPORT_SYMBOL_GPL(crypto_larval_error); - static inline int crypto_set_driver_name(struct crypto_alg *alg) { static const char suffix[] = "-generic"; @@ -295,7 +279,6 @@ found: continue; larval->adult = alg; - complete_all(&larval->completion); continue; } diff --git a/crypto/algboss.c b/crypto/algboss.c index 791d194..f97027e 100644 --- a/crypto/algboss.c +++ b/crypto/algboss.c @@ -11,6 +11,7 @@ */ #include +#include #include #include #include @@ -47,6 +48,8 @@ struct cryptomgr_param { char larval[CRYPTO_MAX_ALG_NAME]; char template[CRYPTO_MAX_ALG_NAME]; + struct completion *completion; + u32 otype; u32 omask; }; @@ -66,7 +69,7 @@ static int cryptomgr_probe(void *data) tmpl = crypto_lookup_template(param->template); if (!tmpl) - goto err; + goto out; do { if (tmpl->create) { @@ -83,16 +86,10 @@ static int
cryptomgr_probe(void *data) crypto_tmpl_put(tmpl); - if (err) - goto err; - out: + complete(param->completion); kfree(param); module_put_and_exit(0); - -err: - crypto_larval_error(param->larval, param->otype, param->omask); - goto out; } static int cryptomgr_schedule_probe(struct crypto_larval *larval) @@ -192,10 +189,14 @@ static int cryptomgr_schedule_probe(struct crypto_larval *larval) memcpy(param->larval, larval->alg.cra_name, CRYPTO_MAX_ALG_NAME); + param->completion = &larval->completion; + thread = kthread_run(cryptomgr_probe, param, "cryptomgr_probe"); if (IS_ERR(thread)) goto err_free_param; + wait_for_completion_interruptible(&larval->completion); + return NOTIFY_STOP; err_free_param: diff --git a/crypto/internal.h b/crypto/internal.h index b865ca1..9ebedae 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -83,7 +83,6 @@ void crypto_exit_compress_ops(struct crypto_tfm *tfm); struct crypto_larval *crypto_larval_alloc(const char *name, u32 type, u32 mask); void crypto_larval_kill(struct crypto_alg *alg); struct crypto_alg *crypto_larval_lookup(const char *name, u32 type, u32 mask); -void crypto_larval_error(const char *name, u32 type, u32 mask); void crypto_alg_tested(const char *name, int err); void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list, -- cgit v0.10.2 From 3621189064301a5fbb5d06ca17d966a026f4e501 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Th=C3=A9baudeau?= Date: Wed, 13 Jun 2012 18:15:34 +0200 Subject: hwrng: mxc-rnga - fix data_present API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 45001e9, which added support for RNGA, ignored the previous commit 984e976, which changed the data_present API. 
Cc: Matt Mackall Cc: Sascha Hauer Cc: Alan Carvalho de Assis Cc: Signed-off-by: Benoît Thébaudeau Signed-off-by: Herbert Xu diff --git a/drivers/char/hw_random/mxc-rnga.c b/drivers/char/hw_random/mxc-rnga.c index 187c6be..85074de 100644 --- a/drivers/char/hw_random/mxc-rnga.c +++ b/drivers/char/hw_random/mxc-rnga.c @@ -24,6 +24,7 @@ #include #include #include +#include #include /* RNGA Registers */ @@ -60,16 +61,20 @@ static struct platform_device *rng_dev; -static int mxc_rnga_data_present(struct hwrng *rng) +static int mxc_rnga_data_present(struct hwrng *rng, int wait) { - int level; void __iomem *rng_base = (void __iomem *)rng->priv; - - /* how many random numbers is in FIFO? [0-16] */ - level = ((__raw_readl(rng_base + RNGA_STATUS) & - RNGA_STATUS_LEVEL_MASK) >> 8); - - return level > 0 ? 1 : 0; + int i; + + for (i = 0; i < 20; i++) { + /* how many random numbers are in FIFO? [0-16] */ + int level = (__raw_readl(rng_base + RNGA_STATUS) & + RNGA_STATUS_LEVEL_MASK) >> 8; + if (level || !wait) + return !!level; + udelay(10); + } + return 0; } static int mxc_rnga_data_read(struct hwrng *rng, u32 * data) -- cgit v0.10.2 From 95ead5d7ff824a01cb07921c9211a7e29437a929 Mon Sep 17 00:00:00 2001 From: Seth Jennings Date: Wed, 13 Jun 2012 13:22:42 -0500 Subject: crypto: nx - move nx build to driver/crypto Makefile When the nx driver was pulled, the Makefile that actually builds it is arch/powerpc/Makefile. This is unnatural. This patch moves the line that builds the nx driver from arch/powerpc/Makefile to drivers/crypto/Makefile where it belongs. 
Signed-off-by: Seth Jennings Acked-by: Kent Yoder Signed-off-by: Herbert Xu diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 950d1f7..159e94f 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -149,7 +149,6 @@ core-$(CONFIG_KVM) += arch/powerpc/kvm/ core-$(CONFIG_PERF_EVENTS) += arch/powerpc/perf/ drivers-$(CONFIG_OPROFILE) += arch/powerpc/oprofile/ -drivers-$(CONFIG_CRYPTO_DEV_NX) += drivers/crypto/nx/ # Default to zImage, override when needed all: zImage diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index d5062bb..1c5a145 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -16,3 +16,4 @@ obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o obj-$(CONFIG_CRYPTO_DEV_TEGRA_AES) += tegra-aes.o obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/ obj-$(CONFIG_CRYPTO_DEV_BFIN_CRC) += bfin_crc.o +obj-$(CONFIG_CRYPTO_DEV_NX) += nx/ -- cgit v0.10.2 From 7c76bdd7c3baf6d2431bb801f5b11d2ac195fdd6 Mon Sep 17 00:00:00 2001 From: Seth Jennings Date: Wed, 13 Jun 2012 13:22:43 -0500 Subject: crypto: nx - fix typo in nx driver config option Signed-off-by: Seth Jennings Acked-by: Kent Yoder Signed-off-by: Herbert Xu diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index dd4d5af..be6b2ba 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -298,7 +298,7 @@ config CRYPTO_DEV_TEGRA_AES will be called tegra-aes. config CRYPTO_DEV_NX - tristate "Support for Power7+ in-Nest cryptographic accleration" + tristate "Support for Power7+ in-Nest cryptographic acceleration" depends on PPC64 && IBMVIO select CRYPTO_AES select CRYPTO_CBC -- cgit v0.10.2 From ffaf9156320a077ebf9c5b9a5cf987689dc1a6b3 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Mon, 18 Jun 2012 14:06:58 +0300 Subject: crypto: ablk_helper - move ablk_* functions from serpent-sse2/avx glue code to shared module Move ablk-* functions to separate module to share common code between cipher implementations. 
Signed-off-by: Jussi Kivilinna Signed-off-by: Herbert Xu diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 83caa4b..ad74691 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -2,6 +2,8 @@ # Arch-specific CryptoAPI modules. # +obj-$(CONFIG_CRYPTO_ABLK_HELPER_X86) += ablk_helper.o + obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o diff --git a/arch/x86/crypto/ablk_helper.c b/arch/x86/crypto/ablk_helper.c new file mode 100644 index 0000000..284ca3b --- /dev/null +++ b/arch/x86/crypto/ablk_helper.c @@ -0,0 +1,150 @@ +/* + * Shared async block cipher helpers + * + * Copyright (c) 2012 Jussi Kivilinna + * + * Based on aesni-intel_glue.c by: + * Copyright (C) 2008, Intel Corp. + * Author: Huang Ying + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, + unsigned int key_len) +{ + struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm); + struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base; + int err; + + crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm) + & CRYPTO_TFM_REQ_MASK); + err = crypto_ablkcipher_setkey(child, key, key_len); + crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child) + & CRYPTO_TFM_RES_MASK); + return err; +} +EXPORT_SYMBOL_GPL(ablk_set_key); + +int __ablk_encrypt(struct ablkcipher_request *req) +{ + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm); + struct blkcipher_desc desc; + + desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); + desc.info = req->info; + desc.flags = 0; + + return crypto_blkcipher_crt(desc.tfm)->encrypt( + &desc, req->dst, req->src, req->nbytes); +} +EXPORT_SYMBOL_GPL(__ablk_encrypt); + +int ablk_encrypt(struct ablkcipher_request *req) +{ + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm); + + if (!irq_fpu_usable()) { + struct ablkcipher_request *cryptd_req = + ablkcipher_request_ctx(req); + + memcpy(cryptd_req, req, sizeof(*req)); + ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); + + return crypto_ablkcipher_encrypt(cryptd_req); + } else { + return __ablk_encrypt(req); + } +} +EXPORT_SYMBOL_GPL(ablk_encrypt); + +int ablk_decrypt(struct ablkcipher_request *req) +{ + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct 
async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm); + + if (!irq_fpu_usable()) { + struct ablkcipher_request *cryptd_req = + ablkcipher_request_ctx(req); + + memcpy(cryptd_req, req, sizeof(*req)); + ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); + + return crypto_ablkcipher_decrypt(cryptd_req); + } else { + struct blkcipher_desc desc; + + desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); + desc.info = req->info; + desc.flags = 0; + + return crypto_blkcipher_crt(desc.tfm)->decrypt( + &desc, req->dst, req->src, req->nbytes); + } +} +EXPORT_SYMBOL_GPL(ablk_decrypt); + +void ablk_exit(struct crypto_tfm *tfm) +{ + struct async_helper_ctx *ctx = crypto_tfm_ctx(tfm); + + cryptd_free_ablkcipher(ctx->cryptd_tfm); +} +EXPORT_SYMBOL_GPL(ablk_exit); + +void ablk_init_common(struct crypto_tfm *tfm, + struct cryptd_ablkcipher *cryptd_tfm) +{ + struct async_helper_ctx *ctx = crypto_tfm_ctx(tfm); + + ctx->cryptd_tfm = cryptd_tfm; + tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) + + crypto_ablkcipher_reqsize(&cryptd_tfm->base); +} +EXPORT_SYMBOL_GPL(ablk_init_common); + +int ablk_init(struct crypto_tfm *tfm) +{ + struct cryptd_ablkcipher *cryptd_tfm; + char drv_name[CRYPTO_MAX_ALG_NAME]; + + snprintf(drv_name, sizeof(drv_name), "__driver-%s", + crypto_tfm_alg_driver_name(tfm)); + + cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + + ablk_init_common(tfm, cryptd_tfm); + + return 0; +} +EXPORT_SYMBOL_GPL(ablk_init); + +MODULE_LICENSE("GPL"); diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c index dd81bab..31eb567 100644 --- a/arch/x86/crypto/serpent_avx_glue.c +++ b/arch/x86/crypto/serpent_avx_glue.c @@ -40,14 +40,11 @@ #include #include #include +#include #include #include #include -struct async_serpent_ctx { - struct cryptd_ablkcipher *cryptd_tfm; -}; - static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes) { if (fpu_enabled) @@ 
-593,106 +590,6 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, return ret; } -static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, - unsigned int key_len) -{ - struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); - struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base; - int err; - - crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); - crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm) - & CRYPTO_TFM_REQ_MASK); - err = crypto_ablkcipher_setkey(child, key, key_len); - crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child) - & CRYPTO_TFM_RES_MASK); - return err; -} - -static int __ablk_encrypt(struct ablkcipher_request *req) -{ - struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); - struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); - struct blkcipher_desc desc; - - desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); - desc.info = req->info; - desc.flags = 0; - - return crypto_blkcipher_crt(desc.tfm)->encrypt( - &desc, req->dst, req->src, req->nbytes); -} - -static int ablk_encrypt(struct ablkcipher_request *req) -{ - struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); - struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); - - if (!irq_fpu_usable()) { - struct ablkcipher_request *cryptd_req = - ablkcipher_request_ctx(req); - - memcpy(cryptd_req, req, sizeof(*req)); - ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); - - return crypto_ablkcipher_encrypt(cryptd_req); - } else { - return __ablk_encrypt(req); - } -} - -static int ablk_decrypt(struct ablkcipher_request *req) -{ - struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); - struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); - - if (!irq_fpu_usable()) { - struct ablkcipher_request *cryptd_req = - ablkcipher_request_ctx(req); - - memcpy(cryptd_req, req, sizeof(*req)); - ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); - - return 
crypto_ablkcipher_decrypt(cryptd_req); - } else { - struct blkcipher_desc desc; - - desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); - desc.info = req->info; - desc.flags = 0; - - return crypto_blkcipher_crt(desc.tfm)->decrypt( - &desc, req->dst, req->src, req->nbytes); - } -} - -static void ablk_exit(struct crypto_tfm *tfm) -{ - struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm); - - cryptd_free_ablkcipher(ctx->cryptd_tfm); -} - -static int ablk_init(struct crypto_tfm *tfm) -{ - struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm); - struct cryptd_ablkcipher *cryptd_tfm; - char drv_name[CRYPTO_MAX_ALG_NAME]; - - snprintf(drv_name, sizeof(drv_name), "__driver-%s", - crypto_tfm_alg_driver_name(tfm)); - - cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - - ctx->cryptd_tfm = cryptd_tfm; - tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) + - crypto_ablkcipher_reqsize(&cryptd_tfm->base); - - return 0; -} - static struct crypto_alg serpent_algs[10] = { { .cra_name = "__ecb-serpent-avx", .cra_driver_name = "__driver-ecb-serpent-avx", @@ -805,7 +702,7 @@ static struct crypto_alg serpent_algs[10] = { { .cra_priority = 500, .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_serpent_ctx), + .cra_ctxsize = sizeof(struct async_helper_ctx), .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, @@ -827,7 +724,7 @@ static struct crypto_alg serpent_algs[10] = { { .cra_priority = 500, .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_serpent_ctx), + .cra_ctxsize = sizeof(struct async_helper_ctx), .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, @@ -850,7 +747,7 @@ static struct crypto_alg serpent_algs[10] = { { .cra_priority = 500, .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | 
CRYPTO_ALG_ASYNC, .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_serpent_ctx), + .cra_ctxsize = sizeof(struct async_helper_ctx), .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, @@ -874,7 +771,7 @@ static struct crypto_alg serpent_algs[10] = { { .cra_priority = 500, .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_serpent_ctx), + .cra_ctxsize = sizeof(struct async_helper_ctx), .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, @@ -899,7 +796,7 @@ static struct crypto_alg serpent_algs[10] = { { .cra_priority = 500, .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_serpent_ctx), + .cra_ctxsize = sizeof(struct async_helper_ctx), .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c index deecd25..805c91f 100644 --- a/arch/x86/crypto/serpent_sse2_glue.c +++ b/arch/x86/crypto/serpent_sse2_glue.c @@ -43,14 +43,11 @@ #include #include #include +#include #include #include #include -struct async_serpent_ctx { - struct cryptd_ablkcipher *cryptd_tfm; -}; - static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes) { if (fpu_enabled) @@ -596,106 +593,6 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, return ret; } -static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, - unsigned int key_len) -{ - struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); - struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base; - int err; - - crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); - crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm) - & CRYPTO_TFM_REQ_MASK); - err = crypto_ablkcipher_setkey(child, key, key_len); - 
crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child) - & CRYPTO_TFM_RES_MASK); - return err; -} - -static int __ablk_encrypt(struct ablkcipher_request *req) -{ - struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); - struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); - struct blkcipher_desc desc; - - desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); - desc.info = req->info; - desc.flags = 0; - - return crypto_blkcipher_crt(desc.tfm)->encrypt( - &desc, req->dst, req->src, req->nbytes); -} - -static int ablk_encrypt(struct ablkcipher_request *req) -{ - struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); - struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); - - if (!irq_fpu_usable()) { - struct ablkcipher_request *cryptd_req = - ablkcipher_request_ctx(req); - - memcpy(cryptd_req, req, sizeof(*req)); - ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); - - return crypto_ablkcipher_encrypt(cryptd_req); - } else { - return __ablk_encrypt(req); - } -} - -static int ablk_decrypt(struct ablkcipher_request *req) -{ - struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); - struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); - - if (!irq_fpu_usable()) { - struct ablkcipher_request *cryptd_req = - ablkcipher_request_ctx(req); - - memcpy(cryptd_req, req, sizeof(*req)); - ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); - - return crypto_ablkcipher_decrypt(cryptd_req); - } else { - struct blkcipher_desc desc; - - desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); - desc.info = req->info; - desc.flags = 0; - - return crypto_blkcipher_crt(desc.tfm)->decrypt( - &desc, req->dst, req->src, req->nbytes); - } -} - -static void ablk_exit(struct crypto_tfm *tfm) -{ - struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm); - - cryptd_free_ablkcipher(ctx->cryptd_tfm); -} - -static int ablk_init(struct crypto_tfm *tfm) -{ - struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm); - struct 
cryptd_ablkcipher *cryptd_tfm; - char drv_name[CRYPTO_MAX_ALG_NAME]; - - snprintf(drv_name, sizeof(drv_name), "__driver-%s", - crypto_tfm_alg_driver_name(tfm)); - - cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - - ctx->cryptd_tfm = cryptd_tfm; - tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) + - crypto_ablkcipher_reqsize(&cryptd_tfm->base); - - return 0; -} - static struct crypto_alg serpent_algs[10] = { { .cra_name = "__ecb-serpent-sse2", .cra_driver_name = "__driver-ecb-serpent-sse2", @@ -808,7 +705,7 @@ static struct crypto_alg serpent_algs[10] = { { .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_serpent_ctx), + .cra_ctxsize = sizeof(struct async_helper_ctx), .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, @@ -830,7 +727,7 @@ static struct crypto_alg serpent_algs[10] = { { .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_serpent_ctx), + .cra_ctxsize = sizeof(struct async_helper_ctx), .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, @@ -853,7 +750,7 @@ static struct crypto_alg serpent_algs[10] = { { .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_serpent_ctx), + .cra_ctxsize = sizeof(struct async_helper_ctx), .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, @@ -877,7 +774,7 @@ static struct crypto_alg serpent_algs[10] = { { .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_serpent_ctx), + .cra_ctxsize = sizeof(struct async_helper_ctx), .cra_alignmask = 0, .cra_type = 
&crypto_ablkcipher_type, .cra_module = THIS_MODULE, @@ -902,7 +799,7 @@ static struct crypto_alg serpent_algs[10] = { { .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_serpent_ctx), + .cra_ctxsize = sizeof(struct async_helper_ctx), .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, diff --git a/arch/x86/include/asm/crypto/ablk_helper.h b/arch/x86/include/asm/crypto/ablk_helper.h new file mode 100644 index 0000000..6d6b37c --- /dev/null +++ b/arch/x86/include/asm/crypto/ablk_helper.h @@ -0,0 +1,29 @@ +/* + * Shared async block cipher helpers + */ + +#ifndef _CRYPTO_ABLK_HELPER_H +#define _CRYPTO_ABLK_HELPER_H + +#include +#include +#include + +struct async_helper_ctx { + struct cryptd_ablkcipher *cryptd_tfm; +}; + +extern int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, + unsigned int key_len); + +extern int __ablk_encrypt(struct ablkcipher_request *req); + +extern int ablk_encrypt(struct ablkcipher_request *req); + +extern int ablk_decrypt(struct ablkcipher_request *req); + +extern void ablk_exit(struct crypto_tfm *tfm); + +extern int ablk_init(struct crypto_tfm *tfm); + +#endif /* _CRYPTO_ABLK_HELPER_H */ diff --git a/crypto/Kconfig b/crypto/Kconfig index 2c1c2df..8e9145c 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -174,6 +174,11 @@ config CRYPTO_TEST help Quick & dirty crypto test module. 
+config CRYPTO_ABLK_HELPER_X86 + tristate + depends on X86 + select CRYPTO_CRYPTD + comment "Authenticated Encryption with Associated Data" config CRYPTO_CCM @@ -786,6 +791,7 @@ config CRYPTO_SERPENT_SSE2_X86_64 depends on X86 && 64BIT select CRYPTO_ALGAPI select CRYPTO_CRYPTD + select CRYPTO_ABLK_HELPER_X86 select CRYPTO_SERPENT select CRYPTO_LRW select CRYPTO_XTS @@ -806,6 +812,7 @@ config CRYPTO_SERPENT_SSE2_586 depends on X86 && !64BIT select CRYPTO_ALGAPI select CRYPTO_CRYPTD + select CRYPTO_ABLK_HELPER_X86 select CRYPTO_SERPENT select CRYPTO_LRW select CRYPTO_XTS @@ -826,6 +833,7 @@ config CRYPTO_SERPENT_AVX_X86_64 depends on X86 && 64BIT select CRYPTO_ALGAPI select CRYPTO_CRYPTD + select CRYPTO_ABLK_HELPER_X86 select CRYPTO_SERPENT select CRYPTO_LRW select CRYPTO_XTS -- cgit v0.10.2 From 30a04008827b58c4aafbd1d6a27d6b6ed239e993 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Mon, 18 Jun 2012 14:07:03 +0300 Subject: crypto: twofish-avx - change to use shared ablk_* functions Remove duplicate ablk_* functions and make use of ablk_helper module instead. 
Signed-off-by: Jussi Kivilinna Signed-off-by: Herbert Xu diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index 599f19e..cabe058 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -95,11 +96,6 @@ static inline void twofish_dec_blk_xway(struct twofish_ctx *ctx, u8 *dst, } - -struct async_twofish_ctx { - struct cryptd_ablkcipher *cryptd_tfm; -}; - static inline bool twofish_fpu_begin(bool fpu_enabled, unsigned int nbytes) { if (fpu_enabled) @@ -730,106 +726,6 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, return ret; } -static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, - unsigned int key_len) -{ - struct async_twofish_ctx *ctx = crypto_ablkcipher_ctx(tfm); - struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base; - int err; - - crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); - crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm) - & CRYPTO_TFM_REQ_MASK); - err = crypto_ablkcipher_setkey(child, key, key_len); - crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child) - & CRYPTO_TFM_RES_MASK); - return err; -} - -static int __ablk_encrypt(struct ablkcipher_request *req) -{ - struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); - struct async_twofish_ctx *ctx = crypto_ablkcipher_ctx(tfm); - struct blkcipher_desc desc; - - desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); - desc.info = req->info; - desc.flags = 0; - - return crypto_blkcipher_crt(desc.tfm)->encrypt( - &desc, req->dst, req->src, req->nbytes); -} - -static int ablk_encrypt(struct ablkcipher_request *req) -{ - struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); - struct async_twofish_ctx *ctx = crypto_ablkcipher_ctx(tfm); - - if (!irq_fpu_usable()) { - struct ablkcipher_request *cryptd_req = - ablkcipher_request_ctx(req); - - 
memcpy(cryptd_req, req, sizeof(*req)); - ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); - - return crypto_ablkcipher_encrypt(cryptd_req); - } else { - return __ablk_encrypt(req); - } -} - -static int ablk_decrypt(struct ablkcipher_request *req) -{ - struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); - struct async_twofish_ctx *ctx = crypto_ablkcipher_ctx(tfm); - - if (!irq_fpu_usable()) { - struct ablkcipher_request *cryptd_req = - ablkcipher_request_ctx(req); - - memcpy(cryptd_req, req, sizeof(*req)); - ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); - - return crypto_ablkcipher_decrypt(cryptd_req); - } else { - struct blkcipher_desc desc; - - desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); - desc.info = req->info; - desc.flags = 0; - - return crypto_blkcipher_crt(desc.tfm)->decrypt( - &desc, req->dst, req->src, req->nbytes); - } -} - -static void ablk_exit(struct crypto_tfm *tfm) -{ - struct async_twofish_ctx *ctx = crypto_tfm_ctx(tfm); - - cryptd_free_ablkcipher(ctx->cryptd_tfm); -} - -static int ablk_init(struct crypto_tfm *tfm) -{ - struct async_twofish_ctx *ctx = crypto_tfm_ctx(tfm); - struct cryptd_ablkcipher *cryptd_tfm; - char drv_name[CRYPTO_MAX_ALG_NAME]; - - snprintf(drv_name, sizeof(drv_name), "__driver-%s", - crypto_tfm_alg_driver_name(tfm)); - - cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - - ctx->cryptd_tfm = cryptd_tfm; - tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) + - crypto_ablkcipher_reqsize(&cryptd_tfm->base); - - return 0; -} - static struct crypto_alg twofish_algs[10] = { { .cra_name = "__ecb-twofish-avx", .cra_driver_name = "__driver-ecb-twofish-avx", @@ -942,7 +838,7 @@ static struct crypto_alg twofish_algs[10] = { { .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_twofish_ctx), + .cra_ctxsize = 
sizeof(struct async_helper_ctx), .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, @@ -964,7 +860,7 @@ static struct crypto_alg twofish_algs[10] = { { .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_twofish_ctx), + .cra_ctxsize = sizeof(struct async_helper_ctx), .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, @@ -987,7 +883,7 @@ static struct crypto_alg twofish_algs[10] = { { .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_twofish_ctx), + .cra_ctxsize = sizeof(struct async_helper_ctx), .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, @@ -1011,7 +907,7 @@ static struct crypto_alg twofish_algs[10] = { { .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_twofish_ctx), + .cra_ctxsize = sizeof(struct async_helper_ctx), .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, @@ -1036,7 +932,7 @@ static struct crypto_alg twofish_algs[10] = { { .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_twofish_ctx), + .cra_ctxsize = sizeof(struct async_helper_ctx), .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, diff --git a/crypto/Kconfig b/crypto/Kconfig index 8e9145c..24b929e 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -946,6 +946,7 @@ config CRYPTO_TWOFISH_AVX_X86_64 depends on X86 && 64BIT select CRYPTO_ALGAPI select CRYPTO_CRYPTD + select CRYPTO_ABLK_HELPER_X86 select CRYPTO_TWOFISH_COMMON select CRYPTO_TWOFISH_X86_64 select CRYPTO_TWOFISH_X86_64_3WAY -- cgit v0.10.2 From a9629d7142ea22567eaa999232d8a31a7493665a Mon 
Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Mon, 18 Jun 2012 14:07:08 +0300 Subject: crypto: aes_ni - change to use shared ablk_* functions Remove duplicate ablk_* functions and make use of ablk_helper module instead. Signed-off-by: Jussi Kivilinna Signed-off-by: Herbert Xu diff --git a/arch/x86/crypto/ablk_helper.c b/arch/x86/crypto/ablk_helper.c index 284ca3b..43282fe 100644 --- a/arch/x86/crypto/ablk_helper.c +++ b/arch/x86/crypto/ablk_helper.c @@ -118,32 +118,31 @@ void ablk_exit(struct crypto_tfm *tfm) } EXPORT_SYMBOL_GPL(ablk_exit); -void ablk_init_common(struct crypto_tfm *tfm, - struct cryptd_ablkcipher *cryptd_tfm) +int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name) { struct async_helper_ctx *ctx = crypto_tfm_ctx(tfm); + struct cryptd_ablkcipher *cryptd_tfm; + + cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); ctx->cryptd_tfm = cryptd_tfm; tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) + crypto_ablkcipher_reqsize(&cryptd_tfm->base); + + return 0; } EXPORT_SYMBOL_GPL(ablk_init_common); int ablk_init(struct crypto_tfm *tfm) { - struct cryptd_ablkcipher *cryptd_tfm; char drv_name[CRYPTO_MAX_ALG_NAME]; snprintf(drv_name, sizeof(drv_name), "__driver-%s", crypto_tfm_alg_driver_name(tfm)); - cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - - ablk_init_common(tfm, cryptd_tfm); - - return 0; + return ablk_init_common(tfm, drv_name); } EXPORT_SYMBOL_GPL(ablk_init); diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index ac7f5cd..7c9d54d8 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -52,10 +53,6 @@ #define HAS_XTS #endif -struct async_aes_ctx { - struct cryptd_ablkcipher *cryptd_tfm; -}; - /* This data is stored at the end of the crypto_tfm 
struct. * It's a type of per "session" data storage location. * This needs to be 16 byte aligned. @@ -377,87 +374,6 @@ static int ctr_crypt(struct blkcipher_desc *desc, } #endif -static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, - unsigned int key_len) -{ - struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); - struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base; - int err; - - crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); - crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm) - & CRYPTO_TFM_REQ_MASK); - err = crypto_ablkcipher_setkey(child, key, key_len); - crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child) - & CRYPTO_TFM_RES_MASK); - return err; -} - -static int ablk_encrypt(struct ablkcipher_request *req) -{ - struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); - struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); - - if (!irq_fpu_usable()) { - struct ablkcipher_request *cryptd_req = - ablkcipher_request_ctx(req); - memcpy(cryptd_req, req, sizeof(*req)); - ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); - return crypto_ablkcipher_encrypt(cryptd_req); - } else { - struct blkcipher_desc desc; - desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); - desc.info = req->info; - desc.flags = 0; - return crypto_blkcipher_crt(desc.tfm)->encrypt( - &desc, req->dst, req->src, req->nbytes); - } -} - -static int ablk_decrypt(struct ablkcipher_request *req) -{ - struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); - struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); - - if (!irq_fpu_usable()) { - struct ablkcipher_request *cryptd_req = - ablkcipher_request_ctx(req); - memcpy(cryptd_req, req, sizeof(*req)); - ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); - return crypto_ablkcipher_decrypt(cryptd_req); - } else { - struct blkcipher_desc desc; - desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); - desc.info = req->info; - desc.flags = 0; - 
return crypto_blkcipher_crt(desc.tfm)->decrypt( - &desc, req->dst, req->src, req->nbytes); - } -} - -static void ablk_exit(struct crypto_tfm *tfm) -{ - struct async_aes_ctx *ctx = crypto_tfm_ctx(tfm); - - cryptd_free_ablkcipher(ctx->cryptd_tfm); -} - -static int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name) -{ - struct async_aes_ctx *ctx = crypto_tfm_ctx(tfm); - struct cryptd_ablkcipher *cryptd_tfm; - - cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - - ctx->cryptd_tfm = cryptd_tfm; - tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) + - crypto_ablkcipher_reqsize(&cryptd_tfm->base); - - return 0; -} - static int ablk_ecb_init(struct crypto_tfm *tfm) { return ablk_init_common(tfm, "__driver-ecb-aes-aesni"); @@ -968,7 +884,7 @@ static struct crypto_alg aesni_algs[] = { { .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_aes_ctx), + .cra_ctxsize = sizeof(struct async_helper_ctx), .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, @@ -989,7 +905,7 @@ static struct crypto_alg aesni_algs[] = { { .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_aes_ctx), + .cra_ctxsize = sizeof(struct async_helper_ctx), .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, @@ -1033,7 +949,7 @@ static struct crypto_alg aesni_algs[] = { { .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_aes_ctx), + .cra_ctxsize = sizeof(struct async_helper_ctx), .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, @@ -1098,7 +1014,7 @@ static struct crypto_alg aesni_algs[] = { { .cra_priority = 400, .cra_flags = 
CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_aes_ctx), + .cra_ctxsize = sizeof(struct async_helper_ctx), .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, @@ -1126,7 +1042,7 @@ static struct crypto_alg aesni_algs[] = { { .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_aes_ctx), + .cra_ctxsize = sizeof(struct async_helper_ctx), .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, @@ -1150,7 +1066,7 @@ static struct crypto_alg aesni_algs[] = { { .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_aes_ctx), + .cra_ctxsize = sizeof(struct async_helper_ctx), .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, @@ -1174,7 +1090,7 @@ static struct crypto_alg aesni_algs[] = { { .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_aes_ctx), + .cra_ctxsize = sizeof(struct async_helper_ctx), .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, diff --git a/arch/x86/include/asm/crypto/ablk_helper.h b/arch/x86/include/asm/crypto/ablk_helper.h index 6d6b37c..4f93df5 100644 --- a/arch/x86/include/asm/crypto/ablk_helper.h +++ b/arch/x86/include/asm/crypto/ablk_helper.h @@ -24,6 +24,8 @@ extern int ablk_decrypt(struct ablkcipher_request *req); extern void ablk_exit(struct crypto_tfm *tfm); +extern int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name); + extern int ablk_init(struct crypto_tfm *tfm); #endif /* _CRYPTO_ABLK_HELPER_H */ diff --git a/crypto/Kconfig b/crypto/Kconfig index 24b929e..471cc46 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -557,6 +557,7 @@ config 
CRYPTO_AES_NI_INTEL select CRYPTO_AES_X86_64 if 64BIT select CRYPTO_AES_586 if !64BIT select CRYPTO_CRYPTD + select CRYPTO_ABLK_HELPER_X86 select CRYPTO_ALGAPI help Use Intel AES-NI instructions for AES algorithm. -- cgit v0.10.2 From e81792fbc2a6fa4969f724b959829667fb2d4f01 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Mon, 18 Jun 2012 14:07:14 +0300 Subject: crypto: serpent-sse2 - prepare serpent-sse2 glue code into generic x86 glue code for 128bit block ciphers Block cipher implementations in arch/x86/crypto/ contain common glue code that is currently duplicated in each module (camellia-x86_64, twofish-x86_64-3way, twofish-avx, serpent-sse2 and serpent-avx). This patch prepares serpent-sse2 glue into generic glue code for all 128bit block ciphers to use in arch/x86/crypto. Signed-off-by: Jussi Kivilinna Signed-off-by: Herbert Xu diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c index 805c91f..8c86239 100644 --- a/arch/x86/crypto/serpent_sse2_glue.c +++ b/arch/x86/crypto/serpent_sse2_glue.c @@ -48,105 +48,129 @@ #include #include -static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes) +typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src); +typedef void (*common_glue_cbc_func_t)(void *ctx, u128 *dst, const u128 *src); +typedef void (*common_glue_ctr_func_t)(void *ctx, u128 *dst, const u128 *src, + u128 *iv); + +#define GLUE_FUNC_CAST(fn) ((common_glue_func_t)(fn)) +#define GLUE_CBC_FUNC_CAST(fn) ((common_glue_cbc_func_t)(fn)) +#define GLUE_CTR_FUNC_CAST(fn) ((common_glue_ctr_func_t)(fn)) + +struct common_glue_func_entry { + unsigned int num_blocks; /* number of blocks that @fn will process */ + union { + common_glue_func_t ecb; + common_glue_cbc_func_t cbc; + common_glue_ctr_func_t ctr; + } fn_u; +}; + +struct common_glue_ctx { + unsigned int num_funcs; + int fpu_blocks_limit; /* -1 means fpu not needed at all */ + + /* + * First funcs entry must have largest num_blocks and last 
funcs entry + * must have num_blocks == 1! + */ + struct common_glue_func_entry funcs[]; +}; + +static inline bool glue_fpu_begin(unsigned int bsize, int fpu_blocks_limit, + struct blkcipher_desc *desc, + bool fpu_enabled, unsigned int nbytes) { + if (likely(fpu_blocks_limit < 0)) + return false; + if (fpu_enabled) return true; - /* SSE2 is only used when chunk to be processed is large enough, so - * do not enable FPU until it is necessary. + /* + * Vector-registers are only used when chunk to be processed is large + * enough, so do not enable FPU until it is necessary. */ - if (nbytes < SERPENT_BLOCK_SIZE * SERPENT_PARALLEL_BLOCKS) + if (nbytes < bsize * (unsigned int)fpu_blocks_limit) return false; + if (desc) { + /* prevent sleeping if FPU is in use */ + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + } + kernel_fpu_begin(); return true; } -static inline void serpent_fpu_end(bool fpu_enabled) +static inline void glue_fpu_end(bool fpu_enabled) { if (fpu_enabled) kernel_fpu_end(); } -static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, - bool enc) +static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, + struct blkcipher_desc *desc, + struct blkcipher_walk *walk) { + void *ctx = crypto_blkcipher_ctx(desc->tfm); + const unsigned int bsize = 128 / 8; + unsigned int nbytes, i, func_bytes; bool fpu_enabled = false; - struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - const unsigned int bsize = SERPENT_BLOCK_SIZE; - unsigned int nbytes; int err; err = blkcipher_walk_virt(desc, walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; while ((nbytes = walk->nbytes)) { u8 *wsrc = walk->src.virt.addr; u8 *wdst = walk->dst.virt.addr; - fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes); + fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, + desc, fpu_enabled, nbytes); - /* Process multi-block batch */ - if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) { - do { - if (enc) - serpent_enc_blk_xway(ctx, wdst, wsrc); - else - 
serpent_dec_blk_xway(ctx, wdst, wsrc); + for (i = 0; i < gctx->num_funcs; i++) { + func_bytes = bsize * gctx->funcs[i].num_blocks; - wsrc += bsize * SERPENT_PARALLEL_BLOCKS; - wdst += bsize * SERPENT_PARALLEL_BLOCKS; - nbytes -= bsize * SERPENT_PARALLEL_BLOCKS; - } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS); + /* Process multi-block batch */ + if (nbytes >= func_bytes) { + do { + gctx->funcs[i].fn_u.ecb(ctx, wdst, + wsrc); - if (nbytes < bsize) - goto done; - } - - /* Handle leftovers */ - do { - if (enc) - __serpent_encrypt(ctx, wdst, wsrc); - else - __serpent_decrypt(ctx, wdst, wsrc); + wsrc += func_bytes; + wdst += func_bytes; + nbytes -= func_bytes; + } while (nbytes >= func_bytes); - wsrc += bsize; - wdst += bsize; - nbytes -= bsize; - } while (nbytes >= bsize); + if (nbytes < bsize) + goto done; + } + } done: err = blkcipher_walk_done(desc, walk, nbytes); } - serpent_fpu_end(fpu_enabled); + glue_fpu_end(fpu_enabled); return err; } -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, + struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) { struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_crypt(desc, &walk, true); + return __glue_ecb_crypt_128bit(gctx, desc, &walk); } -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static unsigned int __glue_cbc_encrypt_128bit(const common_glue_func_t fn, + struct blkcipher_desc *desc, + struct blkcipher_walk *walk) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_crypt(desc, &walk, false); -} - -static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - const unsigned int bsize = 
SERPENT_BLOCK_SIZE; + void *ctx = crypto_blkcipher_ctx(desc->tfm); + const unsigned int bsize = 128 / 8; unsigned int nbytes = walk->nbytes; u128 *src = (u128 *)walk->src.virt.addr; u128 *dst = (u128 *)walk->dst.virt.addr; @@ -154,7 +178,7 @@ static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, do { u128_xor(dst, src, iv); - __serpent_encrypt(ctx, (u8 *)dst, (u8 *)dst); + fn(ctx, (u8 *)dst, (u8 *)dst); iv = dst; src += 1; @@ -166,8 +190,10 @@ static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, return nbytes; } -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +int glue_cbc_encrypt_128bit(const common_glue_func_t fn, + struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) { struct blkcipher_walk walk; int err; @@ -176,24 +202,26 @@ static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, err = blkcipher_walk_virt(desc, &walk); while ((nbytes = walk.nbytes)) { - nbytes = __cbc_encrypt(desc, &walk); + nbytes = __glue_cbc_encrypt_128bit(fn, desc, &walk); err = blkcipher_walk_done(desc, &walk, nbytes); } return err; } -static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +static unsigned int +__glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, + struct blkcipher_desc *desc, + struct blkcipher_walk *walk) { - struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - const unsigned int bsize = SERPENT_BLOCK_SIZE; + void *ctx = crypto_blkcipher_ctx(desc->tfm); + const unsigned int bsize = 128 / 8; unsigned int nbytes = walk->nbytes; u128 *src = (u128 *)walk->src.virt.addr; u128 *dst = (u128 *)walk->dst.virt.addr; - u128 ivs[SERPENT_PARALLEL_BLOCKS - 1]; u128 last_iv; - int i; + unsigned int num_blocks, func_bytes; + unsigned int i; /* Start of the last block. 
*/ src += nbytes / bsize - 1; @@ -201,45 +229,31 @@ static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, last_iv = *src; - /* Process multi-block batch */ - if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) { - do { - nbytes -= bsize * (SERPENT_PARALLEL_BLOCKS - 1); - src -= SERPENT_PARALLEL_BLOCKS - 1; - dst -= SERPENT_PARALLEL_BLOCKS - 1; + for (i = 0; i < gctx->num_funcs; i++) { + num_blocks = gctx->funcs[i].num_blocks; + func_bytes = bsize * num_blocks; - for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++) - ivs[i] = src[i]; + /* Process multi-block batch */ + if (nbytes >= func_bytes) { + do { + nbytes -= func_bytes - bsize; + src -= num_blocks - 1; + dst -= num_blocks - 1; + + gctx->funcs[i].fn_u.cbc(ctx, dst, src); - serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); + nbytes -= bsize; + if (nbytes < bsize) + goto done; - for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++) - u128_xor(dst + (i + 1), dst + (i + 1), ivs + i); + u128_xor(dst, dst, src - 1); + src -= 1; + dst -= 1; + } while (nbytes >= func_bytes); - nbytes -= bsize; if (nbytes < bsize) goto done; - - u128_xor(dst, dst, src - 1); - src -= 1; - dst -= 1; - } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS); - - if (nbytes < bsize) - goto done; - } - - /* Handle leftovers */ - for (;;) { - __serpent_decrypt(ctx, (u8 *)dst, (u8 *)src); - - nbytes -= bsize; - if (nbytes < bsize) - break; - - u128_xor(dst, dst, src - 1); - src -= 1; - dst -= 1; + } } done: @@ -249,24 +263,27 @@ done: return nbytes; } -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, + struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) { + const unsigned int bsize = 128 / 8; bool fpu_enabled = false; struct blkcipher_walk walk; int err; blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); - desc->flags &= 
~CRYPTO_TFM_REQ_MAY_SLEEP; while ((nbytes = walk.nbytes)) { - fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes); - nbytes = __cbc_decrypt(desc, &walk); + fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, + desc, fpu_enabled, nbytes); + nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk); err = blkcipher_walk_done(desc, &walk, nbytes); } - serpent_fpu_end(fpu_enabled); + glue_fpu_end(fpu_enabled); return err; } @@ -289,109 +306,232 @@ static inline void u128_inc(u128 *i) i->a++; } -static void ctr_crypt_final(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +static void glue_ctr_crypt_final_128bit(const common_glue_ctr_func_t fn_ctr, + struct blkcipher_desc *desc, + struct blkcipher_walk *walk) { - struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - u8 *ctrblk = walk->iv; - u8 keystream[SERPENT_BLOCK_SIZE]; - u8 *src = walk->src.virt.addr; - u8 *dst = walk->dst.virt.addr; + void *ctx = crypto_blkcipher_ctx(desc->tfm); + u8 *src = (u8 *)walk->src.virt.addr; + u8 *dst = (u8 *)walk->dst.virt.addr; unsigned int nbytes = walk->nbytes; + u128 ctrblk; + u128 tmp; - __serpent_encrypt(ctx, keystream, ctrblk); - crypto_xor(keystream, src, nbytes); - memcpy(dst, keystream, nbytes); + be128_to_u128(&ctrblk, (be128 *)walk->iv); - crypto_inc(ctrblk, SERPENT_BLOCK_SIZE); + memcpy(&tmp, src, nbytes); + fn_ctr(ctx, &tmp, &tmp, &ctrblk); + memcpy(dst, &tmp, nbytes); + + u128_to_be128((be128 *)walk->iv, &ctrblk); } -static unsigned int __ctr_crypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, + struct blkcipher_desc *desc, + struct blkcipher_walk *walk) { - struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - const unsigned int bsize = SERPENT_BLOCK_SIZE; + const unsigned int bsize = 128 / 8; + void *ctx = crypto_blkcipher_ctx(desc->tfm); unsigned int nbytes = walk->nbytes; u128 *src = (u128 *)walk->src.virt.addr; u128 *dst = (u128 
*)walk->dst.virt.addr; u128 ctrblk; - be128 ctrblocks[SERPENT_PARALLEL_BLOCKS]; - int i; + unsigned int num_blocks, func_bytes; + unsigned int i; be128_to_u128(&ctrblk, (be128 *)walk->iv); /* Process multi-block batch */ - if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) { - do { - /* create ctrblks for parallel encrypt */ - for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) { - if (dst != src) - dst[i] = src[i]; - - u128_to_be128(&ctrblocks[i], &ctrblk); - u128_inc(&ctrblk); - } + for (i = 0; i < gctx->num_funcs; i++) { + num_blocks = gctx->funcs[i].num_blocks; + func_bytes = bsize * num_blocks; - serpent_enc_blk_xway_xor(ctx, (u8 *)dst, - (u8 *)ctrblocks); + if (nbytes >= func_bytes) { + do { + gctx->funcs[i].fn_u.ctr(ctx, dst, src, &ctrblk); - src += SERPENT_PARALLEL_BLOCKS; - dst += SERPENT_PARALLEL_BLOCKS; - nbytes -= bsize * SERPENT_PARALLEL_BLOCKS; - } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS); + src += num_blocks; + dst += num_blocks; + nbytes -= func_bytes; + } while (nbytes >= func_bytes); - if (nbytes < bsize) - goto done; + if (nbytes < bsize) + goto done; + } } - /* Handle leftovers */ - do { - if (dst != src) - *dst = *src; - - u128_to_be128(&ctrblocks[0], &ctrblk); - u128_inc(&ctrblk); - - __serpent_encrypt(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks); - u128_xor(dst, dst, (u128 *)ctrblocks); - - src += 1; - dst += 1; - nbytes -= bsize; - } while (nbytes >= bsize); - done: u128_to_be128((be128 *)walk->iv, &ctrblk); return nbytes; } -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, + struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) { + const unsigned int bsize = 128 / 8; bool fpu_enabled = false; struct blkcipher_walk walk; int err; blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, SERPENT_BLOCK_SIZE); - desc->flags &= 
~CRYPTO_TFM_REQ_MAY_SLEEP; + err = blkcipher_walk_virt_block(desc, &walk, bsize); - while ((nbytes = walk.nbytes) >= SERPENT_BLOCK_SIZE) { - fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes); - nbytes = __ctr_crypt(desc, &walk); + while ((nbytes = walk.nbytes) >= bsize) { + fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, + desc, fpu_enabled, nbytes); + nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk); err = blkcipher_walk_done(desc, &walk, nbytes); } - serpent_fpu_end(fpu_enabled); + glue_fpu_end(fpu_enabled); if (walk.nbytes) { - ctr_crypt_final(desc, &walk); + glue_ctr_crypt_final_128bit( + gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk); err = blkcipher_walk_done(desc, &walk, 0); } return err; } +static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src) +{ + u128 ivs[SERPENT_PARALLEL_BLOCKS - 1]; + unsigned int j; + + for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++) + ivs[j] = src[j]; + + serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); + + for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++) + u128_xor(dst + (j + 1), dst + (j + 1), ivs + j); +} + +static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv) +{ + be128 ctrblk; + + u128_to_be128(&ctrblk, iv); + u128_inc(iv); + + __serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); + u128_xor(dst, src, (u128 *)&ctrblk); +} + +static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src, + u128 *iv) +{ + be128 ctrblks[SERPENT_PARALLEL_BLOCKS]; + unsigned int i; + + for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) { + if (dst != src) + dst[i] = src[i]; + + u128_to_be128(&ctrblks[i], iv); + u128_inc(iv); + } + + serpent_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks); +} + +static const struct common_glue_ctx serpent_enc = { + .num_funcs = 2, + .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, + + .funcs = { { + .num_blocks = SERPENT_PARALLEL_BLOCKS, + .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_enc_blk_xway) } + }, { + .num_blocks = 1, + .fn_u = { .ecb = 
GLUE_FUNC_CAST(__serpent_encrypt) } + } } +}; + +static const struct common_glue_ctx serpent_ctr = { + .num_funcs = 2, + .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, + + .funcs = { { + .num_blocks = SERPENT_PARALLEL_BLOCKS, + .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr_xway) } + }, { + .num_blocks = 1, + .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr) } + } } +}; + +static const struct common_glue_ctx serpent_dec = { + .num_funcs = 2, + .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, + + .funcs = { { + .num_blocks = SERPENT_PARALLEL_BLOCKS, + .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_dec_blk_xway) } + }, { + .num_blocks = 1, + .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) } + } } +}; + +static const struct common_glue_ctx serpent_dec_cbc = { + .num_funcs = 2, + .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, + + .funcs = { { + .num_blocks = SERPENT_PARALLEL_BLOCKS, + .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_decrypt_cbc_xway) } + }, { + .num_blocks = 1, + .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) } + } } +}; + +static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes); +} + +static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes); +} + +static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc, + dst, src, nbytes); +} + +static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src, + nbytes); +} + +static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct 
scatterlist *src, unsigned int nbytes) +{ + return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes); +} + +static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes) +{ + return glue_fpu_begin(SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS, + NULL, fpu_enabled, nbytes); +} + +static inline void serpent_fpu_end(bool fpu_enabled) +{ + glue_fpu_end(fpu_enabled); +} + struct crypt_priv { struct serpent_ctx *ctx; bool fpu_enabled; -- cgit v0.10.2 From 596d875052dac6bf084f0c3a3e946fb4709b727b Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Mon, 18 Jun 2012 14:07:19 +0300 Subject: crypto: serpent-sse2 - split generic glue code to new helper module Now that serpent-sse2 glue code has been made generic, it can be split to separate module. Signed-off-by: Jussi Kivilinna Signed-off-by: Herbert Xu diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index ad74691..e908e5d 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -3,6 +3,7 @@ # obj-$(CONFIG_CRYPTO_ABLK_HELPER_X86) += ablk_helper.o +obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c new file mode 100644 index 0000000..4854f0f --- /dev/null +++ b/arch/x86/crypto/glue_helper.c @@ -0,0 +1,307 @@ +/* + * Shared glue code for 128bit block ciphers + * + * Copyright (c) 2012 Jussi Kivilinna + * + * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: + * Copyright (c) 2006 Herbert Xu + * CTR part based on code (crypto/ctr.c) by: + * (C) Copyright IBM Corp. 2007 - Joy Latten + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +#include +#include +#include +#include +#include +#include + +static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, + struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + void *ctx = crypto_blkcipher_ctx(desc->tfm); + const unsigned int bsize = 128 / 8; + unsigned int nbytes, i, func_bytes; + bool fpu_enabled = false; + int err; + + err = blkcipher_walk_virt(desc, walk); + + while ((nbytes = walk->nbytes)) { + u8 *wsrc = walk->src.virt.addr; + u8 *wdst = walk->dst.virt.addr; + + fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, + desc, fpu_enabled, nbytes); + + for (i = 0; i < gctx->num_funcs; i++) { + func_bytes = bsize * gctx->funcs[i].num_blocks; + + /* Process multi-block batch */ + if (nbytes >= func_bytes) { + do { + gctx->funcs[i].fn_u.ecb(ctx, wdst, + wsrc); + + wsrc += func_bytes; + wdst += func_bytes; + nbytes -= func_bytes; + } while (nbytes >= func_bytes); + + if (nbytes < bsize) + goto done; + } + } + +done: + err = blkcipher_walk_done(desc, walk, nbytes); + } + + glue_fpu_end(fpu_enabled); + return err; +} + +int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, + struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return __glue_ecb_crypt_128bit(gctx, desc, &walk); +} +EXPORT_SYMBOL_GPL(glue_ecb_crypt_128bit); + +static unsigned int __glue_cbc_encrypt_128bit(const common_glue_func_t fn, + struct 
blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + void *ctx = crypto_blkcipher_ctx(desc->tfm); + const unsigned int bsize = 128 / 8; + unsigned int nbytes = walk->nbytes; + u128 *src = (u128 *)walk->src.virt.addr; + u128 *dst = (u128 *)walk->dst.virt.addr; + u128 *iv = (u128 *)walk->iv; + + do { + u128_xor(dst, src, iv); + fn(ctx, (u8 *)dst, (u8 *)dst); + iv = dst; + + src += 1; + dst += 1; + nbytes -= bsize; + } while (nbytes >= bsize); + + u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv); + return nbytes; +} + +int glue_cbc_encrypt_128bit(const common_glue_func_t fn, + struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + while ((nbytes = walk.nbytes)) { + nbytes = __glue_cbc_encrypt_128bit(fn, desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + return err; +} +EXPORT_SYMBOL_GPL(glue_cbc_encrypt_128bit); + +static unsigned int +__glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, + struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + void *ctx = crypto_blkcipher_ctx(desc->tfm); + const unsigned int bsize = 128 / 8; + unsigned int nbytes = walk->nbytes; + u128 *src = (u128 *)walk->src.virt.addr; + u128 *dst = (u128 *)walk->dst.virt.addr; + u128 last_iv; + unsigned int num_blocks, func_bytes; + unsigned int i; + + /* Start of the last block. 
*/ + src += nbytes / bsize - 1; + dst += nbytes / bsize - 1; + + last_iv = *src; + + for (i = 0; i < gctx->num_funcs; i++) { + num_blocks = gctx->funcs[i].num_blocks; + func_bytes = bsize * num_blocks; + + /* Process multi-block batch */ + if (nbytes >= func_bytes) { + do { + nbytes -= func_bytes - bsize; + src -= num_blocks - 1; + dst -= num_blocks - 1; + + gctx->funcs[i].fn_u.cbc(ctx, dst, src); + + nbytes -= bsize; + if (nbytes < bsize) + goto done; + + u128_xor(dst, dst, src - 1); + src -= 1; + dst -= 1; + } while (nbytes >= func_bytes); + + if (nbytes < bsize) + goto done; + } + } + +done: + u128_xor(dst, dst, (u128 *)walk->iv); + *(u128 *)walk->iv = last_iv; + + return nbytes; +} + +int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, + struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + const unsigned int bsize = 128 / 8; + bool fpu_enabled = false; + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + while ((nbytes = walk.nbytes)) { + fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, + desc, fpu_enabled, nbytes); + nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + glue_fpu_end(fpu_enabled); + return err; +} +EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit); + +static void glue_ctr_crypt_final_128bit(const common_glue_ctr_func_t fn_ctr, + struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + void *ctx = crypto_blkcipher_ctx(desc->tfm); + u8 *src = (u8 *)walk->src.virt.addr; + u8 *dst = (u8 *)walk->dst.virt.addr; + unsigned int nbytes = walk->nbytes; + u128 ctrblk; + u128 tmp; + + be128_to_u128(&ctrblk, (be128 *)walk->iv); + + memcpy(&tmp, src, nbytes); + fn_ctr(ctx, &tmp, &tmp, &ctrblk); + memcpy(dst, &tmp, nbytes); + + u128_to_be128((be128 *)walk->iv, &ctrblk); +} +EXPORT_SYMBOL_GPL(glue_ctr_crypt_final_128bit); + +static unsigned 
int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, + struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + const unsigned int bsize = 128 / 8; + void *ctx = crypto_blkcipher_ctx(desc->tfm); + unsigned int nbytes = walk->nbytes; + u128 *src = (u128 *)walk->src.virt.addr; + u128 *dst = (u128 *)walk->dst.virt.addr; + u128 ctrblk; + unsigned int num_blocks, func_bytes; + unsigned int i; + + be128_to_u128(&ctrblk, (be128 *)walk->iv); + + /* Process multi-block batch */ + for (i = 0; i < gctx->num_funcs; i++) { + num_blocks = gctx->funcs[i].num_blocks; + func_bytes = bsize * num_blocks; + + if (nbytes >= func_bytes) { + do { + gctx->funcs[i].fn_u.ctr(ctx, dst, src, &ctrblk); + + src += num_blocks; + dst += num_blocks; + nbytes -= func_bytes; + } while (nbytes >= func_bytes); + + if (nbytes < bsize) + goto done; + } + } + +done: + u128_to_be128((be128 *)walk->iv, &ctrblk); + return nbytes; +} + +int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, + struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + const unsigned int bsize = 128 / 8; + bool fpu_enabled = false; + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, bsize); + + while ((nbytes = walk.nbytes) >= bsize) { + fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, + desc, fpu_enabled, nbytes); + nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + glue_fpu_end(fpu_enabled); + + if (walk.nbytes) { + glue_ctr_crypt_final_128bit( + gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk); + err = blkcipher_walk_done(desc, &walk, 0); + } + + return err; +} +EXPORT_SYMBOL_GPL(glue_ctr_crypt_128bit); + +MODULE_LICENSE("GPL"); diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c index 8c86239..49a32ee 100644 --- a/arch/x86/crypto/serpent_sse2_glue.c +++ 
b/arch/x86/crypto/serpent_sse2_glue.c @@ -41,359 +41,9 @@ #include #include #include -#include #include #include -#include -#include -#include - -typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src); -typedef void (*common_glue_cbc_func_t)(void *ctx, u128 *dst, const u128 *src); -typedef void (*common_glue_ctr_func_t)(void *ctx, u128 *dst, const u128 *src, - u128 *iv); - -#define GLUE_FUNC_CAST(fn) ((common_glue_func_t)(fn)) -#define GLUE_CBC_FUNC_CAST(fn) ((common_glue_cbc_func_t)(fn)) -#define GLUE_CTR_FUNC_CAST(fn) ((common_glue_ctr_func_t)(fn)) - -struct common_glue_func_entry { - unsigned int num_blocks; /* number of blocks that @fn will process */ - union { - common_glue_func_t ecb; - common_glue_cbc_func_t cbc; - common_glue_ctr_func_t ctr; - } fn_u; -}; - -struct common_glue_ctx { - unsigned int num_funcs; - int fpu_blocks_limit; /* -1 means fpu not needed at all */ - - /* - * First funcs entry must have largest num_blocks and last funcs entry - * must have num_blocks == 1! - */ - struct common_glue_func_entry funcs[]; -}; - -static inline bool glue_fpu_begin(unsigned int bsize, int fpu_blocks_limit, - struct blkcipher_desc *desc, - bool fpu_enabled, unsigned int nbytes) -{ - if (likely(fpu_blocks_limit < 0)) - return false; - - if (fpu_enabled) - return true; - - /* - * Vector-registers are only used when chunk to be processed is large - * enough, so do not enable FPU until it is necessary. 
- */ - if (nbytes < bsize * (unsigned int)fpu_blocks_limit) - return false; - - if (desc) { - /* prevent sleeping if FPU is in use */ - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - } - - kernel_fpu_begin(); - return true; -} - -static inline void glue_fpu_end(bool fpu_enabled) -{ - if (fpu_enabled) - kernel_fpu_end(); -} - -static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - void *ctx = crypto_blkcipher_ctx(desc->tfm); - const unsigned int bsize = 128 / 8; - unsigned int nbytes, i, func_bytes; - bool fpu_enabled = false; - int err; - - err = blkcipher_walk_virt(desc, walk); - - while ((nbytes = walk->nbytes)) { - u8 *wsrc = walk->src.virt.addr; - u8 *wdst = walk->dst.virt.addr; - - fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - desc, fpu_enabled, nbytes); - - for (i = 0; i < gctx->num_funcs; i++) { - func_bytes = bsize * gctx->funcs[i].num_blocks; - - /* Process multi-block batch */ - if (nbytes >= func_bytes) { - do { - gctx->funcs[i].fn_u.ecb(ctx, wdst, - wsrc); - - wsrc += func_bytes; - wdst += func_bytes; - nbytes -= func_bytes; - } while (nbytes >= func_bytes); - - if (nbytes < bsize) - goto done; - } - } - -done: - err = blkcipher_walk_done(desc, walk, nbytes); - } - - glue_fpu_end(fpu_enabled); - return err; -} - -int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return __glue_ecb_crypt_128bit(gctx, desc, &walk); -} - -static unsigned int __glue_cbc_encrypt_128bit(const common_glue_func_t fn, - struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - void *ctx = crypto_blkcipher_ctx(desc->tfm); - const unsigned int bsize = 128 / 8; - unsigned int nbytes = walk->nbytes; - u128 *src = (u128 *)walk->src.virt.addr; - u128 *dst = (u128 *)walk->dst.virt.addr; - 
u128 *iv = (u128 *)walk->iv; - - do { - u128_xor(dst, src, iv); - fn(ctx, (u8 *)dst, (u8 *)dst); - iv = dst; - - src += 1; - dst += 1; - nbytes -= bsize; - } while (nbytes >= bsize); - - u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv); - return nbytes; -} - -int glue_cbc_encrypt_128bit(const common_glue_func_t fn, - struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - - while ((nbytes = walk.nbytes)) { - nbytes = __glue_cbc_encrypt_128bit(fn, desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); - } - - return err; -} - -static unsigned int -__glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - void *ctx = crypto_blkcipher_ctx(desc->tfm); - const unsigned int bsize = 128 / 8; - unsigned int nbytes = walk->nbytes; - u128 *src = (u128 *)walk->src.virt.addr; - u128 *dst = (u128 *)walk->dst.virt.addr; - u128 last_iv; - unsigned int num_blocks, func_bytes; - unsigned int i; - - /* Start of the last block. 
*/ - src += nbytes / bsize - 1; - dst += nbytes / bsize - 1; - - last_iv = *src; - - for (i = 0; i < gctx->num_funcs; i++) { - num_blocks = gctx->funcs[i].num_blocks; - func_bytes = bsize * num_blocks; - - /* Process multi-block batch */ - if (nbytes >= func_bytes) { - do { - nbytes -= func_bytes - bsize; - src -= num_blocks - 1; - dst -= num_blocks - 1; - - gctx->funcs[i].fn_u.cbc(ctx, dst, src); - - nbytes -= bsize; - if (nbytes < bsize) - goto done; - - u128_xor(dst, dst, src - 1); - src -= 1; - dst -= 1; - } while (nbytes >= func_bytes); - - if (nbytes < bsize) - goto done; - } - } - -done: - u128_xor(dst, dst, (u128 *)walk->iv); - *(u128 *)walk->iv = last_iv; - - return nbytes; -} - -int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - const unsigned int bsize = 128 / 8; - bool fpu_enabled = false; - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - - while ((nbytes = walk.nbytes)) { - fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - desc, fpu_enabled, nbytes); - nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); - } - - glue_fpu_end(fpu_enabled); - return err; -} - -static inline void u128_to_be128(be128 *dst, const u128 *src) -{ - dst->a = cpu_to_be64(src->a); - dst->b = cpu_to_be64(src->b); -} - -static inline void be128_to_u128(u128 *dst, const be128 *src) -{ - dst->a = be64_to_cpu(src->a); - dst->b = be64_to_cpu(src->b); -} - -static inline void u128_inc(u128 *i) -{ - i->b++; - if (!i->b) - i->a++; -} - -static void glue_ctr_crypt_final_128bit(const common_glue_ctr_func_t fn_ctr, - struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - void *ctx = crypto_blkcipher_ctx(desc->tfm); - u8 *src = (u8 *)walk->src.virt.addr; - u8 *dst = (u8 *)walk->dst.virt.addr; - unsigned int nbytes 
= walk->nbytes; - u128 ctrblk; - u128 tmp; - - be128_to_u128(&ctrblk, (be128 *)walk->iv); - - memcpy(&tmp, src, nbytes); - fn_ctr(ctx, &tmp, &tmp, &ctrblk); - memcpy(dst, &tmp, nbytes); - - u128_to_be128((be128 *)walk->iv, &ctrblk); -} - -static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - const unsigned int bsize = 128 / 8; - void *ctx = crypto_blkcipher_ctx(desc->tfm); - unsigned int nbytes = walk->nbytes; - u128 *src = (u128 *)walk->src.virt.addr; - u128 *dst = (u128 *)walk->dst.virt.addr; - u128 ctrblk; - unsigned int num_blocks, func_bytes; - unsigned int i; - - be128_to_u128(&ctrblk, (be128 *)walk->iv); - - /* Process multi-block batch */ - for (i = 0; i < gctx->num_funcs; i++) { - num_blocks = gctx->funcs[i].num_blocks; - func_bytes = bsize * num_blocks; - - if (nbytes >= func_bytes) { - do { - gctx->funcs[i].fn_u.ctr(ctx, dst, src, &ctrblk); - - src += num_blocks; - dst += num_blocks; - nbytes -= func_bytes; - } while (nbytes >= func_bytes); - - if (nbytes < bsize) - goto done; - } - } - -done: - u128_to_be128((be128 *)walk->iv, &ctrblk); - return nbytes; -} - -int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - const unsigned int bsize = 128 / 8; - bool fpu_enabled = false; - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, bsize); - - while ((nbytes = walk.nbytes) >= bsize) { - fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - desc, fpu_enabled, nbytes); - nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); - } - - glue_fpu_end(fpu_enabled); - - if (walk.nbytes) { - glue_ctr_crypt_final_128bit( - gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk); - err = blkcipher_walk_done(desc, &walk, 0); - 
} - - return err; -} +#include static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src) { diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h new file mode 100644 index 0000000..3e408bd --- /dev/null +++ b/arch/x86/include/asm/crypto/glue_helper.h @@ -0,0 +1,115 @@ +/* + * Shared glue code for 128bit block ciphers + */ + +#ifndef _CRYPTO_GLUE_HELPER_H +#define _CRYPTO_GLUE_HELPER_H + +#include +#include +#include +#include + +typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src); +typedef void (*common_glue_cbc_func_t)(void *ctx, u128 *dst, const u128 *src); +typedef void (*common_glue_ctr_func_t)(void *ctx, u128 *dst, const u128 *src, + u128 *iv); + +#define GLUE_FUNC_CAST(fn) ((common_glue_func_t)(fn)) +#define GLUE_CBC_FUNC_CAST(fn) ((common_glue_cbc_func_t)(fn)) +#define GLUE_CTR_FUNC_CAST(fn) ((common_glue_ctr_func_t)(fn)) + +struct common_glue_func_entry { + unsigned int num_blocks; /* number of blocks that @fn will process */ + union { + common_glue_func_t ecb; + common_glue_cbc_func_t cbc; + common_glue_ctr_func_t ctr; + } fn_u; +}; + +struct common_glue_ctx { + unsigned int num_funcs; + int fpu_blocks_limit; /* -1 means fpu not needed at all */ + + /* + * First funcs entry must have largest num_blocks and last funcs entry + * must have num_blocks == 1! + */ + struct common_glue_func_entry funcs[]; +}; + +static inline bool glue_fpu_begin(unsigned int bsize, int fpu_blocks_limit, + struct blkcipher_desc *desc, + bool fpu_enabled, unsigned int nbytes) +{ + if (likely(fpu_blocks_limit < 0)) + return false; + + if (fpu_enabled) + return true; + + /* + * Vector-registers are only used when chunk to be processed is large + * enough, so do not enable FPU until it is necessary. 
+ */ + if (nbytes < bsize * (unsigned int)fpu_blocks_limit) + return false; + + if (desc) { + /* prevent sleeping if FPU is in use */ + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + } + + kernel_fpu_begin(); + return true; +} + +static inline void glue_fpu_end(bool fpu_enabled) +{ + if (fpu_enabled) + kernel_fpu_end(); +} + +static inline void u128_to_be128(be128 *dst, const u128 *src) +{ + dst->a = cpu_to_be64(src->a); + dst->b = cpu_to_be64(src->b); +} + +static inline void be128_to_u128(u128 *dst, const be128 *src) +{ + dst->a = be64_to_cpu(src->a); + dst->b = be64_to_cpu(src->b); +} + +static inline void u128_inc(u128 *i) +{ + i->b++; + if (!i->b) + i->a++; +} + +extern int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, + struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes); + +extern int glue_cbc_encrypt_128bit(const common_glue_func_t fn, + struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, + unsigned int nbytes); + +extern int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, + struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, + unsigned int nbytes); + +extern int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, + struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes); + +#endif /* _CRYPTO_GLUE_HELPER_H */ diff --git a/crypto/Kconfig b/crypto/Kconfig index 471cc46..92b4697 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -179,6 +179,11 @@ config CRYPTO_ABLK_HELPER_X86 depends on X86 select CRYPTO_CRYPTD +config CRYPTO_GLUE_HELPER_X86 + tristate + depends on X86 + select CRYPTO_ALGAPI + comment "Authenticated Encryption with Associated Data" config CRYPTO_CCM @@ -793,6 +798,7 @@ config CRYPTO_SERPENT_SSE2_X86_64 select CRYPTO_ALGAPI select CRYPTO_CRYPTD select CRYPTO_ABLK_HELPER_X86 + select CRYPTO_GLUE_HELPER_X86 select CRYPTO_SERPENT select CRYPTO_LRW select 
CRYPTO_XTS @@ -814,6 +820,7 @@ config CRYPTO_SERPENT_SSE2_586 select CRYPTO_ALGAPI select CRYPTO_CRYPTD select CRYPTO_ABLK_HELPER_X86 + select CRYPTO_GLUE_HELPER_X86 select CRYPTO_SERPENT select CRYPTO_LRW select CRYPTO_XTS -- cgit v0.10.2 From 1d0debbd4671a8d302a11837a126d5f87db16bdc Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Mon, 18 Jun 2012 14:07:24 +0300 Subject: crypto: serpent-avx: remove duplicated glue code and use shared glue code from glue_helper Now that shared glue code is available, convert serpent-avx to use it. Cc: Johannes Goetzfried Signed-off-by: Jussi Kivilinna Signed-off-by: Herbert Xu diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c index 31eb567..c61b91a 100644 --- a/arch/x86/crypto/serpent_avx_glue.c +++ b/arch/x86/crypto/serpent_avx_glue.c @@ -36,357 +36,147 @@ #include #include #include -#include #include #include #include #include -#include -#include -#include +#include -static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes) +static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src) { - if (fpu_enabled) - return true; - - /* AVX is only used when chunk to be processed is large enough, so - * do not enable FPU until it is necessary. 
- */ - if (nbytes < SERPENT_BLOCK_SIZE * SERPENT_PARALLEL_BLOCKS) - return false; - - kernel_fpu_begin(); - return true; -} - -static inline void serpent_fpu_end(bool fpu_enabled) -{ - if (fpu_enabled) - kernel_fpu_end(); -} - -static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, - bool enc) -{ - bool fpu_enabled = false; - struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - const unsigned int bsize = SERPENT_BLOCK_SIZE; - unsigned int nbytes; - int err; + u128 ivs[SERPENT_PARALLEL_BLOCKS - 1]; + unsigned int j; - err = blkcipher_walk_virt(desc, walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++) + ivs[j] = src[j]; - while ((nbytes = walk->nbytes)) { - u8 *wsrc = walk->src.virt.addr; - u8 *wdst = walk->dst.virt.addr; - - fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes); - - /* Process multi-block batch */ - if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) { - do { - if (enc) - serpent_enc_blk_xway(ctx, wdst, wsrc); - else - serpent_dec_blk_xway(ctx, wdst, wsrc); - - wsrc += bsize * SERPENT_PARALLEL_BLOCKS; - wdst += bsize * SERPENT_PARALLEL_BLOCKS; - nbytes -= bsize * SERPENT_PARALLEL_BLOCKS; - } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS); - - if (nbytes < bsize) - goto done; - } - - /* Handle leftovers */ - do { - if (enc) - __serpent_encrypt(ctx, wdst, wsrc); - else - __serpent_decrypt(ctx, wdst, wsrc); - - wsrc += bsize; - wdst += bsize; - nbytes -= bsize; - } while (nbytes >= bsize); - -done: - err = blkcipher_walk_done(desc, walk, nbytes); - } + serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); - serpent_fpu_end(fpu_enabled); - return err; + for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++) + u128_xor(dst + (j + 1), dst + (j + 1), ivs + j); } -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv) { - struct 
blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_crypt(desc, &walk, true); -} + be128 ctrblk; -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; + u128_to_be128(&ctrblk, iv); + u128_inc(iv); - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_crypt(desc, &walk, false); + __serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); + u128_xor(dst, src, (u128 *)&ctrblk); } -static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src, + u128 *iv) { - struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - const unsigned int bsize = SERPENT_BLOCK_SIZE; - unsigned int nbytes = walk->nbytes; - u128 *src = (u128 *)walk->src.virt.addr; - u128 *dst = (u128 *)walk->dst.virt.addr; - u128 *iv = (u128 *)walk->iv; - - do { - u128_xor(dst, src, iv); - __serpent_encrypt(ctx, (u8 *)dst, (u8 *)dst); - iv = dst; - - src += 1; - dst += 1; - nbytes -= bsize; - } while (nbytes >= bsize); - - u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv); - return nbytes; -} + be128 ctrblks[SERPENT_PARALLEL_BLOCKS]; + unsigned int i; -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) { + if (dst != src) + dst[i] = src[i]; - while ((nbytes = walk.nbytes)) { - nbytes = __cbc_encrypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); + u128_to_be128(&ctrblks[i], iv); + u128_inc(iv); } - return err; + serpent_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks); } -static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - struct serpent_ctx *ctx = 
crypto_blkcipher_ctx(desc->tfm); - const unsigned int bsize = SERPENT_BLOCK_SIZE; - unsigned int nbytes = walk->nbytes; - u128 *src = (u128 *)walk->src.virt.addr; - u128 *dst = (u128 *)walk->dst.virt.addr; - u128 ivs[SERPENT_PARALLEL_BLOCKS - 1]; - u128 last_iv; - int i; - - /* Start of the last block. */ - src += nbytes / bsize - 1; - dst += nbytes / bsize - 1; - - last_iv = *src; - - /* Process multi-block batch */ - if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) { - do { - nbytes -= bsize * (SERPENT_PARALLEL_BLOCKS - 1); - src -= SERPENT_PARALLEL_BLOCKS - 1; - dst -= SERPENT_PARALLEL_BLOCKS - 1; - - for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++) - ivs[i] = src[i]; - - serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); - - for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++) - u128_xor(dst + (i + 1), dst + (i + 1), ivs + i); - - nbytes -= bsize; - if (nbytes < bsize) - goto done; - - u128_xor(dst, dst, src - 1); - src -= 1; - dst -= 1; - } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS); - - if (nbytes < bsize) - goto done; - } - - /* Handle leftovers */ - for (;;) { - __serpent_decrypt(ctx, (u8 *)dst, (u8 *)src); - - nbytes -= bsize; - if (nbytes < bsize) - break; +static const struct common_glue_ctx serpent_enc = { + .num_funcs = 2, + .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, + + .funcs = { { + .num_blocks = SERPENT_PARALLEL_BLOCKS, + .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_enc_blk_xway) } + }, { + .num_blocks = 1, + .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) } + } } +}; - u128_xor(dst, dst, src - 1); - src -= 1; - dst -= 1; - } +static const struct common_glue_ctx serpent_ctr = { + .num_funcs = 2, + .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, + + .funcs = { { + .num_blocks = SERPENT_PARALLEL_BLOCKS, + .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr_xway) } + }, { + .num_blocks = 1, + .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr) } + } } +}; -done: - u128_xor(dst, dst, (u128 *)walk->iv); - *(u128 *)walk->iv = last_iv; +static const 
struct common_glue_ctx serpent_dec = { + .num_funcs = 2, + .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, + + .funcs = { { + .num_blocks = SERPENT_PARALLEL_BLOCKS, + .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_dec_blk_xway) } + }, { + .num_blocks = 1, + .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) } + } } +}; - return nbytes; -} +static const struct common_glue_ctx serpent_dec_cbc = { + .num_funcs = 2, + .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, + + .funcs = { { + .num_blocks = SERPENT_PARALLEL_BLOCKS, + .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_decrypt_cbc_xway) } + }, { + .num_blocks = 1, + .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) } + } } +}; -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, +static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - bool fpu_enabled = false; - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - - while ((nbytes = walk.nbytes)) { - fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes); - nbytes = __cbc_decrypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); - } - - serpent_fpu_end(fpu_enabled); - return err; + return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes); } -static inline void u128_to_be128(be128 *dst, const u128 *src) +static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) { - dst->a = cpu_to_be64(src->a); - dst->b = cpu_to_be64(src->b); + return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes); } -static inline void be128_to_u128(u128 *dst, const be128 *src) +static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) { - dst->a = be64_to_cpu(src->a); - dst->b = be64_to_cpu(src->b); + return 
glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc, + dst, src, nbytes); } -static inline void u128_inc(u128 *i) +static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) { - i->b++; - if (!i->b) - i->a++; + return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src, + nbytes); } -static void ctr_crypt_final(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) { - struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - u8 *ctrblk = walk->iv; - u8 keystream[SERPENT_BLOCK_SIZE]; - u8 *src = walk->src.virt.addr; - u8 *dst = walk->dst.virt.addr; - unsigned int nbytes = walk->nbytes; - - __serpent_encrypt(ctx, keystream, ctrblk); - crypto_xor(keystream, src, nbytes); - memcpy(dst, keystream, nbytes); - - crypto_inc(ctrblk, SERPENT_BLOCK_SIZE); + return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes); } -static unsigned int __ctr_crypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes) { - struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - const unsigned int bsize = SERPENT_BLOCK_SIZE; - unsigned int nbytes = walk->nbytes; - u128 *src = (u128 *)walk->src.virt.addr; - u128 *dst = (u128 *)walk->dst.virt.addr; - u128 ctrblk; - be128 ctrblocks[SERPENT_PARALLEL_BLOCKS]; - int i; - - be128_to_u128(&ctrblk, (be128 *)walk->iv); - - /* Process multi-block batch */ - if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) { - do { - /* create ctrblks for parallel encrypt */ - for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) { - if (dst != src) - dst[i] = src[i]; - - u128_to_be128(&ctrblocks[i], &ctrblk); - u128_inc(&ctrblk); - } - - serpent_enc_blk_xway_xor(ctx, (u8 *)dst, - (u8 *)ctrblocks); - - src += SERPENT_PARALLEL_BLOCKS; - dst += SERPENT_PARALLEL_BLOCKS; - 
nbytes -= bsize * SERPENT_PARALLEL_BLOCKS; - } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS); - - if (nbytes < bsize) - goto done; - } - - /* Handle leftovers */ - do { - if (dst != src) - *dst = *src; - - u128_to_be128(&ctrblocks[0], &ctrblk); - u128_inc(&ctrblk); - - __serpent_encrypt(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks); - u128_xor(dst, dst, (u128 *)ctrblocks); - - src += 1; - dst += 1; - nbytes -= bsize; - } while (nbytes >= bsize); - -done: - u128_to_be128((be128 *)walk->iv, &ctrblk); - return nbytes; + return glue_fpu_begin(SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS, + NULL, fpu_enabled, nbytes); } -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static inline void serpent_fpu_end(bool fpu_enabled) { - bool fpu_enabled = false; - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, SERPENT_BLOCK_SIZE); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - - while ((nbytes = walk.nbytes) >= SERPENT_BLOCK_SIZE) { - fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes); - nbytes = __ctr_crypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); - } - - serpent_fpu_end(fpu_enabled); - - if (walk.nbytes) { - ctr_crypt_final(desc, &walk); - err = blkcipher_walk_done(desc, &walk, 0); - } - - return err; + glue_fpu_end(fpu_enabled); } struct crypt_priv { diff --git a/crypto/Kconfig b/crypto/Kconfig index 92b4697..a86c2fb 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -842,6 +842,7 @@ config CRYPTO_SERPENT_AVX_X86_64 select CRYPTO_ALGAPI select CRYPTO_CRYPTD select CRYPTO_ABLK_HELPER_X86 + select CRYPTO_GLUE_HELPER_X86 select CRYPTO_SERPENT select CRYPTO_LRW select CRYPTO_XTS -- cgit v0.10.2 From 964263afdcbf9d1e85c021acfff0cc68dd168475 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Mon, 18 Jun 2012 14:07:29 +0300 Subject: crypto: camellia-x86_64 - remove duplicated glue code and use shared 
glue code from glue_helper Now that shared glue code is available, convert camellia-x86_64 to use it. Signed-off-by: Jussi Kivilinna Signed-off-by: Herbert Xu diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c index 3306dc0..eeb2b3b 100644 --- a/arch/x86/crypto/camellia_glue.c +++ b/arch/x86/crypto/camellia_glue.c @@ -5,10 +5,6 @@ * * Camellia parts based on code by: * Copyright (C) 2006 NTT (Nippon Telegraph and Telephone Corporation) - * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: - * Copyright (c) 2006 Herbert Xu - * CTR part based on code (crypto/ctr.c) by: - * (C) Copyright IBM Corp. 2007 - Joy Latten * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -34,9 +30,9 @@ #include #include #include -#include #include #include +#include #define CAMELLIA_MIN_KEY_SIZE 16 #define CAMELLIA_MAX_KEY_SIZE 32 @@ -1312,307 +1308,128 @@ static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key, &tfm->crt_flags); } -static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, - void (*fn)(struct camellia_ctx *, u8 *, const u8 *), - void (*fn_2way)(struct camellia_ctx *, u8 *, const u8 *)) +static void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src) { - struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - unsigned int bsize = CAMELLIA_BLOCK_SIZE; - unsigned int nbytes; - int err; - - err = blkcipher_walk_virt(desc, walk); - - while ((nbytes = walk->nbytes)) { - u8 *wsrc = walk->src.virt.addr; - u8 *wdst = walk->dst.virt.addr; - - /* Process two block batch */ - if (nbytes >= bsize * 2) { - do { - fn_2way(ctx, wdst, wsrc); - - wsrc += bsize * 2; - wdst += bsize * 2; - nbytes -= bsize * 2; - } while (nbytes >= bsize * 2); - - if (nbytes < bsize) - goto done; - } - - /* Handle leftovers */ - do { - fn(ctx, wdst, wsrc); - - wsrc += bsize; - wdst += bsize; - nbytes -= bsize; - } while (nbytes >= 
bsize); - -done: - err = blkcipher_walk_done(desc, walk, nbytes); - } - - return err; -} - -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_crypt(desc, &walk, camellia_enc_blk, camellia_enc_blk_2way); -} + u128 iv = *src; -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_crypt(desc, &walk, camellia_dec_blk, camellia_dec_blk_2way); -} + camellia_dec_blk_2way(ctx, (u8 *)dst, (u8 *)src); -static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - unsigned int bsize = CAMELLIA_BLOCK_SIZE; - unsigned int nbytes = walk->nbytes; - u128 *src = (u128 *)walk->src.virt.addr; - u128 *dst = (u128 *)walk->dst.virt.addr; - u128 *iv = (u128 *)walk->iv; - - do { - u128_xor(dst, src, iv); - camellia_enc_blk(ctx, (u8 *)dst, (u8 *)dst); - iv = dst; - - src += 1; - dst += 1; - nbytes -= bsize; - } while (nbytes >= bsize); - - u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv); - return nbytes; + u128_xor(&dst[1], &dst[1], &iv); } -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv) { - struct blkcipher_walk walk; - int err; + be128 ctrblk; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + if (dst != src) + *dst = *src; - while ((nbytes = walk.nbytes)) { - nbytes = __cbc_encrypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); - } + u128_to_be128(&ctrblk, iv); + u128_inc(iv); - return err; + camellia_enc_blk_xor(ctx, (u8 *)dst, (u8 *)&ctrblk); 
} -static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +static void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src, + u128 *iv) { - struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - unsigned int bsize = CAMELLIA_BLOCK_SIZE; - unsigned int nbytes = walk->nbytes; - u128 *src = (u128 *)walk->src.virt.addr; - u128 *dst = (u128 *)walk->dst.virt.addr; - u128 ivs[2 - 1]; - u128 last_iv; + be128 ctrblks[2]; - /* Start of the last block. */ - src += nbytes / bsize - 1; - dst += nbytes / bsize - 1; - - last_iv = *src; - - /* Process two block batch */ - if (nbytes >= bsize * 2) { - do { - nbytes -= bsize * (2 - 1); - src -= 2 - 1; - dst -= 2 - 1; - - ivs[0] = src[0]; - - camellia_dec_blk_2way(ctx, (u8 *)dst, (u8 *)src); - - u128_xor(dst + 1, dst + 1, ivs + 0); - - nbytes -= bsize; - if (nbytes < bsize) - goto done; - - u128_xor(dst, dst, src - 1); - src -= 1; - dst -= 1; - } while (nbytes >= bsize * 2); - - if (nbytes < bsize) - goto done; + if (dst != src) { + dst[0] = src[0]; + dst[1] = src[1]; } - /* Handle leftovers */ - for (;;) { - camellia_dec_blk(ctx, (u8 *)dst, (u8 *)src); - - nbytes -= bsize; - if (nbytes < bsize) - break; + u128_to_be128(&ctrblks[0], iv); + u128_inc(iv); + u128_to_be128(&ctrblks[1], iv); + u128_inc(iv); - u128_xor(dst, dst, src - 1); - src -= 1; - dst -= 1; - } - -done: - u128_xor(dst, dst, (u128 *)walk->iv); - *(u128 *)walk->iv = last_iv; - - return nbytes; + camellia_enc_blk_xor_2way(ctx, (u8 *)dst, (u8 *)ctrblks); } -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); +static const struct common_glue_ctx camellia_enc = { + .num_funcs = 2, + .fpu_blocks_limit = -1, + + .funcs = { { + .num_blocks = 2, + .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk_2way) } + }, { + 
.num_blocks = 1, + .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk) } + } } +}; - while ((nbytes = walk.nbytes)) { - nbytes = __cbc_decrypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); - } +static const struct common_glue_ctx camellia_ctr = { + .num_funcs = 2, + .fpu_blocks_limit = -1, + + .funcs = { { + .num_blocks = 2, + .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr_2way) } + }, { + .num_blocks = 1, + .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr) } + } } +}; - return err; -} +static const struct common_glue_ctx camellia_dec = { + .num_funcs = 2, + .fpu_blocks_limit = -1, + + .funcs = { { + .num_blocks = 2, + .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk_2way) } + }, { + .num_blocks = 1, + .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk) } + } } +}; -static inline void u128_to_be128(be128 *dst, const u128 *src) -{ - dst->a = cpu_to_be64(src->a); - dst->b = cpu_to_be64(src->b); -} +static const struct common_glue_ctx camellia_dec_cbc = { + .num_funcs = 2, + .fpu_blocks_limit = -1, + + .funcs = { { + .num_blocks = 2, + .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_decrypt_cbc_2way) } + }, { + .num_blocks = 1, + .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_dec_blk) } + } } +}; -static inline void be128_to_u128(u128 *dst, const be128 *src) +static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) { - dst->a = be64_to_cpu(src->a); - dst->b = be64_to_cpu(src->b); + return glue_ecb_crypt_128bit(&camellia_enc, desc, dst, src, nbytes); } -static inline void u128_inc(u128 *i) +static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) { - i->b++; - if (!i->b) - i->a++; + return glue_ecb_crypt_128bit(&camellia_dec, desc, dst, src, nbytes); } -static void ctr_crypt_final(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist 
*dst, + struct scatterlist *src, unsigned int nbytes) { - struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - u8 keystream[CAMELLIA_BLOCK_SIZE]; - u8 *src = walk->src.virt.addr; - u8 *dst = walk->dst.virt.addr; - unsigned int nbytes = walk->nbytes; - u128 ctrblk; - - memcpy(keystream, src, nbytes); - camellia_enc_blk_xor(ctx, keystream, walk->iv); - memcpy(dst, keystream, nbytes); - - be128_to_u128(&ctrblk, (be128 *)walk->iv); - u128_inc(&ctrblk); - u128_to_be128((be128 *)walk->iv, &ctrblk); + return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(camellia_enc_blk), desc, + dst, src, nbytes); } -static unsigned int __ctr_crypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) { - struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - unsigned int bsize = CAMELLIA_BLOCK_SIZE; - unsigned int nbytes = walk->nbytes; - u128 *src = (u128 *)walk->src.virt.addr; - u128 *dst = (u128 *)walk->dst.virt.addr; - u128 ctrblk; - be128 ctrblocks[2]; - - be128_to_u128(&ctrblk, (be128 *)walk->iv); - - /* Process two block batch */ - if (nbytes >= bsize * 2) { - do { - if (dst != src) { - dst[0] = src[0]; - dst[1] = src[1]; - } - - /* create ctrblks for parallel encrypt */ - u128_to_be128(&ctrblocks[0], &ctrblk); - u128_inc(&ctrblk); - u128_to_be128(&ctrblocks[1], &ctrblk); - u128_inc(&ctrblk); - - camellia_enc_blk_xor_2way(ctx, (u8 *)dst, - (u8 *)ctrblocks); - - src += 2; - dst += 2; - nbytes -= bsize * 2; - } while (nbytes >= bsize * 2); - - if (nbytes < bsize) - goto done; - } - - /* Handle leftovers */ - do { - if (dst != src) - *dst = *src; - - u128_to_be128(&ctrblocks[0], &ctrblk); - u128_inc(&ctrblk); - - camellia_enc_blk_xor(ctx, (u8 *)dst, (u8 *)ctrblocks); - - src += 1; - dst += 1; - nbytes -= bsize; - } while (nbytes >= bsize); - -done: - u128_to_be128((be128 *)walk->iv, &ctrblk); - return nbytes; + return 
glue_cbc_decrypt_128bit(&camellia_dec_cbc, desc, dst, src, + nbytes); } static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, CAMELLIA_BLOCK_SIZE); - - while ((nbytes = walk.nbytes) >= CAMELLIA_BLOCK_SIZE) { - nbytes = __ctr_crypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); - } - - if (walk.nbytes) { - ctr_crypt_final(desc, &walk); - err = blkcipher_walk_done(desc, &walk, 0); - } - - return err; + return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes); } static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) diff --git a/crypto/Kconfig b/crypto/Kconfig index a86c2fb..72828fa 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -671,6 +671,7 @@ config CRYPTO_CAMELLIA_X86_64 depends on X86 && 64BIT depends on CRYPTO select CRYPTO_ALGAPI + select CRYPTO_GLUE_HELPER_X86 select CRYPTO_LRW select CRYPTO_XTS help -- cgit v0.10.2 From 414cb5e7cc6e258fe36e2c3cc3ef1ff2e246c0e3 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Mon, 18 Jun 2012 14:07:34 +0300 Subject: crypto: twofish-x86_64-3way - remove duplicated glue code and use shared glue code from glue_helper Now that shared glue code is available, convert twofish-x86_64-3way to use it. Signed-off-by: Jussi Kivilinna Signed-off-by: Herbert Xu diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c index 77e4e55..25bf5e9 100644 --- a/arch/x86/crypto/twofish_glue_3way.c +++ b/arch/x86/crypto/twofish_glue_3way.c @@ -3,11 +3,6 @@ * * Copyright (c) 2011 Jussi Kivilinna * - * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: - * Copyright (c) 2006 Herbert Xu - * CTR part based on code (crypto/ctr.c) by: - * (C) Copyright IBM Corp. 
2007 - Joy Latten - * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -33,6 +28,7 @@ #include #include #include +#include #include #include @@ -62,311 +58,136 @@ static inline void twofish_enc_blk_xor_3way(struct twofish_ctx *ctx, u8 *dst, __twofish_enc_blk_3way(ctx, dst, src, true); } -static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, - void (*fn)(struct twofish_ctx *, u8 *, const u8 *), - void (*fn_3way)(struct twofish_ctx *, u8 *, const u8 *)) +static void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src) { - struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - unsigned int bsize = TF_BLOCK_SIZE; - unsigned int nbytes; - int err; - - err = blkcipher_walk_virt(desc, walk); - - while ((nbytes = walk->nbytes)) { - u8 *wsrc = walk->src.virt.addr; - u8 *wdst = walk->dst.virt.addr; - - /* Process three block batch */ - if (nbytes >= bsize * 3) { - do { - fn_3way(ctx, wdst, wsrc); + u128 ivs[2]; - wsrc += bsize * 3; - wdst += bsize * 3; - nbytes -= bsize * 3; - } while (nbytes >= bsize * 3); + ivs[0] = src[0]; + ivs[1] = src[1]; - if (nbytes < bsize) - goto done; - } - - /* Handle leftovers */ - do { - fn(ctx, wdst, wsrc); - - wsrc += bsize; - wdst += bsize; - nbytes -= bsize; - } while (nbytes >= bsize); - -done: - err = blkcipher_walk_done(desc, walk, nbytes); - } + twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src); - return err; + u128_xor(&dst[1], &dst[1], &ivs[0]); + u128_xor(&dst[2], &dst[2], &ivs[1]); } -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv) { - struct blkcipher_walk walk; + be128 ctrblk; - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_crypt(desc, &walk, twofish_enc_blk, 
twofish_enc_blk_3way); -} + if (dst != src) + *dst = *src; -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; + u128_to_be128(&ctrblk, iv); + u128_inc(iv); - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_crypt(desc, &walk, twofish_dec_blk, twofish_dec_blk_3way); + twofish_enc_blk(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); + u128_xor(dst, dst, (u128 *)&ctrblk); } -static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +static void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src, + u128 *iv) { - struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - unsigned int bsize = TF_BLOCK_SIZE; - unsigned int nbytes = walk->nbytes; - u128 *src = (u128 *)walk->src.virt.addr; - u128 *dst = (u128 *)walk->dst.virt.addr; - u128 *iv = (u128 *)walk->iv; - - do { - u128_xor(dst, src, iv); - twofish_enc_blk(ctx, (u8 *)dst, (u8 *)dst); - iv = dst; - - src += 1; - dst += 1; - nbytes -= bsize; - } while (nbytes >= bsize); - - u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv); - return nbytes; -} + be128 ctrblks[3]; -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - - while ((nbytes = walk.nbytes)) { - nbytes = __cbc_encrypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); - } - - return err; -} - -static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - unsigned int bsize = TF_BLOCK_SIZE; - unsigned int nbytes = walk->nbytes; - u128 *src = (u128 *)walk->src.virt.addr; - u128 *dst = (u128 *)walk->dst.virt.addr; - u128 ivs[3 - 1]; - u128 last_iv; - - /* Start of the last block. 
*/ - src += nbytes / bsize - 1; - dst += nbytes / bsize - 1; - - last_iv = *src; - - /* Process three block batch */ - if (nbytes >= bsize * 3) { - do { - nbytes -= bsize * (3 - 1); - src -= 3 - 1; - dst -= 3 - 1; - - ivs[0] = src[0]; - ivs[1] = src[1]; - - twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src); - - u128_xor(dst + 1, dst + 1, ivs + 0); - u128_xor(dst + 2, dst + 2, ivs + 1); - - nbytes -= bsize; - if (nbytes < bsize) - goto done; - - u128_xor(dst, dst, src - 1); - src -= 1; - dst -= 1; - } while (nbytes >= bsize * 3); - - if (nbytes < bsize) - goto done; - } - - /* Handle leftovers */ - for (;;) { - twofish_dec_blk(ctx, (u8 *)dst, (u8 *)src); - - nbytes -= bsize; - if (nbytes < bsize) - break; - - u128_xor(dst, dst, src - 1); - src -= 1; - dst -= 1; + if (dst != src) { + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; } -done: - u128_xor(dst, dst, (u128 *)walk->iv); - *(u128 *)walk->iv = last_iv; + u128_to_be128(&ctrblks[0], iv); + u128_inc(iv); + u128_to_be128(&ctrblks[1], iv); + u128_inc(iv); + u128_to_be128(&ctrblks[2], iv); + u128_inc(iv); - return nbytes; + twofish_enc_blk_xor_3way(ctx, (u8 *)dst, (u8 *)ctrblks); } -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); +static const struct common_glue_ctx twofish_enc = { + .num_funcs = 2, + .fpu_blocks_limit = -1, + + .funcs = { { + .num_blocks = 3, + .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) } + }, { + .num_blocks = 1, + .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) } + } } +}; - while ((nbytes = walk.nbytes)) { - nbytes = __cbc_decrypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); - } +static const struct common_glue_ctx twofish_ctr = { + .num_funcs = 2, + .fpu_blocks_limit = -1, + + .funcs = { { + .num_blocks = 3, + .fn_u = { .ecb = 
GLUE_FUNC_CAST(twofish_enc_blk_ctr_3way) } + }, { + .num_blocks = 1, + .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_ctr) } + } } +}; - return err; -} +static const struct common_glue_ctx twofish_dec = { + .num_funcs = 2, + .fpu_blocks_limit = -1, + + .funcs = { { + .num_blocks = 3, + .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) } + }, { + .num_blocks = 1, + .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) } + } } +}; -static inline void u128_to_be128(be128 *dst, const u128 *src) -{ - dst->a = cpu_to_be64(src->a); - dst->b = cpu_to_be64(src->b); -} +static const struct common_glue_ctx twofish_dec_cbc = { + .num_funcs = 2, + .fpu_blocks_limit = -1, + + .funcs = { { + .num_blocks = 3, + .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) } + }, { + .num_blocks = 1, + .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) } + } } +}; -static inline void be128_to_u128(u128 *dst, const be128 *src) +static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) { - dst->a = be64_to_cpu(src->a); - dst->b = be64_to_cpu(src->b); + return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes); } -static inline void u128_inc(u128 *i) +static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) { - i->b++; - if (!i->b) - i->a++; + return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes); } -static void ctr_crypt_final(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) { - struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - u8 *ctrblk = walk->iv; - u8 keystream[TF_BLOCK_SIZE]; - u8 *src = walk->src.virt.addr; - u8 *dst = walk->dst.virt.addr; - unsigned int nbytes = walk->nbytes; - - twofish_enc_blk(ctx, keystream, ctrblk); - crypto_xor(keystream, src, nbytes); - memcpy(dst, 
keystream, nbytes); - - crypto_inc(ctrblk, TF_BLOCK_SIZE); + return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc, + dst, src, nbytes); } -static unsigned int __ctr_crypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) { - struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - unsigned int bsize = TF_BLOCK_SIZE; - unsigned int nbytes = walk->nbytes; - u128 *src = (u128 *)walk->src.virt.addr; - u128 *dst = (u128 *)walk->dst.virt.addr; - u128 ctrblk; - be128 ctrblocks[3]; - - be128_to_u128(&ctrblk, (be128 *)walk->iv); - - /* Process three block batch */ - if (nbytes >= bsize * 3) { - do { - if (dst != src) { - dst[0] = src[0]; - dst[1] = src[1]; - dst[2] = src[2]; - } - - /* create ctrblks for parallel encrypt */ - u128_to_be128(&ctrblocks[0], &ctrblk); - u128_inc(&ctrblk); - u128_to_be128(&ctrblocks[1], &ctrblk); - u128_inc(&ctrblk); - u128_to_be128(&ctrblocks[2], &ctrblk); - u128_inc(&ctrblk); - - twofish_enc_blk_xor_3way(ctx, (u8 *)dst, - (u8 *)ctrblocks); - - src += 3; - dst += 3; - nbytes -= bsize * 3; - } while (nbytes >= bsize * 3); - - if (nbytes < bsize) - goto done; - } - - /* Handle leftovers */ - do { - if (dst != src) - *dst = *src; - - u128_to_be128(&ctrblocks[0], &ctrblk); - u128_inc(&ctrblk); - - twofish_enc_blk(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks); - u128_xor(dst, dst, (u128 *)ctrblocks); - - src += 1; - dst += 1; - nbytes -= bsize; - } while (nbytes >= bsize); - -done: - u128_to_be128((be128 *)walk->iv, &ctrblk); - return nbytes; + return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src, + nbytes); } static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, TF_BLOCK_SIZE); - - while 
((nbytes = walk.nbytes) >= TF_BLOCK_SIZE) { - nbytes = __ctr_crypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); - } - - if (walk.nbytes) { - ctr_crypt_final(desc, &walk); - err = blkcipher_walk_done(desc, &walk, 0); - } - - return err; + return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes); } static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) diff --git a/crypto/Kconfig b/crypto/Kconfig index 72828fa..fc559ca 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -935,6 +935,7 @@ config CRYPTO_TWOFISH_X86_64_3WAY select CRYPTO_ALGAPI select CRYPTO_TWOFISH_COMMON select CRYPTO_TWOFISH_X86_64 + select CRYPTO_GLUE_HELPER_X86 select CRYPTO_LRW select CRYPTO_XTS help -- cgit v0.10.2 From a7378d4e552ac139ae1cbbdfebfeaa9b18c948d0 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Mon, 18 Jun 2012 14:07:39 +0300 Subject: crypto: twofish-avx - remove duplicated glue code and use shared glue code from glue_helper Now that shared glue code is available, convert twofish-avx to use it. 
Cc: Johannes Goetzfried Signed-off-by: Jussi Kivilinna Signed-off-by: Herbert Xu diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index cabe058..782b67d 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c @@ -4,9 +4,6 @@ * Copyright (C) 2012 Johannes Goetzfried * * - * Glue code based on serpent_sse2_glue.c by: - * Copyright (C) 2011 Jussi Kivilinna - * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -39,38 +36,21 @@ #include #include #include +#include #include +#include #include #include #include - #define TWOFISH_PARALLEL_BLOCKS 8 -/* regular block cipher functions from twofish_x86_64 module */ -asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst, - const u8 *src); - -/* 3-way parallel cipher functions from twofish_x86_64-3way module */ -asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src, bool xor); -asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src); - static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, const u8 *src) { __twofish_enc_blk_3way(ctx, dst, src, false); } -static inline void twofish_enc_blk_3way_xor(struct twofish_ctx *ctx, u8 *dst, - const u8 *src) -{ - __twofish_enc_blk_3way(ctx, dst, src, true); -} - /* 8-way parallel cipher functions */ asmlinkage void __twofish_enc_blk_8way(struct twofish_ctx *ctx, u8 *dst, const u8 *src, bool xor); @@ -95,423 +75,142 @@ static inline void twofish_dec_blk_xway(struct twofish_ctx *ctx, u8 *dst, twofish_dec_blk_8way(ctx, dst, src); } - -static inline bool twofish_fpu_begin(bool fpu_enabled, unsigned int nbytes) +static void twofish_dec_blk_cbc_xway(void *ctx, u128 *dst, const u128 *src) { 
- if (fpu_enabled) - return true; + u128 ivs[TWOFISH_PARALLEL_BLOCKS - 1]; + unsigned int j; - /* AVX is only used when chunk to be processed is large enough, so - * do not enable FPU until it is necessary. - */ - if (nbytes < TF_BLOCK_SIZE * TWOFISH_PARALLEL_BLOCKS) - return false; + for (j = 0; j < TWOFISH_PARALLEL_BLOCKS - 1; j++) + ivs[j] = src[j]; - kernel_fpu_begin(); - return true; -} + twofish_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); -static inline void twofish_fpu_end(bool fpu_enabled) -{ - if (fpu_enabled) - kernel_fpu_end(); + for (j = 0; j < TWOFISH_PARALLEL_BLOCKS - 1; j++) + u128_xor(dst + (j + 1), dst + (j + 1), ivs + j); } -static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, - bool enc) +static void twofish_enc_blk_ctr_xway(void *ctx, u128 *dst, const u128 *src, + u128 *iv) { - bool fpu_enabled = false; - struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - const unsigned int bsize = TF_BLOCK_SIZE; - unsigned int nbytes; - int err; + be128 ctrblks[TWOFISH_PARALLEL_BLOCKS]; + unsigned int i; - err = blkcipher_walk_virt(desc, walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + for (i = 0; i < TWOFISH_PARALLEL_BLOCKS; i++) { + if (dst != src) + dst[i] = src[i]; - while ((nbytes = walk->nbytes)) { - u8 *wsrc = walk->src.virt.addr; - u8 *wdst = walk->dst.virt.addr; - - fpu_enabled = twofish_fpu_begin(fpu_enabled, nbytes); - - /* Process multi-block batch */ - if (nbytes >= bsize * TWOFISH_PARALLEL_BLOCKS) { - do { - if (enc) - twofish_enc_blk_xway(ctx, wdst, wsrc); - else - twofish_dec_blk_xway(ctx, wdst, wsrc); - - wsrc += bsize * TWOFISH_PARALLEL_BLOCKS; - wdst += bsize * TWOFISH_PARALLEL_BLOCKS; - nbytes -= bsize * TWOFISH_PARALLEL_BLOCKS; - } while (nbytes >= bsize * TWOFISH_PARALLEL_BLOCKS); - - if (nbytes < bsize) - goto done; - } - - /* Process three block batch */ - if (nbytes >= bsize * 3) { - do { - if (enc) - twofish_enc_blk_3way(ctx, wdst, wsrc); - else - twofish_dec_blk_3way(ctx, wdst, wsrc); - - wsrc += 
bsize * 3; - wdst += bsize * 3; - nbytes -= bsize * 3; - } while (nbytes >= bsize * 3); - - if (nbytes < bsize) - goto done; - } - - /* Handle leftovers */ - do { - if (enc) - twofish_enc_blk(ctx, wdst, wsrc); - else - twofish_dec_blk(ctx, wdst, wsrc); - - wsrc += bsize; - wdst += bsize; - nbytes -= bsize; - } while (nbytes >= bsize); - -done: - err = blkcipher_walk_done(desc, walk, nbytes); + u128_to_be128(&ctrblks[i], iv); + u128_inc(iv); } - twofish_fpu_end(fpu_enabled); - return err; + twofish_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks); } +static const struct common_glue_ctx twofish_enc = { + .num_funcs = 3, + .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, + + .funcs = { { + .num_blocks = TWOFISH_PARALLEL_BLOCKS, + .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_xway) } + }, { + .num_blocks = 3, + .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) } + }, { + .num_blocks = 1, + .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) } + } } +}; + +static const struct common_glue_ctx twofish_ctr = { + .num_funcs = 3, + .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, + + .funcs = { { + .num_blocks = TWOFISH_PARALLEL_BLOCKS, + .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_xway) } + }, { + .num_blocks = 3, + .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_3way) } + }, { + .num_blocks = 1, + .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr) } + } } +}; + +static const struct common_glue_ctx twofish_dec = { + .num_funcs = 3, + .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, + + .funcs = { { + .num_blocks = TWOFISH_PARALLEL_BLOCKS, + .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_xway) } + }, { + .num_blocks = 3, + .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) } + }, { + .num_blocks = 1, + .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) } + } } +}; + +static const struct common_glue_ctx twofish_dec_cbc = { + .num_funcs = 3, + .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, + + .funcs = { { + .num_blocks = TWOFISH_PARALLEL_BLOCKS, + .fn_u = { 
.cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_xway) } + }, { + .num_blocks = 3, + .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) } + }, { + .num_blocks = 1, + .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) } + } } +}; + static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_crypt(desc, &walk, true); + return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes); } static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_crypt(desc, &walk, false); -} - -static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - const unsigned int bsize = TF_BLOCK_SIZE; - unsigned int nbytes = walk->nbytes; - u128 *src = (u128 *)walk->src.virt.addr; - u128 *dst = (u128 *)walk->dst.virt.addr; - u128 *iv = (u128 *)walk->iv; - - do { - u128_xor(dst, src, iv); - twofish_enc_blk(ctx, (u8 *)dst, (u8 *)dst); - iv = dst; - - src += 1; - dst += 1; - nbytes -= bsize; - } while (nbytes >= bsize); - - u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv); - return nbytes; + return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes); } static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - - while ((nbytes = walk.nbytes)) { - nbytes = __cbc_encrypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); - } - - return err; -} - -static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - struct 
twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - const unsigned int bsize = TF_BLOCK_SIZE; - unsigned int nbytes = walk->nbytes; - u128 *src = (u128 *)walk->src.virt.addr; - u128 *dst = (u128 *)walk->dst.virt.addr; - u128 ivs[TWOFISH_PARALLEL_BLOCKS - 1]; - u128 last_iv; - int i; - - /* Start of the last block. */ - src += nbytes / bsize - 1; - dst += nbytes / bsize - 1; - - last_iv = *src; - - /* Process multi-block batch */ - if (nbytes >= bsize * TWOFISH_PARALLEL_BLOCKS) { - do { - nbytes -= bsize * (TWOFISH_PARALLEL_BLOCKS - 1); - src -= TWOFISH_PARALLEL_BLOCKS - 1; - dst -= TWOFISH_PARALLEL_BLOCKS - 1; - - for (i = 0; i < TWOFISH_PARALLEL_BLOCKS - 1; i++) - ivs[i] = src[i]; - - twofish_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); - - for (i = 0; i < TWOFISH_PARALLEL_BLOCKS - 1; i++) - u128_xor(dst + (i + 1), dst + (i + 1), ivs + i); - - nbytes -= bsize; - if (nbytes < bsize) - goto done; - - u128_xor(dst, dst, src - 1); - src -= 1; - dst -= 1; - } while (nbytes >= bsize * TWOFISH_PARALLEL_BLOCKS); - - if (nbytes < bsize) - goto done; - } - - /* Process three block batch */ - if (nbytes >= bsize * 3) { - do { - nbytes -= bsize * (3 - 1); - src -= 3 - 1; - dst -= 3 - 1; - - ivs[0] = src[0]; - ivs[1] = src[1]; - - twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src); - - u128_xor(dst + 1, dst + 1, ivs + 0); - u128_xor(dst + 2, dst + 2, ivs + 1); - - nbytes -= bsize; - if (nbytes < bsize) - goto done; - - u128_xor(dst, dst, src - 1); - src -= 1; - dst -= 1; - } while (nbytes >= bsize * 3); - - if (nbytes < bsize) - goto done; - } - - /* Handle leftovers */ - for (;;) { - twofish_dec_blk(ctx, (u8 *)dst, (u8 *)src); - - nbytes -= bsize; - if (nbytes < bsize) - break; - - u128_xor(dst, dst, src - 1); - src -= 1; - dst -= 1; - } - -done: - u128_xor(dst, dst, (u128 *)walk->iv); - *(u128 *)walk->iv = last_iv; - - return nbytes; + return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc, + dst, src, nbytes); } static int cbc_decrypt(struct blkcipher_desc *desc, 
struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - bool fpu_enabled = false; - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - - while ((nbytes = walk.nbytes)) { - fpu_enabled = twofish_fpu_begin(fpu_enabled, nbytes); - nbytes = __cbc_decrypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); - } - - twofish_fpu_end(fpu_enabled); - return err; -} - -static inline void u128_to_be128(be128 *dst, const u128 *src) -{ - dst->a = cpu_to_be64(src->a); - dst->b = cpu_to_be64(src->b); + return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src, + nbytes); } -static inline void be128_to_u128(u128 *dst, const be128 *src) -{ - dst->a = be64_to_cpu(src->a); - dst->b = be64_to_cpu(src->b); -} - -static inline void u128_inc(u128 *i) +static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) { - i->b++; - if (!i->b) - i->a++; + return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes); } -static void ctr_crypt_final(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +static inline bool twofish_fpu_begin(bool fpu_enabled, unsigned int nbytes) { - struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - u8 *ctrblk = walk->iv; - u8 keystream[TF_BLOCK_SIZE]; - u8 *src = walk->src.virt.addr; - u8 *dst = walk->dst.virt.addr; - unsigned int nbytes = walk->nbytes; - - twofish_enc_blk(ctx, keystream, ctrblk); - crypto_xor(keystream, src, nbytes); - memcpy(dst, keystream, nbytes); - - crypto_inc(ctrblk, TF_BLOCK_SIZE); + return glue_fpu_begin(TF_BLOCK_SIZE, TWOFISH_PARALLEL_BLOCKS, NULL, + fpu_enabled, nbytes); } -static unsigned int __ctr_crypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - const unsigned int bsize = TF_BLOCK_SIZE; - unsigned int nbytes = 
walk->nbytes; - u128 *src = (u128 *)walk->src.virt.addr; - u128 *dst = (u128 *)walk->dst.virt.addr; - u128 ctrblk; - be128 ctrblocks[TWOFISH_PARALLEL_BLOCKS]; - int i; - - be128_to_u128(&ctrblk, (be128 *)walk->iv); - - /* Process multi-block batch */ - if (nbytes >= bsize * TWOFISH_PARALLEL_BLOCKS) { - do { - /* create ctrblks for parallel encrypt */ - for (i = 0; i < TWOFISH_PARALLEL_BLOCKS; i++) { - if (dst != src) - dst[i] = src[i]; - - u128_to_be128(&ctrblocks[i], &ctrblk); - u128_inc(&ctrblk); - } - - twofish_enc_blk_xway_xor(ctx, (u8 *)dst, - (u8 *)ctrblocks); - - src += TWOFISH_PARALLEL_BLOCKS; - dst += TWOFISH_PARALLEL_BLOCKS; - nbytes -= bsize * TWOFISH_PARALLEL_BLOCKS; - } while (nbytes >= bsize * TWOFISH_PARALLEL_BLOCKS); - - if (nbytes < bsize) - goto done; - } - - /* Process three block batch */ - if (nbytes >= bsize * 3) { - do { - if (dst != src) { - dst[0] = src[0]; - dst[1] = src[1]; - dst[2] = src[2]; - } - - /* create ctrblks for parallel encrypt */ - u128_to_be128(&ctrblocks[0], &ctrblk); - u128_inc(&ctrblk); - u128_to_be128(&ctrblocks[1], &ctrblk); - u128_inc(&ctrblk); - u128_to_be128(&ctrblocks[2], &ctrblk); - u128_inc(&ctrblk); - - twofish_enc_blk_3way_xor(ctx, (u8 *)dst, - (u8 *)ctrblocks); - - src += 3; - dst += 3; - nbytes -= bsize * 3; - } while (nbytes >= bsize * 3); - - if (nbytes < bsize) - goto done; - } - - /* Handle leftovers */ - do { - if (dst != src) - *dst = *src; - - u128_to_be128(&ctrblocks[0], &ctrblk); - u128_inc(&ctrblk); - - twofish_enc_blk(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks); - u128_xor(dst, dst, (u128 *)ctrblocks); - - src += 1; - dst += 1; - nbytes -= bsize; - } while (nbytes >= bsize); - -done: - u128_to_be128((be128 *)walk->iv, &ctrblk); - return nbytes; -} - -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static inline void twofish_fpu_end(bool fpu_enabled) { - bool fpu_enabled = false; - struct blkcipher_walk walk; - int err; - - 
blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, TF_BLOCK_SIZE); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - - while ((nbytes = walk.nbytes) >= TF_BLOCK_SIZE) { - fpu_enabled = twofish_fpu_begin(fpu_enabled, nbytes); - nbytes = __ctr_crypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); - } - - twofish_fpu_end(fpu_enabled); - - if (walk.nbytes) { - ctr_crypt_final(desc, &walk); - err = blkcipher_walk_done(desc, &walk, 0); - } - - return err; + glue_fpu_end(fpu_enabled); } struct crypt_priv { @@ -563,26 +262,6 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) twofish_dec_blk(ctx->ctx, srcdst, srcdst); } -struct twofish_lrw_ctx { - struct lrw_table_ctx lrw_table; - struct twofish_ctx twofish_ctx; -}; - -static int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); - int err; - - err = __twofish_setkey(&ctx->twofish_ctx, key, - keylen - TF_BLOCK_SIZE, &tfm->crt_flags); - if (err) - return err; - - return lrw_init_table(&ctx->lrw_table, key + keylen - - TF_BLOCK_SIZE); -} - static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { @@ -635,43 +314,6 @@ static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, return ret; } -static void lrw_exit_tfm(struct crypto_tfm *tfm) -{ - struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); - - lrw_free_table(&ctx->lrw_table); -} - -struct twofish_xts_ctx { - struct twofish_ctx tweak_ctx; - struct twofish_ctx crypt_ctx; -}; - -static int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - struct twofish_xts_ctx *ctx = crypto_tfm_ctx(tfm); - u32 *flags = &tfm->crt_flags; - int err; - - /* key consists of keys of equal size concatenated, therefore - * the length must be even - */ - if (keylen % 2) { - *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - 
return -EINVAL; - } - - /* first half of xts-key is for crypt */ - err = __twofish_setkey(&ctx->crypt_ctx, key, keylen / 2, flags); - if (err) - return err; - - /* second half of xts-key is for tweak */ - return __twofish_setkey(&ctx->tweak_ctx, - key + keylen / 2, keylen / 2, flags); -} - static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { @@ -798,7 +440,7 @@ static struct crypto_alg twofish_algs[10] = { { .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, .cra_list = LIST_HEAD_INIT(twofish_algs[3].cra_list), - .cra_exit = lrw_exit_tfm, + .cra_exit = lrw_twofish_exit_tfm, .cra_u = { .blkcipher = { .min_keysize = TF_MIN_KEY_SIZE + diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c index 25bf5e9..15f9347 100644 --- a/arch/x86/crypto/twofish_glue_3way.c +++ b/arch/x86/crypto/twofish_glue_3way.c @@ -28,22 +28,12 @@ #include #include #include +#include #include #include #include -/* regular block cipher functions from twofish_x86_64 module */ -asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst, - const u8 *src); - -/* 3-way parallel cipher functions */ -asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src, bool xor); EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way); -asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src); EXPORT_SYMBOL_GPL(twofish_dec_blk_3way); static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, @@ -58,7 +48,7 @@ static inline void twofish_enc_blk_xor_3way(struct twofish_ctx *ctx, u8 *dst, __twofish_enc_blk_3way(ctx, dst, src, true); } -static void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src) +void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src) { u128 ivs[2]; @@ -70,8 +60,9 @@ static void twofish_dec_blk_cbc_3way(void 
*ctx, u128 *dst, const u128 *src) u128_xor(&dst[1], &dst[1], &ivs[0]); u128_xor(&dst[2], &dst[2], &ivs[1]); } +EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way); -static void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv) +void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv) { be128 ctrblk; @@ -84,8 +75,9 @@ static void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv) twofish_enc_blk(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); u128_xor(dst, dst, (u128 *)&ctrblk); } +EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr); -static void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src, +void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src, u128 *iv) { be128 ctrblks[3]; @@ -105,6 +97,7 @@ static void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src, twofish_enc_blk_xor_3way(ctx, (u8 *)dst, (u8 *)ctrblks); } +EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr_3way); static const struct common_glue_ctx twofish_enc = { .num_funcs = 2, @@ -220,13 +213,8 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) twofish_dec_blk(ctx, srcdst, srcdst); } -struct twofish_lrw_ctx { - struct lrw_table_ctx lrw_table; - struct twofish_ctx twofish_ctx; -}; - -static int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) +int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) { struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); int err; @@ -238,6 +226,7 @@ static int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, return lrw_init_table(&ctx->lrw_table, key + keylen - TF_BLOCK_SIZE); } +EXPORT_SYMBOL_GPL(lrw_twofish_setkey); static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) @@ -273,20 +262,16 @@ static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, return lrw_crypt(desc, dst, src, nbytes, &req); } -static void lrw_exit_tfm(struct crypto_tfm 
*tfm) +void lrw_twofish_exit_tfm(struct crypto_tfm *tfm) { struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); lrw_free_table(&ctx->lrw_table); } +EXPORT_SYMBOL_GPL(lrw_twofish_exit_tfm); -struct twofish_xts_ctx { - struct twofish_ctx tweak_ctx; - struct twofish_ctx crypt_ctx; -}; - -static int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) +int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) { struct twofish_xts_ctx *ctx = crypto_tfm_ctx(tfm); u32 *flags = &tfm->crt_flags; @@ -309,6 +294,7 @@ static int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2, flags); } +EXPORT_SYMBOL_GPL(xts_twofish_setkey); static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) @@ -419,7 +405,7 @@ static struct crypto_alg tf_algs[5] = { { .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, .cra_list = LIST_HEAD_INIT(tf_algs[3].cra_list), - .cra_exit = lrw_exit_tfm, + .cra_exit = lrw_twofish_exit_tfm, .cra_u = { .blkcipher = { .min_keysize = TF_MIN_KEY_SIZE + TF_BLOCK_SIZE, diff --git a/arch/x86/include/asm/crypto/twofish.h b/arch/x86/include/asm/crypto/twofish.h new file mode 100644 index 0000000..9d2c514 --- /dev/null +++ b/arch/x86/include/asm/crypto/twofish.h @@ -0,0 +1,46 @@ +#ifndef ASM_X86_TWOFISH_H +#define ASM_X86_TWOFISH_H + +#include +#include +#include +#include + +struct twofish_lrw_ctx { + struct lrw_table_ctx lrw_table; + struct twofish_ctx twofish_ctx; +}; + +struct twofish_xts_ctx { + struct twofish_ctx tweak_ctx; + struct twofish_ctx crypt_ctx; +}; + +/* regular block cipher functions from twofish_x86_64 module */ +asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst, + const u8 *src); +asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst, + const u8 *src); + +/* 3-way parallel cipher functions */ +asmlinkage void 
__twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, + const u8 *src, bool xor); +asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst, + const u8 *src); + +/* helpers from twofish_x86_64-3way module */ +extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src); +extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, + u128 *iv); +extern void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src, + u128 *iv); + +extern int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen); + +extern void lrw_twofish_exit_tfm(struct crypto_tfm *tfm); + +extern int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen); + +#endif /* ASM_X86_TWOFISH_H */ diff --git a/crypto/Kconfig b/crypto/Kconfig index fc559ca..02e3037 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -958,6 +958,7 @@ config CRYPTO_TWOFISH_AVX_X86_64 select CRYPTO_ALGAPI select CRYPTO_CRYPTD select CRYPTO_ABLK_HELPER_X86 + select CRYPTO_GLUE_HELPER_X86 select CRYPTO_TWOFISH_COMMON select CRYPTO_TWOFISH_X86_64 select CRYPTO_TWOFISH_X86_64_3WAY -- cgit v0.10.2 From d4af0e9d6eef6ce53c1935ca6ee3c01889e3212d Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Mon, 18 Jun 2012 14:07:45 +0300 Subject: crypto: move arch/x86/include/asm/serpent-{sse2|avx}.h to arch/x86/include/asm/crypto/ Move serpent crypto headers to the new asm/crypto/ directory. 
Signed-off-by: Jussi Kivilinna Signed-off-by: Herbert Xu diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c index c61b91a..b36bdac 100644 --- a/arch/x86/crypto/serpent_avx_glue.c +++ b/arch/x86/crypto/serpent_avx_glue.c @@ -38,7 +38,7 @@ #include #include #include -#include <asm/serpent-avx.h> +#include <asm/crypto/serpent-avx.h> #include #include diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c index 49a32ee..d679c86 100644 --- a/arch/x86/crypto/serpent_sse2_glue.c +++ b/arch/x86/crypto/serpent_sse2_glue.c @@ -41,7 +41,7 @@ #include #include #include -#include <asm/serpent-sse2.h> +#include <asm/crypto/serpent-sse2.h> #include #include diff --git a/arch/x86/include/asm/crypto/serpent-avx.h b/arch/x86/include/asm/crypto/serpent-avx.h new file mode 100644 index 0000000..432deed --- /dev/null +++ b/arch/x86/include/asm/crypto/serpent-avx.h @@ -0,0 +1,32 @@ +#ifndef ASM_X86_SERPENT_AVX_H +#define ASM_X86_SERPENT_AVX_H + +#include +#include + +#define SERPENT_PARALLEL_BLOCKS 8 + +asmlinkage void __serpent_enc_blk_8way_avx(struct serpent_ctx *ctx, u8 *dst, + const u8 *src, bool xor); +asmlinkage void serpent_dec_blk_8way_avx(struct serpent_ctx *ctx, u8 *dst, + const u8 *src); + +static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst, + const u8 *src) +{ + __serpent_enc_blk_8way_avx(ctx, dst, src, false); +} + +static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst, + const u8 *src) +{ + __serpent_enc_blk_8way_avx(ctx, dst, src, true); +} + +static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst, + const u8 *src) +{ + serpent_dec_blk_8way_avx(ctx, dst, src); +} + +#endif diff --git a/arch/x86/include/asm/crypto/serpent-sse2.h b/arch/x86/include/asm/crypto/serpent-sse2.h new file mode 100644 index 0000000..e6e77df --- /dev/null +++ b/arch/x86/include/asm/crypto/serpent-sse2.h @@ -0,0 +1,63 @@ +#ifndef ASM_X86_SERPENT_SSE2_H +#define ASM_X86_SERPENT_SSE2_H + +#include +#include + +#ifdef CONFIG_X86_32 + +#define SERPENT_PARALLEL_BLOCKS 4 + 
+asmlinkage void __serpent_enc_blk_4way(struct serpent_ctx *ctx, u8 *dst, + const u8 *src, bool xor); +asmlinkage void serpent_dec_blk_4way(struct serpent_ctx *ctx, u8 *dst, + const u8 *src); + +static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst, + const u8 *src) +{ + __serpent_enc_blk_4way(ctx, dst, src, false); +} + +static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst, + const u8 *src) +{ + __serpent_enc_blk_4way(ctx, dst, src, true); +} + +static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst, + const u8 *src) +{ + serpent_dec_blk_4way(ctx, dst, src); +} + +#else + +#define SERPENT_PARALLEL_BLOCKS 8 + +asmlinkage void __serpent_enc_blk_8way(struct serpent_ctx *ctx, u8 *dst, + const u8 *src, bool xor); +asmlinkage void serpent_dec_blk_8way(struct serpent_ctx *ctx, u8 *dst, + const u8 *src); + +static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst, + const u8 *src) +{ + __serpent_enc_blk_8way(ctx, dst, src, false); +} + +static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst, + const u8 *src) +{ + __serpent_enc_blk_8way(ctx, dst, src, true); +} + +static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst, + const u8 *src) +{ + serpent_dec_blk_8way(ctx, dst, src); +} + +#endif + +#endif diff --git a/arch/x86/include/asm/serpent-avx.h b/arch/x86/include/asm/serpent-avx.h deleted file mode 100644 index 432deed..0000000 --- a/arch/x86/include/asm/serpent-avx.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef ASM_X86_SERPENT_AVX_H -#define ASM_X86_SERPENT_AVX_H - -#include -#include - -#define SERPENT_PARALLEL_BLOCKS 8 - -asmlinkage void __serpent_enc_blk_8way_avx(struct serpent_ctx *ctx, u8 *dst, - const u8 *src, bool xor); -asmlinkage void serpent_dec_blk_8way_avx(struct serpent_ctx *ctx, u8 *dst, - const u8 *src); - -static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) -{ - __serpent_enc_blk_8way_avx(ctx, dst, src, 
false); -} - -static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) -{ - __serpent_enc_blk_8way_avx(ctx, dst, src, true); -} - -static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) -{ - serpent_dec_blk_8way_avx(ctx, dst, src); -} - -#endif diff --git a/arch/x86/include/asm/serpent-sse2.h b/arch/x86/include/asm/serpent-sse2.h deleted file mode 100644 index e6e77df..0000000 --- a/arch/x86/include/asm/serpent-sse2.h +++ /dev/null @@ -1,63 +0,0 @@ -#ifndef ASM_X86_SERPENT_SSE2_H -#define ASM_X86_SERPENT_SSE2_H - -#include -#include - -#ifdef CONFIG_X86_32 - -#define SERPENT_PARALLEL_BLOCKS 4 - -asmlinkage void __serpent_enc_blk_4way(struct serpent_ctx *ctx, u8 *dst, - const u8 *src, bool xor); -asmlinkage void serpent_dec_blk_4way(struct serpent_ctx *ctx, u8 *dst, - const u8 *src); - -static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) -{ - __serpent_enc_blk_4way(ctx, dst, src, false); -} - -static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) -{ - __serpent_enc_blk_4way(ctx, dst, src, true); -} - -static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) -{ - serpent_dec_blk_4way(ctx, dst, src); -} - -#else - -#define SERPENT_PARALLEL_BLOCKS 8 - -asmlinkage void __serpent_enc_blk_8way(struct serpent_ctx *ctx, u8 *dst, - const u8 *src, bool xor); -asmlinkage void serpent_dec_blk_8way(struct serpent_ctx *ctx, u8 *dst, - const u8 *src); - -static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) -{ - __serpent_enc_blk_8way(ctx, dst, src, false); -} - -static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) -{ - __serpent_enc_blk_8way(ctx, dst, src, true); -} - -static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) -{ - serpent_dec_blk_8way(ctx, dst, src); -} - -#endif - 
-#endif -- cgit v0.10.2 From 70ef2601feb09d40f4086d055700b7923b3c2d6f Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Mon, 18 Jun 2012 14:07:50 +0300 Subject: crypto: move arch/x86/include/asm/aes.h to arch/x86/include/asm/crypto/ Move AES header to the new asm/crypto directory. Signed-off-by: Jussi Kivilinna Signed-off-by: Herbert Xu diff --git a/arch/x86/crypto/aes_glue.c b/arch/x86/crypto/aes_glue.c index 8efcf42..59b37de 100644 --- a/arch/x86/crypto/aes_glue.c +++ b/arch/x86/crypto/aes_glue.c @@ -5,7 +5,7 @@ #include #include -#include <asm/aes.h> +#include <asm/crypto/aes.h> asmlinkage void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in); asmlinkage void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in); diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 7c9d54d8..d662615 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -30,7 +30,7 @@ #include #include #include -#include <asm/aes.h> +#include <asm/crypto/aes.h> #include #include #include diff --git a/arch/x86/include/asm/aes.h b/arch/x86/include/asm/aes.h deleted file mode 100644 index 80545a1..0000000 --- a/arch/x86/include/asm/aes.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef ASM_X86_AES_H -#define ASM_X86_AES_H - -#include -#include - -void crypto_aes_encrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, - const u8 *src); -void crypto_aes_decrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, - const u8 *src); -#endif diff --git a/arch/x86/include/asm/crypto/aes.h b/arch/x86/include/asm/crypto/aes.h new file mode 100644 index 0000000..80545a1 --- /dev/null +++ b/arch/x86/include/asm/crypto/aes.h @@ -0,0 +1,11 @@ +#ifndef ASM_X86_AES_H +#define ASM_X86_AES_H + +#include +#include + +void crypto_aes_encrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, + const u8 *src); +void crypto_aes_decrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, + const u8 *src); +#endif -- cgit v0.10.2 From 70d793cc30a129d974363b4f3c22c9db6bbb18ed Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Fri, 
22 Jun 2012 19:42:35 -0500 Subject: crypto: caam - remove line continuations from ablkcipher_append_src_dst presumably leftovers from possible macro development. Signed-off-by: Kim Phillips Signed-off-by: Herbert Xu diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index 4eec389..5c10dc5 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -143,11 +143,11 @@ static inline void aead_append_ld_iv(u32 *desc, int ivsize) */ static inline void ablkcipher_append_src_dst(u32 *desc) { - append_math_add(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ); \ - append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); \ - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | \ - KEY_VLF | FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST1); \ - append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | KEY_VLF); \ + append_math_add(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ); + append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | + KEY_VLF | FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST1); + append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | KEY_VLF); } /* -- cgit v0.10.2 From a68d2595876c7cc56f122572fa0a3465d438fefc Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Fri, 22 Jun 2012 19:42:36 -0500 Subject: crypto: caam - fix input job ring element dma mapping size SEC4 h/w gets configured in 32- vs. 36-bit physical addressing modes depending on the size of dma_addr_t, which is not always equal to sizeof(u32 *). Also fixed alignment of a dma_unmap call whilst in there. 
Signed-off-by: Kim Phillips Signed-off-by: Herbert Xu diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c index 340fa32..6ce4c41 100644 --- a/drivers/crypto/caam/jr.c +++ b/drivers/crypto/caam/jr.c @@ -376,7 +376,7 @@ static int caam_jr_init(struct device *dev) /* Setup rings */ inpbusaddr = dma_map_single(dev, jrp->inpring, - sizeof(u32 *) * JOBR_DEPTH, + sizeof(dma_addr_t) * JOBR_DEPTH, DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, inpbusaddr)) { dev_err(dev, "caam_jr_init(): can't map input ring\n"); @@ -391,9 +391,9 @@ static int caam_jr_init(struct device *dev) DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, outbusaddr)) { dev_err(dev, "caam_jr_init(): can't map output ring\n"); - dma_unmap_single(dev, inpbusaddr, - sizeof(u32 *) * JOBR_DEPTH, - DMA_BIDIRECTIONAL); + dma_unmap_single(dev, inpbusaddr, + sizeof(dma_addr_t) * JOBR_DEPTH, + DMA_BIDIRECTIONAL); kfree(jrp->inpring); kfree(jrp->outring); kfree(jrp->entinfo); @@ -447,7 +447,7 @@ int caam_jr_shutdown(struct device *dev) dma_unmap_single(dev, outbusaddr, sizeof(struct jr_outentry) * JOBR_DEPTH, DMA_BIDIRECTIONAL); - dma_unmap_single(dev, inpbusaddr, sizeof(u32 *) * JOBR_DEPTH, + dma_unmap_single(dev, inpbusaddr, sizeof(dma_addr_t) * JOBR_DEPTH, DMA_BIDIRECTIONAL); kfree(jrp->outring); kfree(jrp->inpring); -- cgit v0.10.2 From c4b664063ea5c007f05d2d23aa6edc9cfd385aa3 Mon Sep 17 00:00:00 2001 From: Yashpal Dutta Date: Fri, 22 Jun 2012 19:42:37 -0500 Subject: crypto: caam - fix start index for Protocol shared descriptors In case of protocol acceleration descriptors, Shared descriptor header must carry size of header length + PDB length in words which will be skipped by DECO while processing descriptor to provide first command word offset Signed-off-by: Yashpal Dutta Signed-off-by: Kim Phillips Signed-off-by: Herbert Xu diff --git a/drivers/crypto/caam/desc_constr.h b/drivers/crypto/caam/desc_constr.h index 348b882..0d31e27 100644 --- a/drivers/crypto/caam/desc_constr.h +++ 
b/drivers/crypto/caam/desc_constr.h @@ -1,7 +1,7 @@ /* * caam descriptor construction helper functions * - * Copyright 2008-2011 Freescale Semiconductor, Inc. + * Copyright 2008-2012 Freescale Semiconductor, Inc. */ #include "desc.h" @@ -64,7 +64,7 @@ static inline void init_sh_desc_pdb(u32 *desc, u32 options, size_t pdb_bytes) { u32 pdb_len = pdb_bytes / CAAM_CMD_SZ + 1; - init_sh_desc(desc, ((pdb_len << HDR_START_IDX_SHIFT) + pdb_len) | + init_sh_desc(desc, (((pdb_len + 1) << HDR_START_IDX_SHIFT) + pdb_len) | options); } -- cgit v0.10.2 From 991c569c5df68609b24a0aba5e5fd4879225c4cf Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Fri, 22 Jun 2012 19:42:38 -0500 Subject: crypto: caam - fix descriptor length adjustments for protocol descriptors init_desc, by always ORing with 1 for the descriptor header inclusion into the descriptor length, and init_sh_desc_pdb, by always specifying the descriptor length modification for the PDB via options, would not allow for odd length PDBs to be embedded in the constructed descriptor length. Fix this by simply changing the OR to an addition. also round-up pdb_bytes to the next SEC command unit size, to allow for, e.g., optional packet header bytes that aren't a multiple of CAAM_CMD_SZ. 
Reported-by: Radu-Andrei BULIE Signed-off-by: Kim Phillips Cc: Yashpal Dutta Signed-off-by: Herbert Xu diff --git a/drivers/crypto/caam/desc_constr.h b/drivers/crypto/caam/desc_constr.h index 0d31e27..8e1056f 100644 --- a/drivers/crypto/caam/desc_constr.h +++ b/drivers/crypto/caam/desc_constr.h @@ -51,7 +51,7 @@ static inline void *sh_desc_pdb(u32 *desc) static inline void init_desc(u32 *desc, u32 options) { - *desc = options | HDR_ONE | 1; + *desc = (options | HDR_ONE) + 1; } static inline void init_sh_desc(u32 *desc, u32 options) @@ -62,7 +62,7 @@ static inline void init_sh_desc(u32 *desc, u32 options) static inline void init_sh_desc_pdb(u32 *desc, u32 options, size_t pdb_bytes) { - u32 pdb_len = pdb_bytes / CAAM_CMD_SZ + 1; + u32 pdb_len = (pdb_bytes + CAAM_CMD_SZ - 1) / CAAM_CMD_SZ; init_sh_desc(desc, (((pdb_len + 1) << HDR_START_IDX_SHIFT) + pdb_len) | options); -- cgit v0.10.2 From a23d80e0b77314cc863a075796bc2b6d5245ba60 Mon Sep 17 00:00:00 2001 From: Hemant Agrawal Date: Fri, 22 Jun 2012 19:42:39 -0500 Subject: crypto: caam - add PDB (Protocol Descriptor Block) definitions Add a PDB header file to support building protocol descriptors. 
Signed-off-by: Steve Cornelius Signed-off-by: Hemant Agrawal Signed-off-by: Kim Phillips Signed-off-by: Herbert Xu diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h index a17c295..af25e76 100644 --- a/drivers/crypto/caam/desc.h +++ b/drivers/crypto/caam/desc.h @@ -1585,20 +1585,4 @@ #define NFIFOENTRY_PLEN_SHIFT 0 #define NFIFOENTRY_PLEN_MASK (0xFF << NFIFOENTRY_PLEN_SHIFT) -/* - * PDB internal definitions - */ - -/* IPSec ESP CBC Encap/Decap Options */ -#define PDBOPTS_ESPCBC_ARSNONE 0x00 /* no antireplay window */ -#define PDBOPTS_ESPCBC_ARS32 0x40 /* 32-entry antireplay window */ -#define PDBOPTS_ESPCBC_ARS64 0xc0 /* 64-entry antireplay window */ -#define PDBOPTS_ESPCBC_IVSRC 0x20 /* IV comes from internal random gen */ -#define PDBOPTS_ESPCBC_ESN 0x10 /* extended sequence included */ -#define PDBOPTS_ESPCBC_OUTFMT 0x08 /* output only decapsulation (decap) */ -#define PDBOPTS_ESPCBC_IPHDRSRC 0x08 /* IP header comes from PDB (encap) */ -#define PDBOPTS_ESPCBC_INCIPHDR 0x04 /* Prepend IP header to output frame */ -#define PDBOPTS_ESPCBC_IPVSN 0x02 /* process IPv6 header */ -#define PDBOPTS_ESPCBC_TUNNEL 0x01 /* tunnel mode next-header byte */ - #endif /* DESC_H */ diff --git a/drivers/crypto/caam/pdb.h b/drivers/crypto/caam/pdb.h new file mode 100644 index 0000000..62950d2 --- /dev/null +++ b/drivers/crypto/caam/pdb.h @@ -0,0 +1,401 @@ +/* + * CAAM Protocol Data Block (PDB) definition header file + * + * Copyright 2008-2012 Freescale Semiconductor, Inc. + * + */ + +#ifndef CAAM_PDB_H +#define CAAM_PDB_H + +/* + * PDB- IPSec ESP Header Modification Options + */ +#define PDBHMO_ESP_DECAP_SHIFT 12 +#define PDBHMO_ESP_ENCAP_SHIFT 4 +/* + * Encap and Decap - Decrement TTL (Hop Limit) - Based on the value of the + * Options Byte IP version (IPvsn) field: + * if IPv4, decrement the inner IP header TTL field (byte 8); + * if IPv6 decrement the inner IP header Hop Limit field (byte 7). 
+*/ +#define PDBHMO_ESP_DECAP_DEC_TTL (0x02 << PDBHMO_ESP_DECAP_SHIFT) +#define PDBHMO_ESP_ENCAP_DEC_TTL (0x02 << PDBHMO_ESP_ENCAP_SHIFT) +/* + * Decap - DiffServ Copy - Copy the IPv4 TOS or IPv6 Traffic Class byte + * from the outer IP header to the inner IP header. + */ +#define PDBHMO_ESP_DIFFSERV (0x01 << PDBHMO_ESP_DECAP_SHIFT) +/* + * Encap- Copy DF bit -if an IPv4 tunnel mode outer IP header is coming from + * the PDB, copy the DF bit from the inner IP header to the outer IP header. + */ +#define PDBHMO_ESP_DFBIT (0x04 << PDBHMO_ESP_ENCAP_SHIFT) + +/* + * PDB - IPSec ESP Encap/Decap Options + */ +#define PDBOPTS_ESP_ARSNONE 0x00 /* no antireplay window */ +#define PDBOPTS_ESP_ARS32 0x40 /* 32-entry antireplay window */ +#define PDBOPTS_ESP_ARS64 0xc0 /* 64-entry antireplay window */ +#define PDBOPTS_ESP_IVSRC 0x20 /* IV comes from internal random gen */ +#define PDBOPTS_ESP_ESN 0x10 /* extended sequence included */ +#define PDBOPTS_ESP_OUTFMT 0x08 /* output only decapsulation (decap) */ +#define PDBOPTS_ESP_IPHDRSRC 0x08 /* IP header comes from PDB (encap) */ +#define PDBOPTS_ESP_INCIPHDR 0x04 /* Prepend IP header to output frame */ +#define PDBOPTS_ESP_IPVSN 0x02 /* process IPv6 header */ +#define PDBOPTS_ESP_TUNNEL 0x01 /* tunnel mode next-header byte */ +#define PDBOPTS_ESP_IPV6 0x02 /* ip header version is V6 */ +#define PDBOPTS_ESP_DIFFSERV 0x40 /* copy TOS/TC from inner iphdr */ +#define PDBOPTS_ESP_UPDATE_CSUM 0x80 /* encap-update ip header checksum */ +#define PDBOPTS_ESP_VERIFY_CSUM 0x20 /* decap-validate ip header checksum */ + +/* + * General IPSec encap/decap PDB definitions + */ +struct ipsec_encap_cbc { + u32 iv[4]; +}; + +struct ipsec_encap_ctr { + u32 ctr_nonce; + u32 ctr_initial; + u32 iv[2]; +}; + +struct ipsec_encap_ccm { + u32 salt; /* lower 24 bits */ + u8 b0_flags; + u8 ctr_flags; + u16 ctr_initial; + u32 iv[2]; +}; + +struct ipsec_encap_gcm { + u32 salt; /* lower 24 bits */ + u32 rsvd1; + u32 iv[2]; +}; + +struct ipsec_encap_pdb { + u8 
hmo_rsvd; + u8 ip_nh; + u8 ip_nh_offset; + u8 options; + u32 seq_num_ext_hi; + u32 seq_num; + union { + struct ipsec_encap_cbc cbc; + struct ipsec_encap_ctr ctr; + struct ipsec_encap_ccm ccm; + struct ipsec_encap_gcm gcm; + }; + u32 spi; + u16 rsvd1; + u16 ip_hdr_len; + u32 ip_hdr[0]; /* optional IP Header content */ +}; + +struct ipsec_decap_cbc { + u32 rsvd[2]; +}; + +struct ipsec_decap_ctr { + u32 salt; + u32 ctr_initial; +}; + +struct ipsec_decap_ccm { + u32 salt; + u8 iv_flags; + u8 ctr_flags; + u16 ctr_initial; +}; + +struct ipsec_decap_gcm { + u32 salt; + u32 resvd; +}; + +struct ipsec_decap_pdb { + u16 hmo_ip_hdr_len; + u8 ip_nh_offset; + u8 options; + union { + struct ipsec_decap_cbc cbc; + struct ipsec_decap_ctr ctr; + struct ipsec_decap_ccm ccm; + struct ipsec_decap_gcm gcm; + }; + u32 seq_num_ext_hi; + u32 seq_num; + u32 anti_replay[2]; + u32 end_index[0]; +}; + +/* + * IPSec ESP Datapath Protocol Override Register (DPOVRD) + */ +struct ipsec_deco_dpovrd { +#define IPSEC_ENCAP_DECO_DPOVRD_USE 0x80 + u8 ovrd_ecn; + u8 ip_hdr_len; + u8 nh_offset; + u8 next_header; /* reserved if decap */ +}; + +/* + * IEEE 802.11i WiFi Protocol Data Block + */ +#define WIFI_PDBOPTS_FCS 0x01 +#define WIFI_PDBOPTS_AR 0x40 + +struct wifi_encap_pdb { + u16 mac_hdr_len; + u8 rsvd; + u8 options; + u8 iv_flags; + u8 pri; + u16 pn1; + u32 pn2; + u16 frm_ctrl_mask; + u16 seq_ctrl_mask; + u8 rsvd1[2]; + u8 cnst; + u8 key_id; + u8 ctr_flags; + u8 rsvd2; + u16 ctr_init; +}; + +struct wifi_decap_pdb { + u16 mac_hdr_len; + u8 rsvd; + u8 options; + u8 iv_flags; + u8 pri; + u16 pn1; + u32 pn2; + u16 frm_ctrl_mask; + u16 seq_ctrl_mask; + u8 rsvd1[4]; + u8 ctr_flags; + u8 rsvd2; + u16 ctr_init; +}; + +/* + * IEEE 802.16 WiMAX Protocol Data Block + */ +#define WIMAX_PDBOPTS_FCS 0x01 +#define WIMAX_PDBOPTS_AR 0x40 /* decap only */ + +struct wimax_encap_pdb { + u8 rsvd[3]; + u8 options; + u32 nonce; + u8 b0_flags; + u8 ctr_flags; + u16 ctr_init; + /* begin DECO writeback region */ + u32 pn; + 
/* end DECO writeback region */ +}; + +struct wimax_decap_pdb { + u8 rsvd[3]; + u8 options; + u32 nonce; + u8 iv_flags; + u8 ctr_flags; + u16 ctr_init; + /* begin DECO writeback region */ + u32 pn; + u8 rsvd1[2]; + u16 antireplay_len; + u64 antireplay_scorecard; + /* end DECO writeback region */ +}; + +/* + * IEEE 802.1AE MacSEC Protocol Data Block + */ +#define MACSEC_PDBOPTS_FCS 0x01 +#define MACSEC_PDBOPTS_AR 0x40 /* used in decap only */ + +struct macsec_encap_pdb { + u16 aad_len; + u8 rsvd; + u8 options; + u64 sci; + u16 ethertype; + u8 tci_an; + u8 rsvd1; + /* begin DECO writeback region */ + u32 pn; + /* end DECO writeback region */ +}; + +struct macsec_decap_pdb { + u16 aad_len; + u8 rsvd; + u8 options; + u64 sci; + u8 rsvd1[3]; + /* begin DECO writeback region */ + u8 antireplay_len; + u32 pn; + u64 antireplay_scorecard; + /* end DECO writeback region */ +}; + +/* + * SSL/TLS/DTLS Protocol Data Blocks + */ + +#define TLS_PDBOPTS_ARS32 0x40 +#define TLS_PDBOPTS_ARS64 0xc0 +#define TLS_PDBOPTS_OUTFMT 0x08 +#define TLS_PDBOPTS_IV_WRTBK 0x02 /* 1.1/1.2/DTLS only */ +#define TLS_PDBOPTS_EXP_RND_IV 0x01 /* 1.1/1.2/DTLS only */ + +struct tls_block_encap_pdb { + u8 type; + u8 version[2]; + u8 options; + u64 seq_num; + u32 iv[4]; +}; + +struct tls_stream_encap_pdb { + u8 type; + u8 version[2]; + u8 options; + u64 seq_num; + u8 i; + u8 j; + u8 rsvd1[2]; +}; + +struct dtls_block_encap_pdb { + u8 type; + u8 version[2]; + u8 options; + u16 epoch; + u16 seq_num[3]; + u32 iv[4]; +}; + +struct tls_block_decap_pdb { + u8 rsvd[3]; + u8 options; + u64 seq_num; + u32 iv[4]; +}; + +struct tls_stream_decap_pdb { + u8 rsvd[3]; + u8 options; + u64 seq_num; + u8 i; + u8 j; + u8 rsvd1[2]; +}; + +struct dtls_block_decap_pdb { + u8 rsvd[3]; + u8 options; + u16 epoch; + u16 seq_num[3]; + u32 iv[4]; + u64 antireplay_scorecard; +}; + +/* + * SRTP Protocol Data Blocks + */ +#define SRTP_PDBOPTS_MKI 0x08 +#define SRTP_PDBOPTS_AR 0x40 + +struct srtp_encap_pdb { + u8 x_len; + u8 mki_len; +
u8 n_tag; + u8 options; + u32 cnst0; + u8 rsvd[2]; + u16 cnst1; + u16 salt[7]; + u16 cnst2; + u32 rsvd1; + u32 roc; + u32 opt_mki; +}; + +struct srtp_decap_pdb { + u8 x_len; + u8 mki_len; + u8 n_tag; + u8 options; + u32 cnst0; + u8 rsvd[2]; + u16 cnst1; + u16 salt[7]; + u16 cnst2; + u16 rsvd1; + u16 seq_num; + u32 roc; + u64 antireplay_scorecard; +}; + +/* + * DSA/ECDSA Protocol Data Blocks + * Two of these exist: DSA-SIGN, and DSA-VERIFY. They are similar + * except for the treatment of "w" for verify, "s" for sign, + * and the placement of "a,b". + */ +#define DSA_PDB_SGF_SHIFT 24 +#define DSA_PDB_SGF_MASK (0xff << DSA_PDB_SGF_SHIFT) +#define DSA_PDB_SGF_Q (0x80 << DSA_PDB_SGF_SHIFT) +#define DSA_PDB_SGF_R (0x40 << DSA_PDB_SGF_SHIFT) +#define DSA_PDB_SGF_G (0x20 << DSA_PDB_SGF_SHIFT) +#define DSA_PDB_SGF_W (0x10 << DSA_PDB_SGF_SHIFT) +#define DSA_PDB_SGF_S (0x10 << DSA_PDB_SGF_SHIFT) +#define DSA_PDB_SGF_F (0x08 << DSA_PDB_SGF_SHIFT) +#define DSA_PDB_SGF_C (0x04 << DSA_PDB_SGF_SHIFT) +#define DSA_PDB_SGF_D (0x02 << DSA_PDB_SGF_SHIFT) +#define DSA_PDB_SGF_AB_SIGN (0x02 << DSA_PDB_SGF_SHIFT) +#define DSA_PDB_SGF_AB_VERIFY (0x01 << DSA_PDB_SGF_SHIFT) + +#define DSA_PDB_L_SHIFT 7 +#define DSA_PDB_L_MASK (0x3ff << DSA_PDB_L_SHIFT) + +#define DSA_PDB_N_MASK 0x7f + +struct dsa_sign_pdb { + u32 sgf_ln; /* Use DSA_PDB_ definitions per above */ + u8 *q; + u8 *r; + u8 *g; /* or Gx,y */ + u8 *s; + u8 *f; + u8 *c; + u8 *d; + u8 *ab; /* ECC only */ + u8 *u; +}; + +struct dsa_verify_pdb { + u32 sgf_ln; + u8 *q; + u8 *r; + u8 *g; /* or Gx,y */ + u8 *w; /* or Wx,y */ + u8 *f; + u8 *c; + u8 *d; + u8 *tmp; /* temporary data block */ + u8 *ab; /* only used if ECC processing */ +}; + +#endif -- cgit v0.10.2 From 6ec47334935ffbc3eccc227ed22ab716be9942f1 Mon Sep 17 00:00:00 2001 From: Yuan Kang Date: Fri, 22 Jun 2012 19:48:43 -0500 Subject: crypto: caam - support external seq in/out lengths functions for external storage of seq in/out lengths, i.e., for 32-bit lengths.
These type-dependent functions automatically determine whether to store the length internally (embedded in the command header word) or externally (after the address pointer), based on size of the type given. Signed-off-by: Yuan Kang Signed-off-by: Kim Phillips Signed-off-by: Herbert Xu diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index 5c10dc5..d0f8df1 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -37,9 +37,10 @@ * | ShareDesc Pointer | * | SEQ_OUT_PTR | * | (output buffer) | + * | (output length) | * | SEQ_IN_PTR | * | (input buffer) | - * | LOAD (to DECO) | + * | (input length) | * --------------------- */ @@ -62,7 +63,7 @@ #define CAAM_MAX_IV_LENGTH 16 /* length of descriptors text */ -#define DESC_JOB_IO_LEN (CAAM_CMD_SZ * 3 + CAAM_PTR_SZ * 3) +#define DESC_JOB_IO_LEN (CAAM_CMD_SZ * 5 + CAAM_PTR_SZ * 3) #define DESC_AEAD_BASE (4 * CAAM_CMD_SZ) #define DESC_AEAD_ENC_LEN (DESC_AEAD_BASE + 16 * CAAM_CMD_SZ) diff --git a/drivers/crypto/caam/desc_constr.h b/drivers/crypto/caam/desc_constr.h index 8e1056f..c85c1f0 100644 --- a/drivers/crypto/caam/desc_constr.h +++ b/drivers/crypto/caam/desc_constr.h @@ -117,6 +117,15 @@ static inline void append_cmd_ptr(u32 *desc, dma_addr_t ptr, int len, append_ptr(desc, ptr); } +/* Write length after pointer, rather than inside command */ +static inline void append_cmd_ptr_extlen(u32 *desc, dma_addr_t ptr, + unsigned int len, u32 command) +{ + append_cmd(desc, command); + append_ptr(desc, ptr); + append_cmd(desc, len); +} + static inline void append_cmd_data(u32 *desc, void *data, int len, u32 command) { @@ -166,13 +175,22 @@ static inline void append_##cmd(u32 *desc, dma_addr_t ptr, unsigned int len, \ append_cmd_ptr(desc, ptr, len, CMD_##op | options); \ } APPEND_CMD_PTR(key, KEY) -APPEND_CMD_PTR(seq_in_ptr, SEQ_IN_PTR) -APPEND_CMD_PTR(seq_out_ptr, SEQ_OUT_PTR) APPEND_CMD_PTR(load, LOAD) APPEND_CMD_PTR(store, STORE) APPEND_CMD_PTR(fifo_load, FIFO_LOAD) 
APPEND_CMD_PTR(fifo_store, FIFO_STORE) +#define APPEND_SEQ_PTR_INTLEN(cmd, op) \ +static inline void append_seq_##cmd##_ptr_intlen(u32 *desc, dma_addr_t ptr, \ + unsigned int len, \ + u32 options) \ +{ \ + PRINT_POS; \ + append_cmd_ptr(desc, ptr, len, CMD_SEQ_##op##_PTR | options); \ +} +APPEND_SEQ_PTR_INTLEN(in, IN) +APPEND_SEQ_PTR_INTLEN(out, OUT) + #define APPEND_CMD_PTR_TO_IMM(cmd, op) \ static inline void append_##cmd##_as_imm(u32 *desc, void *data, \ unsigned int len, u32 options) \ @@ -183,6 +201,33 @@ static inline void append_##cmd##_as_imm(u32 *desc, void *data, \ APPEND_CMD_PTR_TO_IMM(load, LOAD); APPEND_CMD_PTR_TO_IMM(fifo_load, FIFO_LOAD); +#define APPEND_CMD_PTR_EXTLEN(cmd, op) \ +static inline void append_##cmd##_extlen(u32 *desc, dma_addr_t ptr, \ + unsigned int len, u32 options) \ +{ \ + PRINT_POS; \ + append_cmd_ptr_extlen(desc, ptr, len, CMD_##op | SQIN_EXT | options); \ +} +APPEND_CMD_PTR_EXTLEN(seq_in_ptr, SEQ_IN_PTR) +APPEND_CMD_PTR_EXTLEN(seq_out_ptr, SEQ_OUT_PTR) + +/* + * Determine whether to store length internally or externally depending on + * the size of its type + */ +#define APPEND_CMD_PTR_LEN(cmd, op, type) \ +static inline void append_##cmd(u32 *desc, dma_addr_t ptr, \ + type len, u32 options) \ +{ \ + PRINT_POS; \ + if (sizeof(type) > sizeof(u16)) \ + append_##cmd##_extlen(desc, ptr, len, options); \ + else \ + append_##cmd##_intlen(desc, ptr, len, options); \ +} +APPEND_CMD_PTR_LEN(seq_in_ptr, SEQ_IN_PTR, u32) +APPEND_CMD_PTR_LEN(seq_out_ptr, SEQ_OUT_PTR, u32) + /* * 2nd variant for commands whose specified immediate length differs * from length of immediate data provided, e.g., split keys -- cgit v0.10.2 From 8009a383f28e853df1a1b08d405ccf67ba860fcc Mon Sep 17 00:00:00 2001 From: Yuan Kang Date: Fri, 22 Jun 2012 19:48:44 -0500 Subject: crypto: caam - remove jr register/deregister remove caam_jr_register and caam_jr_deregister to allow sharing of job rings. 
Signed-off-by: Yuan Kang Signed-off-by: Kim Phillips Signed-off-by: Herbert Xu diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index d0f8df1..a4e266f 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -2228,7 +2228,7 @@ static int caam_cra_init(struct crypto_tfm *tfm) * distribute tfms across job rings to ensure in-order * crypto request processing per tfm */ - ctx->jrdev = priv->algapi_jr[(tgt_jr / 2) % priv->num_jrs_for_algapi]; + ctx->jrdev = priv->jrdev[(tgt_jr / 2) % priv->total_jobrs]; /* copy descriptor header template value */ ctx->class1_alg_type = OP_TYPE_CLASS1_ALG | caam_alg->class1_alg_type; @@ -2265,7 +2265,6 @@ static void __exit caam_algapi_exit(void) struct device *ctrldev; struct caam_drv_private *priv; struct caam_crypto_alg *t_alg, *n; - int i, err; dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec-v4.0"); if (!dev_node) { @@ -2290,13 +2289,6 @@ static void __exit caam_algapi_exit(void) list_del(&t_alg->entry); kfree(t_alg); } - - for (i = 0; i < priv->total_jobrs; i++) { - err = caam_jr_deregister(priv->algapi_jr[i]); - if (err < 0) - break; - } - kfree(priv->algapi_jr); } static struct caam_crypto_alg *caam_alg_alloc(struct device *ctrldev, @@ -2349,7 +2341,7 @@ static int __init caam_algapi_init(void) { struct device_node *dev_node; struct platform_device *pdev; - struct device *ctrldev, **jrdev; + struct device *ctrldev; struct caam_drv_private *priv; int i = 0, err = 0; @@ -2370,24 +2362,6 @@ static int __init caam_algapi_init(void) INIT_LIST_HEAD(&priv->alg_list); - jrdev = kmalloc(sizeof(*jrdev) * priv->total_jobrs, GFP_KERNEL); - if (!jrdev) - return -ENOMEM; - - for (i = 0; i < priv->total_jobrs; i++) { - err = caam_jr_register(ctrldev, &jrdev[i]); - if (err < 0) - break; - } - if (err < 0 && i == 0) { - dev_err(ctrldev, "algapi error in job ring registration: %d\n", - err); - kfree(jrdev); - return err; - } - - priv->num_jrs_for_algapi = i; - priv->algapi_jr = jrdev; 
atomic_set(&priv->tfm_count, -1); /* register crypto algorithms the device supports */ diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h index a34be01..462be99 100644 --- a/drivers/crypto/caam/intern.h +++ b/drivers/crypto/caam/intern.h @@ -86,8 +86,6 @@ struct caam_drv_private { /* which jr allocated to scatterlist crypto */ atomic_t tfm_count ____cacheline_aligned; - int num_jrs_for_algapi; - struct device **algapi_jr; /* list of registered crypto algorithms (mk generic context handle?) */ struct list_head alg_list; -- cgit v0.10.2 From 4c1ec1f9301549db229bc6dce916f8a99d1f82d6 Mon Sep 17 00:00:00 2001 From: Yuan Kang Date: Fri, 22 Jun 2012 19:48:45 -0500 Subject: crypto: caam - refactor key_gen, sg create separate files for split key generation and scatterlist functions. Signed-off-by: Yuan Kang Signed-off-by: Kim Phillips Signed-off-by: Herbert Xu diff --git a/drivers/crypto/caam/Makefile b/drivers/crypto/caam/Makefile index ef39011..4447e57 100644 --- a/drivers/crypto/caam/Makefile +++ b/drivers/crypto/caam/Makefile @@ -5,4 +5,4 @@ obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam.o obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API) += caamalg.o -caam-objs := ctrl.o jr.o error.o +caam-objs := ctrl.o jr.o error.o key_gen.o diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index a4e266f..ea0295d 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -51,6 +51,8 @@ #include "desc_constr.h" #include "jr.h" #include "error.h" +#include "sg_link_tbl.h" +#include "key_gen.h" /* * crypto alg @@ -453,121 +455,12 @@ static int aead_setauthsize(struct crypto_aead *authenc, return 0; } -struct split_key_result { - struct completion completion; - int err; -}; - -static void split_key_done(struct device *dev, u32 *desc, u32 err, - void *context) +static u32 gen_split_aead_key(struct caam_ctx *ctx, const u8 *key_in, + u32 authkeylen) { - struct split_key_result *res = context; - -#ifdef DEBUG - dev_err(dev, "%s %d: 
err 0x%x\n", __func__, __LINE__, err); -#endif - - if (err) { - char tmp[CAAM_ERROR_STR_MAX]; - - dev_err(dev, "%08x: %s\n", err, caam_jr_strstatus(tmp, err)); - } - - res->err = err; - - complete(&res->completion); -} - -/* -get a split ipad/opad key - -Split key generation----------------------------------------------- - -[00] 0xb0810008 jobdesc: stidx=1 share=never len=8 -[01] 0x04000014 key: class2->keyreg len=20 - @0xffe01000 -[03] 0x84410014 operation: cls2-op sha1 hmac init dec -[04] 0x24940000 fifold: class2 msgdata-last2 len=0 imm -[05] 0xa4000001 jump: class2 local all ->1 [06] -[06] 0x64260028 fifostr: class2 mdsplit-jdk len=40 - @0xffe04000 -*/ -static u32 gen_split_key(struct caam_ctx *ctx, const u8 *key_in, u32 authkeylen) -{ - struct device *jrdev = ctx->jrdev; - u32 *desc; - struct split_key_result result; - dma_addr_t dma_addr_in, dma_addr_out; - int ret = 0; - - desc = kmalloc(CAAM_CMD_SZ * 6 + CAAM_PTR_SZ * 2, GFP_KERNEL | GFP_DMA); - - init_job_desc(desc, 0); - - dma_addr_in = dma_map_single(jrdev, (void *)key_in, authkeylen, - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, dma_addr_in)) { - dev_err(jrdev, "unable to map key input memory\n"); - kfree(desc); - return -ENOMEM; - } - append_key(desc, dma_addr_in, authkeylen, CLASS_2 | - KEY_DEST_CLASS_REG); - - /* Sets MDHA up into an HMAC-INIT */ - append_operation(desc, ctx->alg_op | OP_ALG_DECRYPT | - OP_ALG_AS_INIT); - - /* - * do a FIFO_LOAD of zero, this will trigger the internal key expansion - into both pads inside MDHA - */ - append_fifo_load_as_imm(desc, NULL, 0, LDST_CLASS_2_CCB | - FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST2); - - /* - * FIFO_STORE with the explicit split-key content store - * (0x26 output type) - */ - dma_addr_out = dma_map_single(jrdev, ctx->key, ctx->split_key_pad_len, - DMA_FROM_DEVICE); - if (dma_mapping_error(jrdev, dma_addr_out)) { - dev_err(jrdev, "unable to map key output memory\n"); - kfree(desc); - return -ENOMEM; - } - append_fifo_store(desc, dma_addr_out, 
ctx->split_key_len, - LDST_CLASS_2_CCB | FIFOST_TYPE_SPLIT_KEK); - -#ifdef DEBUG - print_hex_dump(KERN_ERR, "ctx.key@"xstr(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, key_in, authkeylen, 1); - print_hex_dump(KERN_ERR, "jobdesc@"xstr(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); -#endif - - result.err = 0; - init_completion(&result.completion); - - ret = caam_jr_enqueue(jrdev, desc, split_key_done, &result); - if (!ret) { - /* in progress */ - wait_for_completion_interruptible(&result.completion); - ret = result.err; -#ifdef DEBUG - print_hex_dump(KERN_ERR, "ctx.key@"xstr(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, ctx->key, - ctx->split_key_pad_len, 1); -#endif - } - - dma_unmap_single(jrdev, dma_addr_out, ctx->split_key_pad_len, - DMA_FROM_DEVICE); - dma_unmap_single(jrdev, dma_addr_in, authkeylen, DMA_TO_DEVICE); - - kfree(desc); - - return ret; + return gen_split_key(ctx->jrdev, ctx->key, ctx->split_key_len, + ctx->split_key_pad_len, key_in, authkeylen, + ctx->alg_op); } static int aead_setkey(struct crypto_aead *aead, @@ -611,7 +504,7 @@ static int aead_setkey(struct crypto_aead *aead, DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1); #endif - ret = gen_split_key(ctx, key, authkeylen); + ret = gen_split_aead_key(ctx, key, authkeylen); if (ret) { goto badkey; } @@ -758,14 +651,6 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher, return ret; } -struct link_tbl_entry { - u64 ptr; - u32 len; - u8 reserved; - u8 buf_pool_id; - u16 offset; -}; - /* * aead_edesc - s/w-extended aead descriptor * @assoc_nents: number of segments in associated data (SPI+Seq) scatterlist @@ -1027,50 +912,6 @@ static void ablkcipher_decrypt_done(struct device *jrdev, u32 *desc, u32 err, ablkcipher_request_complete(req, err); } -static void sg_to_link_tbl_one(struct link_tbl_entry *link_tbl_ptr, - dma_addr_t dma, u32 len, u32 offset) -{ - link_tbl_ptr->ptr = dma; - link_tbl_ptr->len = len; - link_tbl_ptr->reserved = 0; - link_tbl_ptr->buf_pool_id = 
0; - link_tbl_ptr->offset = offset; -#ifdef DEBUG - print_hex_dump(KERN_ERR, "link_tbl_ptr@"xstr(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, link_tbl_ptr, - sizeof(struct link_tbl_entry), 1); -#endif -} - -/* - * convert scatterlist to h/w link table format - * but does not have final bit; instead, returns last entry - */ -static struct link_tbl_entry *sg_to_link_tbl(struct scatterlist *sg, - int sg_count, struct link_tbl_entry - *link_tbl_ptr, u32 offset) -{ - while (sg_count) { - sg_to_link_tbl_one(link_tbl_ptr, sg_dma_address(sg), - sg_dma_len(sg), offset); - link_tbl_ptr++; - sg = sg_next(sg); - sg_count--; - } - return link_tbl_ptr - 1; -} - -/* - * convert scatterlist to h/w link table format - * scatterlist must have been previously dma mapped - */ -static void sg_to_link_tbl_last(struct scatterlist *sg, int sg_count, - struct link_tbl_entry *link_tbl_ptr, u32 offset) -{ - link_tbl_ptr = sg_to_link_tbl(sg, sg_count, link_tbl_ptr, offset); - link_tbl_ptr->len |= 0x40000000; -} - /* * Fill in aead job descriptor */ @@ -1272,28 +1113,6 @@ static void init_ablkcipher_job(u32 *sh_desc, dma_addr_t ptr, } /* - * derive number of elements in scatterlist - */ -static int sg_count(struct scatterlist *sg_list, int nbytes) -{ - struct scatterlist *sg = sg_list; - int sg_nents = 0; - - while (nbytes > 0) { - sg_nents++; - nbytes -= sg->length; - if (!sg_is_last(sg) && (sg + 1)->length == 0) - BUG(); /* Not support chaining */ - sg = scatterwalk_sg_next(sg); - } - - if (likely(sg_nents == 1)) - return 0; - - return sg_nents; -} - -/* * allocate and map the aead extended descriptor */ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h index af25e76..48c1927 100644 --- a/drivers/crypto/caam/desc.h +++ b/drivers/crypto/caam/desc.h @@ -8,6 +8,16 @@ #ifndef DESC_H #define DESC_H +struct link_tbl_entry { + u64 ptr; +#define LINK_TBL_LEN_FIN 0x40000000 +#define LINK_TBL_LEN_EXT 0x80000000 + 
u32 len; + u8 reserved; + u8 buf_pool_id; + u16 offset; +}; + /* Max size of any CAAM descriptor in 32-bit words, inclusive of header */ #define MAX_CAAM_DESCSIZE 64 diff --git a/drivers/crypto/caam/key_gen.c b/drivers/crypto/caam/key_gen.c new file mode 100644 index 0000000..0028881 --- /dev/null +++ b/drivers/crypto/caam/key_gen.c @@ -0,0 +1,122 @@ +/* + * CAAM/SEC 4.x functions for handling key-generation jobs + * + * Copyright 2008-2011 Freescale Semiconductor, Inc. + * + */ +#include "compat.h" +#include "jr.h" +#include "error.h" +#include "desc_constr.h" +#include "key_gen.h" + +void split_key_done(struct device *dev, u32 *desc, u32 err, + void *context) +{ + struct split_key_result *res = context; + +#ifdef DEBUG + dev_err(dev, "%s %d: err 0x%x\n", __func__, __LINE__, err); +#endif + + if (err) { + char tmp[CAAM_ERROR_STR_MAX]; + + dev_err(dev, "%08x: %s\n", err, caam_jr_strstatus(tmp, err)); + } + + res->err = err; + + complete(&res->completion); +} +EXPORT_SYMBOL(split_key_done); +/* +get a split ipad/opad key + +Split key generation----------------------------------------------- + +[00] 0xb0810008 jobdesc: stidx=1 share=never len=8 +[01] 0x04000014 key: class2->keyreg len=20 + @0xffe01000 +[03] 0x84410014 operation: cls2-op sha1 hmac init dec +[04] 0x24940000 fifold: class2 msgdata-last2 len=0 imm +[05] 0xa4000001 jump: class2 local all ->1 [06] +[06] 0x64260028 fifostr: class2 mdsplit-jdk len=40 + @0xffe04000 +*/ +u32 gen_split_key(struct device *jrdev, u8 *key_out, int split_key_len, + int split_key_pad_len, const u8 *key_in, u32 keylen, + u32 alg_op) +{ + u32 *desc; + struct split_key_result result; + dma_addr_t dma_addr_in, dma_addr_out; + int ret = 0; + + desc = kmalloc(CAAM_CMD_SZ * 6 + CAAM_PTR_SZ * 2, GFP_KERNEL | GFP_DMA); + + init_job_desc(desc, 0); + + dma_addr_in = dma_map_single(jrdev, (void *)key_in, keylen, + DMA_TO_DEVICE); + if (dma_mapping_error(jrdev, dma_addr_in)) { + dev_err(jrdev, "unable to map key input memory\n"); + kfree(desc); 
+ return -ENOMEM; + } + append_key(desc, dma_addr_in, keylen, CLASS_2 | KEY_DEST_CLASS_REG); + + /* Sets MDHA up into an HMAC-INIT */ + append_operation(desc, alg_op | OP_ALG_DECRYPT | OP_ALG_AS_INIT); + + /* + * do a FIFO_LOAD of zero, this will trigger the internal key expansion + * into both pads inside MDHA + */ + append_fifo_load_as_imm(desc, NULL, 0, LDST_CLASS_2_CCB | + FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST2); + + /* + * FIFO_STORE with the explicit split-key content store + * (0x26 output type) + */ + dma_addr_out = dma_map_single(jrdev, key_out, split_key_pad_len, + DMA_FROM_DEVICE); + if (dma_mapping_error(jrdev, dma_addr_out)) { + dev_err(jrdev, "unable to map key output memory\n"); + kfree(desc); + return -ENOMEM; + } + append_fifo_store(desc, dma_addr_out, split_key_len, + LDST_CLASS_2_CCB | FIFOST_TYPE_SPLIT_KEK); + +#ifdef DEBUG + print_hex_dump(KERN_ERR, "ctx.key@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, key_in, keylen, 1); + print_hex_dump(KERN_ERR, "jobdesc@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); +#endif + + result.err = 0; + init_completion(&result.completion); + + ret = caam_jr_enqueue(jrdev, desc, split_key_done, &result); + if (!ret) { + /* in progress */ + wait_for_completion_interruptible(&result.completion); + ret = result.err; +#ifdef DEBUG + print_hex_dump(KERN_ERR, "ctx.key@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, key_out, + split_key_pad_len, 1); +#endif + } + + dma_unmap_single(jrdev, dma_addr_out, split_key_pad_len, + DMA_FROM_DEVICE); + dma_unmap_single(jrdev, dma_addr_in, keylen, DMA_TO_DEVICE); + + kfree(desc); + + return ret; +} diff --git a/drivers/crypto/caam/key_gen.h b/drivers/crypto/caam/key_gen.h new file mode 100644 index 0000000..d95d290 --- /dev/null +++ b/drivers/crypto/caam/key_gen.h @@ -0,0 +1,17 @@ +/* + * CAAM/SEC 4.x definitions for handling key-generation jobs + * + * Copyright 2008-2011 Freescale Semiconductor, Inc. 
+ * + */ + +struct split_key_result { + struct completion completion; + int err; +}; + +void split_key_done(struct device *dev, u32 *desc, u32 err, void *context); + +u32 gen_split_key(struct device *jrdev, u8 *key_out, int split_key_len, + int split_key_pad_len, const u8 *key_in, u32 keylen, + u32 alg_op); diff --git a/drivers/crypto/caam/sg_link_tbl.h b/drivers/crypto/caam/sg_link_tbl.h new file mode 100644 index 0000000..6df4349 --- /dev/null +++ b/drivers/crypto/caam/sg_link_tbl.h @@ -0,0 +1,84 @@ +/* + * CAAM/SEC 4.x functions for using scatterlists in caam driver + * + * Copyright 2008-2011 Freescale Semiconductor, Inc. + * + */ + +struct link_tbl_entry; + +/* + * convert single dma address to h/w link table format + */ +static inline void sg_to_link_tbl_one(struct link_tbl_entry *link_tbl_ptr, + dma_addr_t dma, u32 len, u32 offset) +{ + link_tbl_ptr->ptr = dma; + link_tbl_ptr->len = len; + link_tbl_ptr->reserved = 0; + link_tbl_ptr->buf_pool_id = 0; + link_tbl_ptr->offset = offset; +#ifdef DEBUG + print_hex_dump(KERN_ERR, "link_tbl_ptr@: ", + DUMP_PREFIX_ADDRESS, 16, 4, link_tbl_ptr, + sizeof(struct link_tbl_entry), 1); +#endif +} + +/* + * convert scatterlist to h/w link table format + * but does not have final bit; instead, returns last entry + */ +static inline struct link_tbl_entry * +sg_to_link_tbl(struct scatterlist *sg, int sg_count, + struct link_tbl_entry *link_tbl_ptr, u32 offset) +{ + while (sg_count) { + sg_to_link_tbl_one(link_tbl_ptr, sg_dma_address(sg), + sg_dma_len(sg), offset); + link_tbl_ptr++; + sg = sg_next(sg); + sg_count--; + } + return link_tbl_ptr - 1; +} + +/* + * convert scatterlist to h/w link table format + * scatterlist must have been previously dma mapped + */ +static inline void sg_to_link_tbl_last(struct scatterlist *sg, int sg_count, + struct link_tbl_entry *link_tbl_ptr, + u32 offset) +{ + link_tbl_ptr = sg_to_link_tbl(sg, sg_count, link_tbl_ptr, offset); + link_tbl_ptr->len |= LINK_TBL_LEN_FIN; +} + +/* count number of 
elements in scatterlist */ +static inline int __sg_count(struct scatterlist *sg_list, int nbytes) +{ + struct scatterlist *sg = sg_list; + int sg_nents = 0; + + while (nbytes > 0) { + sg_nents++; + nbytes -= sg->length; + if (!sg_is_last(sg) && (sg + 1)->length == 0) + BUG(); /* Not support chaining */ + sg = scatterwalk_sg_next(sg); + } + + return sg_nents; +} + +/* derive number of elements in scatterlist, but return 0 for 1 */ +static inline int sg_count(struct scatterlist *sg_list, int nbytes) +{ + int sg_nents = __sg_count(sg_list, nbytes); + + if (likely(sg_nents == 1)) + return 0; + + return sg_nents; +} -- cgit v0.10.2 From a299c837040bb47810b9d287dfe7deed6a254995 Mon Sep 17 00:00:00 2001 From: Yuan Kang Date: Fri, 22 Jun 2012 19:48:46 -0500 Subject: crypto: caam - link_tbl rename - rename scatterlist and link_tbl functions - link_tbl changed to sec4_sg - sg_to_link_tbl_one changed to dma_to_sec4_sg_one, since no scatterlist is used Signed-off-by: Yuan Kang Signed-off-by: Kim Phillips Signed-off-by: Herbert Xu diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index ea0295d..5ab480a 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -51,7 +51,7 @@ #include "desc_constr.h" #include "jr.h" #include "error.h" -#include "sg_link_tbl.h" +#include "sg_sw_sec4.h" #include "key_gen.h" /* @@ -658,8 +658,8 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher, * @dst_nents: number of segments in output scatterlist * @iv_dma: dma address of iv for checking continuity and link table * @desc: h/w descriptor (variable length; must not exceed MAX_CAAM_DESCSIZE) - * @link_tbl_bytes: length of dma mapped link_tbl space - * @link_tbl_dma: bus physical mapped address of h/w link table + * @sec4_sg_bytes: length of dma mapped sec4_sg space + * @sec4_sg_dma: bus physical mapped address of h/w link table * @hw_desc: the h/w job descriptor followed by any referenced link tables */ struct aead_edesc { @@ -667,9
+667,9 @@ struct aead_edesc { int src_nents; int dst_nents; dma_addr_t iv_dma; - int link_tbl_bytes; - dma_addr_t link_tbl_dma; - struct link_tbl_entry *link_tbl; + int sec4_sg_bytes; + dma_addr_t sec4_sg_dma; + struct sec4_sg_entry *sec4_sg; u32 hw_desc[0]; }; @@ -679,24 +679,24 @@ struct aead_edesc { * @dst_nents: number of segments in output scatterlist * @iv_dma: dma address of iv for checking continuity and link table * @desc: h/w descriptor (variable length; must not exceed MAX_CAAM_DESCSIZE) - * @link_tbl_bytes: length of dma mapped link_tbl space - * @link_tbl_dma: bus physical mapped address of h/w link table + * @sec4_sg_bytes: length of dma mapped sec4_sg space + * @sec4_sg_dma: bus physical mapped address of h/w link table * @hw_desc: the h/w job descriptor followed by any referenced link tables */ struct ablkcipher_edesc { int src_nents; int dst_nents; dma_addr_t iv_dma; - int link_tbl_bytes; - dma_addr_t link_tbl_dma; - struct link_tbl_entry *link_tbl; + int sec4_sg_bytes; + dma_addr_t sec4_sg_dma; + struct sec4_sg_entry *sec4_sg; u32 hw_desc[0]; }; static void caam_unmap(struct device *dev, struct scatterlist *src, struct scatterlist *dst, int src_nents, int dst_nents, - dma_addr_t iv_dma, int ivsize, dma_addr_t link_tbl_dma, - int link_tbl_bytes) + dma_addr_t iv_dma, int ivsize, dma_addr_t sec4_sg_dma, + int sec4_sg_bytes) { if (unlikely(dst != src)) { dma_unmap_sg(dev, src, src_nents, DMA_TO_DEVICE); @@ -707,8 +707,8 @@ static void caam_unmap(struct device *dev, struct scatterlist *src, if (iv_dma) dma_unmap_single(dev, iv_dma, ivsize, DMA_TO_DEVICE); - if (link_tbl_bytes) - dma_unmap_single(dev, link_tbl_dma, link_tbl_bytes, + if (sec4_sg_bytes) + dma_unmap_single(dev, sec4_sg_dma, sec4_sg_bytes, DMA_TO_DEVICE); } @@ -723,8 +723,8 @@ static void aead_unmap(struct device *dev, caam_unmap(dev, req->src, req->dst, edesc->src_nents, edesc->dst_nents, - edesc->iv_dma, ivsize, edesc->link_tbl_dma, - edesc->link_tbl_bytes); + edesc->iv_dma, ivsize, 
edesc->sec4_sg_dma, + edesc->sec4_sg_bytes); } static void ablkcipher_unmap(struct device *dev, @@ -736,8 +736,8 @@ static void ablkcipher_unmap(struct device *dev, caam_unmap(dev, req->src, req->dst, edesc->src_nents, edesc->dst_nents, - edesc->iv_dma, ivsize, edesc->link_tbl_dma, - edesc->link_tbl_bytes); + edesc->iv_dma, ivsize, edesc->sec4_sg_dma, + edesc->sec4_sg_bytes); } static void aead_encrypt_done(struct device *jrdev, u32 *desc, u32 err, @@ -828,7 +828,7 @@ static void aead_decrypt_done(struct device *jrdev, u32 *desc, u32 err, sizeof(struct iphdr) + req->assoclen + ((req->cryptlen > 1500) ? 1500 : req->cryptlen) + ctx->authsize + 36, 1); - if (!err && edesc->link_tbl_bytes) { + if (!err && edesc->sec4_sg_bytes) { struct scatterlist *sg = sg_last(req->src, edesc->src_nents); print_hex_dump(KERN_ERR, "sglastout@"xstr(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, sg_virt(sg), @@ -927,7 +927,7 @@ static void init_aead_job(u32 *sh_desc, dma_addr_t ptr, u32 *desc = edesc->hw_desc; u32 out_options = 0, in_options; dma_addr_t dst_dma, src_dma; - int len, link_tbl_index = 0; + int len, sec4_sg_index = 0; #ifdef DEBUG debug("assoclen %d cryptlen %d authsize %d\n", @@ -953,9 +953,9 @@ static void init_aead_job(u32 *sh_desc, dma_addr_t ptr, src_dma = sg_dma_address(req->assoc); in_options = 0; } else { - src_dma = edesc->link_tbl_dma; - link_tbl_index += (edesc->assoc_nents ? : 1) + 1 + - (edesc->src_nents ? : 1); + src_dma = edesc->sec4_sg_dma; + sec4_sg_index += (edesc->assoc_nents ? : 1) + 1 + + (edesc->src_nents ? : 1); in_options = LDST_SGF; } if (encrypt) @@ -969,7 +969,7 @@ static void init_aead_job(u32 *sh_desc, dma_addr_t ptr, if (all_contig) { dst_dma = sg_dma_address(req->src); } else { - dst_dma = src_dma + sizeof(struct link_tbl_entry) * + dst_dma = src_dma + sizeof(struct sec4_sg_entry) * ((edesc->assoc_nents ? 
: 1) + 1); out_options = LDST_SGF; } @@ -977,9 +977,9 @@ static void init_aead_job(u32 *sh_desc, dma_addr_t ptr, if (!edesc->dst_nents) { dst_dma = sg_dma_address(req->dst); } else { - dst_dma = edesc->link_tbl_dma + - link_tbl_index * - sizeof(struct link_tbl_entry); + dst_dma = edesc->sec4_sg_dma + + sec4_sg_index * + sizeof(struct sec4_sg_entry); out_options = LDST_SGF; } } @@ -1005,7 +1005,7 @@ static void init_aead_giv_job(u32 *sh_desc, dma_addr_t ptr, u32 *desc = edesc->hw_desc; u32 out_options = 0, in_options; dma_addr_t dst_dma, src_dma; - int len, link_tbl_index = 0; + int len, sec4_sg_index = 0; #ifdef DEBUG debug("assoclen %d cryptlen %d authsize %d\n", @@ -1030,8 +1030,8 @@ static void init_aead_giv_job(u32 *sh_desc, dma_addr_t ptr, src_dma = sg_dma_address(req->assoc); in_options = 0; } else { - src_dma = edesc->link_tbl_dma; - link_tbl_index += edesc->assoc_nents + 1 + edesc->src_nents; + src_dma = edesc->sec4_sg_dma; + sec4_sg_index += edesc->assoc_nents + 1 + edesc->src_nents; in_options = LDST_SGF; } append_seq_in_ptr(desc, src_dma, req->assoclen + ivsize + @@ -1041,13 +1041,13 @@ static void init_aead_giv_job(u32 *sh_desc, dma_addr_t ptr, dst_dma = edesc->iv_dma; } else { if (likely(req->src == req->dst)) { - dst_dma = src_dma + sizeof(struct link_tbl_entry) * + dst_dma = src_dma + sizeof(struct sec4_sg_entry) * edesc->assoc_nents; out_options = LDST_SGF; } else { - dst_dma = edesc->link_tbl_dma + - link_tbl_index * - sizeof(struct link_tbl_entry); + dst_dma = edesc->sec4_sg_dma + + sec4_sg_index * + sizeof(struct sec4_sg_entry); out_options = LDST_SGF; } } @@ -1068,7 +1068,7 @@ static void init_ablkcipher_job(u32 *sh_desc, dma_addr_t ptr, u32 *desc = edesc->hw_desc; u32 out_options = 0, in_options; dma_addr_t dst_dma, src_dma; - int len, link_tbl_index = 0; + int len, sec4_sg_index = 0; #ifdef DEBUG print_hex_dump(KERN_ERR, "presciv@"xstr(__LINE__)": ", @@ -1086,8 +1086,8 @@ static void init_ablkcipher_job(u32 *sh_desc, dma_addr_t ptr, src_dma = 
edesc->iv_dma; in_options = 0; } else { - src_dma = edesc->link_tbl_dma; - link_tbl_index += (iv_contig ? 0 : 1) + edesc->src_nents; + src_dma = edesc->sec4_sg_dma; + sec4_sg_index += (iv_contig ? 0 : 1) + edesc->src_nents; in_options = LDST_SGF; } append_seq_in_ptr(desc, src_dma, req->nbytes + ivsize, in_options); @@ -1096,16 +1096,16 @@ static void init_ablkcipher_job(u32 *sh_desc, dma_addr_t ptr, if (!edesc->src_nents && iv_contig) { dst_dma = sg_dma_address(req->src); } else { - dst_dma = edesc->link_tbl_dma + - sizeof(struct link_tbl_entry); + dst_dma = edesc->sec4_sg_dma + + sizeof(struct sec4_sg_entry); out_options = LDST_SGF; } } else { if (!edesc->dst_nents) { dst_dma = sg_dma_address(req->dst); } else { - dst_dma = edesc->link_tbl_dma + - link_tbl_index * sizeof(struct link_tbl_entry); + dst_dma = edesc->sec4_sg_dma + + sec4_sg_index * sizeof(struct sec4_sg_entry); out_options = LDST_SGF; } } @@ -1129,7 +1129,7 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, int sgc; bool all_contig = true; int ivsize = crypto_aead_ivsize(aead); - int link_tbl_index, link_tbl_len = 0, link_tbl_bytes; + int sec4_sg_index, sec4_sg_len = 0, sec4_sg_bytes; assoc_nents = sg_count(req->assoc, req->assoclen); src_nents = sg_count(req->src, req->cryptlen); @@ -1157,15 +1157,15 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, all_contig = false; assoc_nents = assoc_nents ? : 1; src_nents = src_nents ? 
: 1; - link_tbl_len = assoc_nents + 1 + src_nents; + sec4_sg_len = assoc_nents + 1 + src_nents; } - link_tbl_len += dst_nents; + sec4_sg_len += dst_nents; - link_tbl_bytes = link_tbl_len * sizeof(struct link_tbl_entry); + sec4_sg_bytes = sec4_sg_len * sizeof(struct sec4_sg_entry); /* allocate space for base edesc and hw desc commands, link tables */ edesc = kmalloc(sizeof(struct aead_edesc) + desc_bytes + - link_tbl_bytes, GFP_DMA | flags); + sec4_sg_bytes, GFP_DMA | flags); if (!edesc) { dev_err(jrdev, "could not allocate extended descriptor\n"); return ERR_PTR(-ENOMEM); @@ -1175,32 +1175,32 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, edesc->src_nents = src_nents; edesc->dst_nents = dst_nents; edesc->iv_dma = iv_dma; - edesc->link_tbl_bytes = link_tbl_bytes; - edesc->link_tbl = (void *)edesc + sizeof(struct aead_edesc) + - desc_bytes; - edesc->link_tbl_dma = dma_map_single(jrdev, edesc->link_tbl, - link_tbl_bytes, DMA_TO_DEVICE); + edesc->sec4_sg_bytes = sec4_sg_bytes; + edesc->sec4_sg = (void *)edesc + sizeof(struct aead_edesc) + + desc_bytes; + edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, + sec4_sg_bytes, DMA_TO_DEVICE); *all_contig_ptr = all_contig; - link_tbl_index = 0; + sec4_sg_index = 0; if (!all_contig) { - sg_to_link_tbl(req->assoc, - (assoc_nents ? : 1), - edesc->link_tbl + - link_tbl_index, 0); - link_tbl_index += assoc_nents ? : 1; - sg_to_link_tbl_one(edesc->link_tbl + link_tbl_index, + sg_to_sec4_sg(req->assoc, + (assoc_nents ? : 1), + edesc->sec4_sg + + sec4_sg_index, 0); + sec4_sg_index += assoc_nents ? : 1; + dma_to_sec4_sg_one(edesc->sec4_sg + sec4_sg_index, iv_dma, ivsize, 0); - link_tbl_index += 1; - sg_to_link_tbl_last(req->src, - (src_nents ? : 1), - edesc->link_tbl + - link_tbl_index, 0); - link_tbl_index += src_nents ? : 1; + sec4_sg_index += 1; + sg_to_sec4_sg_last(req->src, + (src_nents ? : 1), + edesc->sec4_sg + + sec4_sg_index, 0); + sec4_sg_index += src_nents ? 
: 1; } if (dst_nents) { - sg_to_link_tbl_last(req->dst, dst_nents, - edesc->link_tbl + link_tbl_index, 0); + sg_to_sec4_sg_last(req->dst, dst_nents, + edesc->sec4_sg + sec4_sg_index, 0); } return edesc; @@ -1307,7 +1307,7 @@ static struct aead_edesc *aead_giv_edesc_alloc(struct aead_givcrypt_request int sgc; u32 contig = GIV_SRC_CONTIG | GIV_DST_CONTIG; int ivsize = crypto_aead_ivsize(aead); - int link_tbl_index, link_tbl_len = 0, link_tbl_bytes; + int sec4_sg_index, sec4_sg_len = 0, sec4_sg_bytes; assoc_nents = sg_count(req->assoc, req->assoclen); src_nents = sg_count(req->src, req->cryptlen); @@ -1336,22 +1336,22 @@ static struct aead_edesc *aead_giv_edesc_alloc(struct aead_givcrypt_request contig &= ~GIV_DST_CONTIG; if (unlikely(req->src != req->dst)) { dst_nents = dst_nents ? : 1; - link_tbl_len += 1; + sec4_sg_len += 1; } if (!(contig & GIV_SRC_CONTIG)) { assoc_nents = assoc_nents ? : 1; src_nents = src_nents ? : 1; - link_tbl_len += assoc_nents + 1 + src_nents; + sec4_sg_len += assoc_nents + 1 + src_nents; if (likely(req->src == req->dst)) contig &= ~GIV_DST_CONTIG; } - link_tbl_len += dst_nents; + sec4_sg_len += dst_nents; - link_tbl_bytes = link_tbl_len * sizeof(struct link_tbl_entry); + sec4_sg_bytes = sec4_sg_len * sizeof(struct sec4_sg_entry); /* allocate space for base edesc and hw desc commands, link tables */ edesc = kmalloc(sizeof(struct aead_edesc) + desc_bytes + - link_tbl_bytes, GFP_DMA | flags); + sec4_sg_bytes, GFP_DMA | flags); if (!edesc) { dev_err(jrdev, "could not allocate extended descriptor\n"); return ERR_PTR(-ENOMEM); @@ -1361,33 +1361,33 @@ static struct aead_edesc *aead_giv_edesc_alloc(struct aead_givcrypt_request edesc->src_nents = src_nents; edesc->dst_nents = dst_nents; edesc->iv_dma = iv_dma; - edesc->link_tbl_bytes = link_tbl_bytes; - edesc->link_tbl = (void *)edesc + sizeof(struct aead_edesc) + - desc_bytes; - edesc->link_tbl_dma = dma_map_single(jrdev, edesc->link_tbl, - link_tbl_bytes, DMA_TO_DEVICE); + edesc->sec4_sg_bytes = 
sec4_sg_bytes; + edesc->sec4_sg = (void *)edesc + sizeof(struct aead_edesc) + + desc_bytes; + edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, + sec4_sg_bytes, DMA_TO_DEVICE); *contig_ptr = contig; - link_tbl_index = 0; + sec4_sg_index = 0; if (!(contig & GIV_SRC_CONTIG)) { - sg_to_link_tbl(req->assoc, assoc_nents, - edesc->link_tbl + - link_tbl_index, 0); - link_tbl_index += assoc_nents; - sg_to_link_tbl_one(edesc->link_tbl + link_tbl_index, + sg_to_sec4_sg(req->assoc, assoc_nents, + edesc->sec4_sg + + sec4_sg_index, 0); + sec4_sg_index += assoc_nents; + dma_to_sec4_sg_one(edesc->sec4_sg + sec4_sg_index, iv_dma, ivsize, 0); - link_tbl_index += 1; - sg_to_link_tbl_last(req->src, src_nents, - edesc->link_tbl + - link_tbl_index, 0); - link_tbl_index += src_nents; + sec4_sg_index += 1; + sg_to_sec4_sg_last(req->src, src_nents, + edesc->sec4_sg + + sec4_sg_index, 0); + sec4_sg_index += src_nents; } if (unlikely(req->src != req->dst && !(contig & GIV_DST_CONTIG))) { - sg_to_link_tbl_one(edesc->link_tbl + link_tbl_index, + dma_to_sec4_sg_one(edesc->sec4_sg + sec4_sg_index, iv_dma, ivsize, 0); - link_tbl_index += 1; - sg_to_link_tbl_last(req->dst, dst_nents, - edesc->link_tbl + link_tbl_index, 0); + sec4_sg_index += 1; + sg_to_sec4_sg_last(req->dst, dst_nents, + edesc->sec4_sg + sec4_sg_index, 0); } return edesc; @@ -1453,13 +1453,13 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP)) ? 
GFP_KERNEL : GFP_ATOMIC; - int src_nents, dst_nents = 0, link_tbl_bytes; + int src_nents, dst_nents = 0, sec4_sg_bytes; struct ablkcipher_edesc *edesc; dma_addr_t iv_dma = 0; bool iv_contig = false; int sgc; int ivsize = crypto_ablkcipher_ivsize(ablkcipher); - int link_tbl_index; + int sec4_sg_index; src_nents = sg_count(req->src, req->nbytes); @@ -1485,12 +1485,12 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request iv_contig = true; else src_nents = src_nents ? : 1; - link_tbl_bytes = ((iv_contig ? 0 : 1) + src_nents + dst_nents) * - sizeof(struct link_tbl_entry); + sec4_sg_bytes = ((iv_contig ? 0 : 1) + src_nents + dst_nents) * + sizeof(struct sec4_sg_entry); /* allocate space for base edesc and hw desc commands, link tables */ edesc = kmalloc(sizeof(struct ablkcipher_edesc) + desc_bytes + - link_tbl_bytes, GFP_DMA | flags); + sec4_sg_bytes, GFP_DMA | flags); if (!edesc) { dev_err(jrdev, "could not allocate extended descriptor\n"); return ERR_PTR(-ENOMEM); @@ -1498,31 +1498,31 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request edesc->src_nents = src_nents; edesc->dst_nents = dst_nents; - edesc->link_tbl_bytes = link_tbl_bytes; - edesc->link_tbl = (void *)edesc + sizeof(struct ablkcipher_edesc) + - desc_bytes; + edesc->sec4_sg_bytes = sec4_sg_bytes; + edesc->sec4_sg = (void *)edesc + sizeof(struct ablkcipher_edesc) + + desc_bytes; - link_tbl_index = 0; + sec4_sg_index = 0; if (!iv_contig) { - sg_to_link_tbl_one(edesc->link_tbl, iv_dma, ivsize, 0); - sg_to_link_tbl_last(req->src, src_nents, - edesc->link_tbl + 1, 0); - link_tbl_index += 1 + src_nents; + dma_to_sec4_sg_one(edesc->sec4_sg, iv_dma, ivsize, 0); + sg_to_sec4_sg_last(req->src, src_nents, + edesc->sec4_sg + 1, 0); + sec4_sg_index += 1 + src_nents; } if (unlikely(dst_nents)) { - sg_to_link_tbl_last(req->dst, dst_nents, - edesc->link_tbl + link_tbl_index, 0); + sg_to_sec4_sg_last(req->dst, dst_nents, + edesc->sec4_sg + sec4_sg_index, 0); } - 
edesc->link_tbl_dma = dma_map_single(jrdev, edesc->link_tbl, - link_tbl_bytes, DMA_TO_DEVICE); + edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, + sec4_sg_bytes, DMA_TO_DEVICE); edesc->iv_dma = iv_dma; #ifdef DEBUG - print_hex_dump(KERN_ERR, "ablkcipher link_tbl@"xstr(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, edesc->link_tbl, - link_tbl_bytes, 1); + print_hex_dump(KERN_ERR, "ablkcipher sec4_sg@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, edesc->sec4_sg, + sec4_sg_bytes, 1); #endif *iv_contig_out = iv_contig; diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h index 48c1927..3e68506 100644 --- a/drivers/crypto/caam/desc.h +++ b/drivers/crypto/caam/desc.h @@ -8,10 +8,10 @@ #ifndef DESC_H #define DESC_H -struct link_tbl_entry { +struct sec4_sg_entry { u64 ptr; -#define LINK_TBL_LEN_FIN 0x40000000 -#define LINK_TBL_LEN_EXT 0x80000000 +#define SEC4_SG_LEN_FIN 0x40000000 +#define SEC4_SG_LEN_EXT 0x80000000 u32 len; u8 reserved; u8 buf_pool_id; diff --git a/drivers/crypto/caam/sg_link_tbl.h b/drivers/crypto/caam/sg_link_tbl.h deleted file mode 100644 index 6df4349..0000000 --- a/drivers/crypto/caam/sg_link_tbl.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - * CAAM/SEC 4.x functions for using scatterlists in caam driver - * - * Copyright 2008-2011 Freescale Semiconductor, Inc. 
- * - */ - -struct link_tbl_entry; - -/* - * convert single dma address to h/w link table format - */ -static inline void sg_to_link_tbl_one(struct link_tbl_entry *link_tbl_ptr, - dma_addr_t dma, u32 len, u32 offset) -{ - link_tbl_ptr->ptr = dma; - link_tbl_ptr->len = len; - link_tbl_ptr->reserved = 0; - link_tbl_ptr->buf_pool_id = 0; - link_tbl_ptr->offset = offset; -#ifdef DEBUG - print_hex_dump(KERN_ERR, "link_tbl_ptr@: ", - DUMP_PREFIX_ADDRESS, 16, 4, link_tbl_ptr, - sizeof(struct link_tbl_entry), 1); -#endif -} - -/* - * convert scatterlist to h/w link table format - * but does not have final bit; instead, returns last entry - */ -static inline struct link_tbl_entry * -sg_to_link_tbl(struct scatterlist *sg, int sg_count, - struct link_tbl_entry *link_tbl_ptr, u32 offset) -{ - while (sg_count) { - sg_to_link_tbl_one(link_tbl_ptr, sg_dma_address(sg), - sg_dma_len(sg), offset); - link_tbl_ptr++; - sg = sg_next(sg); - sg_count--; - } - return link_tbl_ptr - 1; -} - -/* - * convert scatterlist to h/w link table format - * scatterlist must have been previously dma mapped - */ -static inline void sg_to_link_tbl_last(struct scatterlist *sg, int sg_count, - struct link_tbl_entry *link_tbl_ptr, - u32 offset) -{ - link_tbl_ptr = sg_to_link_tbl(sg, sg_count, link_tbl_ptr, offset); - link_tbl_ptr->len |= LINK_TBL_LEN_FIN; -} - -/* count number of elements in scatterlist */ -static inline int __sg_count(struct scatterlist *sg_list, int nbytes) -{ - struct scatterlist *sg = sg_list; - int sg_nents = 0; - - while (nbytes > 0) { - sg_nents++; - nbytes -= sg->length; - if (!sg_is_last(sg) && (sg + 1)->length == 0) - BUG(); /* Not support chaining */ - sg = scatterwalk_sg_next(sg); - } - - return sg_nents; -} - -/* derive number of elements in scatterlist, but return 0 for 1 */ -static inline int sg_count(struct scatterlist *sg_list, int nbytes) -{ - int sg_nents = __sg_count(sg_list, nbytes); - - if (likely(sg_nents == 1)) - return 0; - - return sg_nents; -} diff --git 
a/drivers/crypto/caam/sg_sw_sec4.h b/drivers/crypto/caam/sg_sw_sec4.h new file mode 100644 index 0000000..a6ad7a4 --- /dev/null +++ b/drivers/crypto/caam/sg_sw_sec4.h @@ -0,0 +1,84 @@ +/* + * CAAM/SEC 4.x functions for using scatterlists in caam driver + * + * Copyright 2008-2011 Freescale Semiconductor, Inc. + * + */ + +struct sec4_sg_entry; + +/* + * convert single dma address to h/w link table format + */ +static inline void dma_to_sec4_sg_one(struct sec4_sg_entry *sec4_sg_ptr, + dma_addr_t dma, u32 len, u32 offset) +{ + sec4_sg_ptr->ptr = dma; + sec4_sg_ptr->len = len; + sec4_sg_ptr->reserved = 0; + sec4_sg_ptr->buf_pool_id = 0; + sec4_sg_ptr->offset = offset; +#ifdef DEBUG + print_hex_dump(KERN_ERR, "sec4_sg_ptr@: ", + DUMP_PREFIX_ADDRESS, 16, 4, sec4_sg_ptr, + sizeof(struct sec4_sg_entry), 1); +#endif +} + +/* + * convert scatterlist to h/w link table format + * but does not have final bit; instead, returns last entry + */ +static inline struct sec4_sg_entry * +sg_to_sec4_sg(struct scatterlist *sg, int sg_count, + struct sec4_sg_entry *sec4_sg_ptr, u32 offset) +{ + while (sg_count) { + dma_to_sec4_sg_one(sec4_sg_ptr, sg_dma_address(sg), + sg_dma_len(sg), offset); + sec4_sg_ptr++; + sg = sg_next(sg); + sg_count--; + } + return sec4_sg_ptr - 1; +} + +/* + * convert scatterlist to h/w link table format + * scatterlist must have been previously dma mapped + */ +static inline void sg_to_sec4_sg_last(struct scatterlist *sg, int sg_count, + struct sec4_sg_entry *sec4_sg_ptr, + u32 offset) +{ + sec4_sg_ptr = sg_to_sec4_sg(sg, sg_count, sec4_sg_ptr, offset); + sec4_sg_ptr->len |= SEC4_SG_LEN_FIN; +} + +/* count number of elements in scatterlist */ +static inline int __sg_count(struct scatterlist *sg_list, int nbytes) +{ + struct scatterlist *sg = sg_list; + int sg_nents = 0; + + while (nbytes > 0) { + sg_nents++; + nbytes -= sg->length; + if (!sg_is_last(sg) && (sg + 1)->length == 0) + BUG(); /* Not support chaining */ + sg = scatterwalk_sg_next(sg); + } + + return 
sg_nents; +} + +/* derive number of elements in scatterlist, but return 0 for 1 */ +static inline int sg_count(struct scatterlist *sg_list, int nbytes) +{ + int sg_nents = __sg_count(sg_list, nbytes); + + if (likely(sg_nents == 1)) + return 0; + + return sg_nents; +} -- cgit v0.10.2 From 045e36780f11523e26d1e4a8c78bdc57f4003bd0 Mon Sep 17 00:00:00 2001 From: Yuan Kang Date: Fri, 22 Jun 2012 19:48:47 -0500 Subject: crypto: caam - ahash hmac support caam supports ahash hmac with sha algorithms and md5. Signed-off-by: Yuan Kang Signed-off-by: Kim Phillips Signed-off-by: Herbert Xu diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig index 2d876bb..e61b7f5 100644 --- a/drivers/crypto/caam/Kconfig +++ b/drivers/crypto/caam/Kconfig @@ -70,3 +70,15 @@ config CRYPTO_DEV_FSL_CAAM_CRYPTO_API To compile this as a module, choose M here: the module will be called caamalg. + +config CRYPTO_DEV_FSL_CAAM_AHASH_API + tristate "Register hash algorithm implementations with Crypto API" + depends on CRYPTO_DEV_FSL_CAAM + default y + select CRYPTO_AHASH + help + Selecting this will offload ahash for users of the + scatterlist crypto API to the SEC4 via job ring. + + To compile this as a module, choose M here: the module + will be called caamhash. diff --git a/drivers/crypto/caam/Makefile b/drivers/crypto/caam/Makefile index 4447e57..9ef1cb0 100644 --- a/drivers/crypto/caam/Makefile +++ b/drivers/crypto/caam/Makefile @@ -4,5 +4,6 @@ obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam.o obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API) += caamalg.o +obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_AHASH_API) += caamhash.o caam-objs := ctrl.o jr.o error.o key_gen.o diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c new file mode 100644 index 0000000..c0b6d86 --- /dev/null +++ b/drivers/crypto/caam/caamhash.c @@ -0,0 +1,1814 @@ +/* + * caam - Freescale FSL CAAM support for ahash functions of crypto API + * + * Copyright 2011 Freescale Semiconductor, Inc. 
+ * + * Based on caamalg.c crypto API driver. + * + * relationship of digest job descriptor or first job descriptor after init to + * shared descriptors: + * + * --------------- --------------- + * | JobDesc #1 |-------------------->| ShareDesc | + * | *(packet 1) | | (hashKey) | + * --------------- | (operation) | + * --------------- + * + * relationship of subsequent job descriptors to shared descriptors: + * + * --------------- --------------- + * | JobDesc #2 |-------------------->| ShareDesc | + * | *(packet 2) | |------------->| (hashKey) | + * --------------- | |-------->| (operation) | + * . | | | (load ctx2) | + * . | | --------------- + * --------------- | | + * | JobDesc #3 |------| | + * | *(packet 3) | | + * --------------- | + * . | + * . | + * --------------- | + * | JobDesc #4 |------------ + * | *(packet 4) | + * --------------- + * + * The SharedDesc never changes for a connection unless rekeyed, but + * each packet will likely be in a different place. So all we need + * to know to process the packet is where the input is, where the + * output goes, and what context we want to process with. Context is + * in the SharedDesc, packet references in the JobDesc. 
+ * + * So, a job desc looks like: + * + * --------------------- + * | Header | + * | ShareDesc Pointer | + * | SEQ_OUT_PTR | + * | (output buffer) | + * | (output length) | + * | SEQ_IN_PTR | + * | (input buffer) | + * | (input length) | + * --------------------- + */ + +#include "compat.h" + +#include "regs.h" +#include "intern.h" +#include "desc_constr.h" +#include "jr.h" +#include "error.h" +#include "sg_sw_sec4.h" +#include "key_gen.h" + +#define CAAM_CRA_PRIORITY 3000 + +/* max hash key is max split key size */ +#define CAAM_MAX_HASH_KEY_SIZE (SHA512_DIGEST_SIZE * 2) + +#define CAAM_MAX_HASH_BLOCK_SIZE SHA512_BLOCK_SIZE +#define CAAM_MAX_HASH_DIGEST_SIZE SHA512_DIGEST_SIZE + +/* length of descriptors text */ +#define DESC_JOB_IO_LEN (CAAM_CMD_SZ * 5 + CAAM_PTR_SZ * 3) + +#define DESC_AHASH_BASE (4 * CAAM_CMD_SZ) +#define DESC_AHASH_UPDATE_LEN (6 * CAAM_CMD_SZ) +#define DESC_AHASH_UPDATE_FIRST_LEN (DESC_AHASH_BASE + 4 * CAAM_CMD_SZ) +#define DESC_AHASH_FINAL_LEN (DESC_AHASH_BASE + 5 * CAAM_CMD_SZ) +#define DESC_AHASH_FINUP_LEN (DESC_AHASH_BASE + 5 * CAAM_CMD_SZ) +#define DESC_AHASH_DIGEST_LEN (DESC_AHASH_BASE + 4 * CAAM_CMD_SZ) + +#define DESC_HASH_MAX_USED_BYTES (DESC_AHASH_FINAL_LEN + \ + CAAM_MAX_HASH_KEY_SIZE) +#define DESC_HASH_MAX_USED_LEN (DESC_HASH_MAX_USED_BYTES / CAAM_CMD_SZ) + +/* caam context sizes for hashes: running digest + 8 */ +#define HASH_MSG_LEN 8 +#define MAX_CTX_LEN (HASH_MSG_LEN + SHA512_DIGEST_SIZE) + +#ifdef DEBUG +/* for print_hex_dumps with line references */ +#define xstr(s) str(s) +#define str(s) #s +#define debug(format, arg...) printk(format, arg) +#else +#define debug(format, arg...) 
+#endif + +/* ahash per-session context */ +struct caam_hash_ctx { + struct device *jrdev; + u32 sh_desc_update[DESC_HASH_MAX_USED_LEN]; + u32 sh_desc_update_first[DESC_HASH_MAX_USED_LEN]; + u32 sh_desc_fin[DESC_HASH_MAX_USED_LEN]; + u32 sh_desc_digest[DESC_HASH_MAX_USED_LEN]; + u32 sh_desc_finup[DESC_HASH_MAX_USED_LEN]; + dma_addr_t sh_desc_update_dma; + dma_addr_t sh_desc_update_first_dma; + dma_addr_t sh_desc_fin_dma; + dma_addr_t sh_desc_digest_dma; + dma_addr_t sh_desc_finup_dma; + u32 alg_type; + u32 alg_op; + u8 key[CAAM_MAX_HASH_KEY_SIZE]; + dma_addr_t key_dma; + int ctx_len; + unsigned int split_key_len; + unsigned int split_key_pad_len; +}; + +/* ahash state */ +struct caam_hash_state { + dma_addr_t buf_dma; + dma_addr_t ctx_dma; + u8 buf_0[CAAM_MAX_HASH_BLOCK_SIZE] ____cacheline_aligned; + int buflen_0; + u8 buf_1[CAAM_MAX_HASH_BLOCK_SIZE] ____cacheline_aligned; + int buflen_1; + u8 caam_ctx[MAX_CTX_LEN]; + int (*update)(struct ahash_request *req); + int (*final)(struct ahash_request *req); + int (*finup)(struct ahash_request *req); + int current_buf; +}; + +/* Common job descriptor seq in/out ptr routines */ + +/* Map state->caam_ctx, and append seq_out_ptr command that points to it */ +static inline void map_seq_out_ptr_ctx(u32 *desc, struct device *jrdev, + struct caam_hash_state *state, + int ctx_len) +{ + state->ctx_dma = dma_map_single(jrdev, state->caam_ctx, + ctx_len, DMA_FROM_DEVICE); + append_seq_out_ptr(desc, state->ctx_dma, ctx_len, 0); +} + +/* Map req->result, and append seq_out_ptr command that points to it */ +static inline dma_addr_t map_seq_out_ptr_result(u32 *desc, struct device *jrdev, + u8 *result, int digestsize) +{ + dma_addr_t dst_dma; + + dst_dma = dma_map_single(jrdev, result, digestsize, DMA_FROM_DEVICE); + append_seq_out_ptr(desc, dst_dma, digestsize, 0); + + return dst_dma; +} + +/* Map current buffer in state and put it in link table */ +static inline dma_addr_t buf_map_to_sec4_sg(struct device *jrdev, + struct sec4_sg_entry 
*sec4_sg, + u8 *buf, int buflen) +{ + dma_addr_t buf_dma; + + buf_dma = dma_map_single(jrdev, buf, buflen, DMA_TO_DEVICE); + dma_to_sec4_sg_one(sec4_sg, buf_dma, buflen, 0); + + return buf_dma; +} + +/* Map req->src and put it in link table */ +static inline void src_map_to_sec4_sg(struct device *jrdev, + struct scatterlist *src, int src_nents, + struct sec4_sg_entry *sec4_sg) +{ + dma_map_sg(jrdev, src, src_nents, DMA_TO_DEVICE); + sg_to_sec4_sg_last(src, src_nents, sec4_sg, 0); +} + +/* + * Only put buffer in link table if it contains data, which is possible, + * since a buffer has previously been used, and needs to be unmapped, + */ +static inline dma_addr_t +try_buf_map_to_sec4_sg(struct device *jrdev, struct sec4_sg_entry *sec4_sg, + u8 *buf, dma_addr_t buf_dma, int buflen, + int last_buflen) +{ + if (buf_dma && !dma_mapping_error(jrdev, buf_dma)) + dma_unmap_single(jrdev, buf_dma, last_buflen, DMA_TO_DEVICE); + if (buflen) + buf_dma = buf_map_to_sec4_sg(jrdev, sec4_sg, buf, buflen); + else + buf_dma = 0; + + return buf_dma; +} + +/* Map state->caam_ctx, and add it to link table */ +static inline void ctx_map_to_sec4_sg(u32 *desc, struct device *jrdev, + struct caam_hash_state *state, + int ctx_len, + struct sec4_sg_entry *sec4_sg, + u32 flag) +{ + state->ctx_dma = dma_map_single(jrdev, state->caam_ctx, ctx_len, flag); + dma_to_sec4_sg_one(sec4_sg, state->ctx_dma, ctx_len, 0); +} + +/* Common shared descriptor commands */ +static inline void append_key_ahash(u32 *desc, struct caam_hash_ctx *ctx) +{ + append_key_as_imm(desc, ctx->key, ctx->split_key_pad_len, + ctx->split_key_len, CLASS_2 | + KEY_DEST_MDHA_SPLIT | KEY_ENC); +} + +/* Append key if it has been set */ +static inline void init_sh_desc_key_ahash(u32 *desc, struct caam_hash_ctx *ctx) +{ + u32 *key_jump_cmd; + + init_sh_desc(desc, HDR_SHARE_WAIT); + + if (ctx->split_key_len) { + /* Skip if already shared */ + key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | + JUMP_COND_SHRD); + + 
append_key_ahash(desc, ctx); + + set_jump_tgt_here(desc, key_jump_cmd); + } + + /* Propagate errors from shared to job descriptor */ + append_cmd(desc, SET_OK_NO_PROP_ERRORS | CMD_LOAD); +} + +/* + * For ahash read data from seqin following state->caam_ctx, + * and write resulting class2 context to seqout, which may be state->caam_ctx + * or req->result + */ +static inline void ahash_append_load_str(u32 *desc, int digestsize) +{ + /* Calculate remaining bytes to read */ + append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); + + /* Read remaining bytes */ + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_LAST2 | + FIFOLD_TYPE_MSG | KEY_VLF); + + /* Store class2 context bytes */ + append_seq_store(desc, digestsize, LDST_CLASS_2_CCB | + LDST_SRCDST_BYTE_CONTEXT); +} + +/* + * For ahash update, final and finup, import context, read and write to seqout + */ +static inline void ahash_ctx_data_to_out(u32 *desc, u32 op, u32 state, + int digestsize, + struct caam_hash_ctx *ctx) +{ + init_sh_desc_key_ahash(desc, ctx); + + /* Import context from software */ + append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_BYTE_CONTEXT | + LDST_CLASS_2_CCB | ctx->ctx_len); + + /* Class 2 operation */ + append_operation(desc, op | state | OP_ALG_ENCRYPT); + + /* + * Load from buf and/or src and write to req->result or state->context + */ + ahash_append_load_str(desc, digestsize); +} + +/* For ahash firsts and digest, read and write to seqout */ +static inline void ahash_data_to_out(u32 *desc, u32 op, u32 state, + int digestsize, struct caam_hash_ctx *ctx) +{ + init_sh_desc_key_ahash(desc, ctx); + + /* Class 2 operation */ + append_operation(desc, op | state | OP_ALG_ENCRYPT); + + /* + * Load from buf and/or src and write to req->result or state->context + */ + ahash_append_load_str(desc, digestsize); +} + +static int ahash_set_sh_desc(struct crypto_ahash *ahash) +{ + struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + int digestsize = crypto_ahash_digestsize(ahash); 
+ struct device *jrdev = ctx->jrdev; + u32 have_key = 0; + u32 *desc; + + if (ctx->split_key_len) + have_key = OP_ALG_AAI_HMAC_PRECOMP; + + /* ahash_update shared descriptor */ + desc = ctx->sh_desc_update; + + init_sh_desc(desc, HDR_SHARE_WAIT); + + /* Import context from software */ + append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_BYTE_CONTEXT | + LDST_CLASS_2_CCB | ctx->ctx_len); + + /* Class 2 operation */ + append_operation(desc, ctx->alg_type | OP_ALG_AS_UPDATE | + OP_ALG_ENCRYPT); + + /* Load data and write to result or context */ + ahash_append_load_str(desc, ctx->ctx_len); + + ctx->sh_desc_update_dma = dma_map_single(jrdev, desc, desc_bytes(desc), + DMA_TO_DEVICE); + if (dma_mapping_error(jrdev, ctx->sh_desc_update_dma)) { + dev_err(jrdev, "unable to map shared descriptor\n"); + return -ENOMEM; + } +#ifdef DEBUG + print_hex_dump(KERN_ERR, "ahash update shdesc@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); +#endif + + /* ahash_update_first shared descriptor */ + desc = ctx->sh_desc_update_first; + + ahash_data_to_out(desc, have_key | ctx->alg_type, OP_ALG_AS_INIT, + ctx->ctx_len, ctx); + + ctx->sh_desc_update_first_dma = dma_map_single(jrdev, desc, + desc_bytes(desc), + DMA_TO_DEVICE); + if (dma_mapping_error(jrdev, ctx->sh_desc_update_first_dma)) { + dev_err(jrdev, "unable to map shared descriptor\n"); + return -ENOMEM; + } +#ifdef DEBUG + print_hex_dump(KERN_ERR, "ahash update first shdesc@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); +#endif + + /* ahash_final shared descriptor */ + desc = ctx->sh_desc_fin; + + ahash_ctx_data_to_out(desc, have_key | ctx->alg_type, + OP_ALG_AS_FINALIZE, digestsize, ctx); + + ctx->sh_desc_fin_dma = dma_map_single(jrdev, desc, desc_bytes(desc), + DMA_TO_DEVICE); + if (dma_mapping_error(jrdev, ctx->sh_desc_fin_dma)) { + dev_err(jrdev, "unable to map shared descriptor\n"); + return -ENOMEM; + } +#ifdef DEBUG + print_hex_dump(KERN_ERR, "ahash final 
shdesc@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, desc,
+		       desc_bytes(desc), 1);
+#endif
+
+	/* ahash_finup shared descriptor */
+	desc = ctx->sh_desc_finup;
+
+	ahash_ctx_data_to_out(desc, have_key | ctx->alg_type,
+			      OP_ALG_AS_FINALIZE, digestsize, ctx);
+
+	ctx->sh_desc_finup_dma = dma_map_single(jrdev, desc, desc_bytes(desc),
+						DMA_TO_DEVICE);
+	if (dma_mapping_error(jrdev, ctx->sh_desc_finup_dma)) {
+		dev_err(jrdev, "unable to map shared descriptor\n");
+		return -ENOMEM;
+	}
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "ahash finup shdesc@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, desc,
+		       desc_bytes(desc), 1);
+#endif
+
+	/* ahash_digest shared descriptor */
+	desc = ctx->sh_desc_digest;
+
+	ahash_data_to_out(desc, have_key | ctx->alg_type, OP_ALG_AS_INITFINAL,
+			  digestsize, ctx);
+
+	ctx->sh_desc_digest_dma = dma_map_single(jrdev, desc,
+						 desc_bytes(desc),
+						 DMA_TO_DEVICE);
+	if (dma_mapping_error(jrdev, ctx->sh_desc_digest_dma)) {
+		dev_err(jrdev, "unable to map shared descriptor\n");
+		return -ENOMEM;
+	}
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "ahash digest shdesc@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, desc,
+		       desc_bytes(desc), 1);
+#endif
+
+	return 0;
+}
+
+/*
+ * Thin wrapper around gen_split_key() that supplies the job ring device,
+ * key buffer, split key lengths and alg_op from the hash context.
+ */
+static u32 gen_split_hash_key(struct caam_hash_ctx *ctx, const u8 *key_in,
+			      u32 keylen)
+{
+	return gen_split_key(ctx->jrdev, ctx->key, ctx->split_key_len,
+			     ctx->split_key_pad_len, key_in, keylen,
+			     ctx->alg_op);
+}
+
+/*
+ * Digest the key if it is too large: run an unkeyed hash of key_in on the
+ * job ring and store the digest in key_out.
+ *
+ * @ctx: hash context (job ring device and algorithm type)
+ * @key_in: key to digest, *keylen bytes
+ * @keylen: in: length of key_in; out: set to digestsize
+ * @key_out: buffer receiving digestsize bytes
+ * @digestsize: digest length of the algorithm
+ *
+ * Returns 0 on success, otherwise the enqueue/job status or -ENOMEM (the
+ * caller stores the value in an int).
+ */
+static u32 hash_digest_key(struct caam_hash_ctx *ctx, const u8 *key_in,
+			   u32 *keylen, u8 *key_out, u32 digestsize)
+{
+	struct device *jrdev = ctx->jrdev;
+	u32 *desc;
+	struct split_key_result result;
+	dma_addr_t src_dma, dst_dma;
+	int ret = 0;
+
+	desc = kmalloc(CAAM_CMD_SZ * 6 + CAAM_PTR_SZ * 2, GFP_KERNEL | GFP_DMA);
+	if (!desc) {
+		dev_err(jrdev, "unable to allocate key input memory\n");
+		return -ENOMEM;
+	}
+
+	init_job_desc(desc, 0);
+
+	src_dma = dma_map_single(jrdev, (void *)key_in, *keylen,
+				 DMA_TO_DEVICE);
+	if (dma_mapping_error(jrdev, src_dma)) {
+		dev_err(jrdev, "unable to map key input memory\n");
+		kfree(desc);
+		return -ENOMEM;
+	}
+	dst_dma = dma_map_single(jrdev, (void *)key_out, digestsize,
+				 DMA_FROM_DEVICE);
+	if (dma_mapping_error(jrdev, dst_dma)) {
+		dev_err(jrdev, "unable to map key output memory\n");
+		dma_unmap_single(jrdev, src_dma, *keylen, DMA_TO_DEVICE);
+		kfree(desc);
+		return -ENOMEM;
+	}
+
+	/* Job descriptor to perform unkeyed hash on key_in */
+	append_operation(desc, ctx->alg_type | OP_ALG_ENCRYPT |
+			 OP_ALG_AS_INITFINAL);
+	append_seq_in_ptr(desc, src_dma, *keylen, 0);
+	append_seq_fifo_load(desc, *keylen, FIFOLD_CLASS_CLASS2 |
+			     FIFOLD_TYPE_LAST2 | FIFOLD_TYPE_MSG);
+	append_seq_out_ptr(desc, dst_dma, digestsize, 0);
+	append_seq_store(desc, digestsize, LDST_CLASS_2_CCB |
+			 LDST_SRCDST_BYTE_CONTEXT);
+
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "key_in@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, key_in, *keylen, 1);
+	print_hex_dump(KERN_ERR, "jobdesc@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
+#endif
+
+	result.err = 0;
+	init_completion(&result.completion);
+
+	ret = caam_jr_enqueue(jrdev, desc, split_key_done, &result);
+	if (!ret) {
+		/*
+		 * in progress: the completion lives on this stack frame and
+		 * the buffers stay mapped for the device, so the wait must
+		 * not be cut short by a signal
+		 */
+		wait_for_completion(&result.completion);
+		ret = result.err;
+	}
+
+	/* unmap with the length that was mapped, before *keylen changes */
+	dma_unmap_single(jrdev, src_dma, *keylen, DMA_TO_DEVICE);
+	dma_unmap_single(jrdev, dst_dma, digestsize, DMA_FROM_DEVICE);
+
+#ifdef DEBUG
+	if (!ret)
+		print_hex_dump(KERN_ERR, "digested key@"xstr(__LINE__)": ",
+			       DUMP_PREFIX_ADDRESS, 16, 4, key_out,
+			       digestsize, 1);
+#endif
+
+	*keylen = digestsize;
+
+	kfree(desc);
+
+	return ret;
+}
+
+/*
+ * setkey handler: digest keys longer than the block size, derive the split
+ * key into ctx->key, map it for the device and rebuild the shared
+ * descriptors.
+ *
+ * Returns 0 on success, -ENOMEM on allocation/mapping failure, -EINVAL
+ * (with CRYPTO_TFM_RES_BAD_KEY_LEN set) when key digesting or split key
+ * generation fails, or the error from ahash_set_sh_desc().
+ */
+static int ahash_setkey(struct crypto_ahash *ahash,
+			const u8 *key, unsigned int keylen)
+{
+	/* Sizes for MDHA pads (*not* keys): MD5, SHA1, 224, 256, 384, 512 */
+	static const u8 mdpadlen[] = { 16, 20, 32, 32, 64, 64 };
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct device *jrdev = ctx->jrdev;
+	int blocksize = crypto_tfm_alg_blocksize(&ahash->base);
+	int digestsize = crypto_ahash_digestsize(ahash);
+	int ret = 0;
+	u8 *hashed_key = NULL;
+
+#ifdef DEBUG
+	printk(KERN_ERR "keylen %d\n", keylen);
+#endif
+
+	if (keylen > blocksize) {
+		hashed_key = kmalloc(sizeof(u8) * digestsize, GFP_KERNEL |
+				     GFP_DMA);
+		if (!hashed_key)
+			return -ENOMEM;
+		ret = hash_digest_key(ctx, key, &keylen, hashed_key,
+				      digestsize);
+		if (ret)
+			goto badkey;
+		key = hashed_key;
+	}
+
+	/* Pick class 2 key length from algorithm submask */
+	ctx->split_key_len = mdpadlen[(ctx->alg_op & OP_ALG_ALGSEL_SUBMASK) >>
+				      OP_ALG_ALGSEL_SHIFT] * 2;
+	ctx->split_key_pad_len = ALIGN(ctx->split_key_len, 16);
+
+#ifdef DEBUG
+	printk(KERN_ERR "split_key_len %d split_key_pad_len %d\n",
+	       ctx->split_key_len, ctx->split_key_pad_len);
+	print_hex_dump(KERN_ERR, "key in @"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
+#endif
+
+	ret = gen_split_hash_key(ctx, key, keylen);
+	if (ret)
+		goto badkey;
+
+	ctx->key_dma = dma_map_single(jrdev, ctx->key, ctx->split_key_pad_len,
+				      DMA_TO_DEVICE);
+	if (dma_mapping_error(jrdev, ctx->key_dma)) {
+		dev_err(jrdev, "unable to map key i/o memory\n");
+		/* do not leak the digested key on this early exit */
+		kfree(hashed_key);
+		return -ENOMEM;
+	}
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "ctx.key@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, ctx->key,
+		       ctx->split_key_pad_len, 1);
+#endif
+
+	ret = ahash_set_sh_desc(ahash);
+	if (ret) {
+		dma_unmap_single(jrdev, ctx->key_dma, ctx->split_key_pad_len,
+				 DMA_TO_DEVICE);
+	}
+
+	kfree(hashed_key);
+	return ret;
+badkey:
+	kfree(hashed_key);
+	crypto_ahash_set_flags(ahash, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	return -EINVAL;
+}
+
+/*
+ * ahash_edesc - s/w-extended ahash descriptor
+ * @dst_dma: physical mapped address of req->result
+ * @sec4_sg_dma: physical mapped address of h/w link table
+ * @src_nents: number of segments in input scatterlist
+ * @sec4_sg_bytes: length of dma mapped sec4_sg space
+ * @sec4_sg: pointer to h/w link table
+ * @hw_desc: the h/w job descriptor followed by any referenced link tables
+ */
+struct ahash_edesc {
+	dma_addr_t dst_dma;
+	dma_addr_t
sec4_sg_dma;
+	int src_nents;
+	int sec4_sg_bytes;
+	struct sec4_sg_entry *sec4_sg;
+	u32 hw_desc[0];
+};
+
+/*
+ * Undo the DMA mappings recorded in an extended descriptor: the source
+ * scatterlist (when src_nents is non-zero), the result buffer (when
+ * dst_dma is non-zero, dst_len bytes) and the h/w link table (when
+ * sec4_sg_bytes is non-zero). All three fields must therefore be
+ * initialised by whoever allocated the edesc.
+ */
+static inline void ahash_unmap(struct device *dev,
+			struct ahash_edesc *edesc,
+			struct ahash_request *req, int dst_len)
+{
+	if (edesc->src_nents)
+		dma_unmap_sg(dev, req->src, edesc->src_nents, DMA_TO_DEVICE);
+	if (edesc->dst_dma)
+		dma_unmap_single(dev, edesc->dst_dma, dst_len, DMA_FROM_DEVICE);
+
+	if (edesc->sec4_sg_bytes)
+		dma_unmap_single(dev, edesc->sec4_sg_dma,
+				 edesc->sec4_sg_bytes, DMA_TO_DEVICE);
+}
+
+/*
+ * Like ahash_unmap(), but first unmaps the running hash context
+ * (state->ctx_dma, ctx->ctx_len bytes) when it is mapped. @flag is the DMA
+ * direction and has to match the direction the context was mapped with.
+ */
+static inline void ahash_unmap_ctx(struct device *dev,
+			struct ahash_edesc *edesc,
+			struct ahash_request *req, int dst_len, u32 flag)
+{
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_state *state = ahash_request_ctx(req);
+
+	if (state->ctx_dma)
+		dma_unmap_single(dev, state->ctx_dma, ctx->ctx_len, flag);
+	ahash_unmap(dev, edesc, req, dst_len);
+}
+
+/*
+ * Job completion callback for requests that did not map a hash context:
+ * logs a non-zero status, releases the edesc mappings and memory, then
+ * completes the request with @err.
+ */
+static void ahash_done(struct device *jrdev, u32 *desc, u32 err,
+		       void *context)
+{
+	struct ahash_request *req = context;
+	struct ahash_edesc *edesc;
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	int digestsize = crypto_ahash_digestsize(ahash);
+#ifdef DEBUG
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_state *state = ahash_request_ctx(req);
+
+	dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
+#endif
+
+	/* the job descriptor is embedded at the end of the edesc */
+	edesc = (struct ahash_edesc *)((char *)desc -
+		 offsetof(struct ahash_edesc, hw_desc));
+	if (err) {
+		char tmp[CAAM_ERROR_STR_MAX];
+
+		dev_err(jrdev, "%08x: %s\n", err, caam_jr_strstatus(tmp, err));
+	}
+
+	ahash_unmap(jrdev, edesc, req, digestsize);
+	kfree(edesc);
+
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "ctx@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx,
+		       ctx->ctx_len, 1);
+	if (req->result)
+		print_hex_dump(KERN_ERR, "result@"xstr(__LINE__)": ",
+			       DUMP_PREFIX_ADDRESS, 16, 4, req->result,
+			       digestsize, 1);
+#endif
+
+	req->base.complete(&req->base, err);
+}
+
+/*
+ * Job completion callback for requests whose hash context was mapped
+ * bidirectionally (context read and written back by the device).
+ */
+static void ahash_done_bi(struct device *jrdev, u32 *desc, u32 err,
+			  void *context)
+{
+	struct ahash_request *req = context;
+	struct ahash_edesc *edesc;
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+#ifdef DEBUG
+	struct caam_hash_state *state = ahash_request_ctx(req);
+	int digestsize = crypto_ahash_digestsize(ahash);
+
+	dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
+#endif
+
+	edesc = (struct ahash_edesc *)((char *)desc -
+		 offsetof(struct ahash_edesc, hw_desc));
+	if (err) {
+		char tmp[CAAM_ERROR_STR_MAX];
+
+		dev_err(jrdev, "%08x: %s\n", err, caam_jr_strstatus(tmp, err));
+	}
+
+	ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len, DMA_BIDIRECTIONAL);
+	kfree(edesc);
+
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "ctx@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx,
+		       ctx->ctx_len, 1);
+	if (req->result)
+		print_hex_dump(KERN_ERR, "result@"xstr(__LINE__)": ",
+			       DUMP_PREFIX_ADDRESS, 16, 4, req->result,
+			       digestsize, 1);
+#endif
+
+	req->base.complete(&req->base, err);
+}
+
+/*
+ * Job completion callback for requests where the hash context is an input
+ * of the job (final/finup with context) and the digest goes to req->result.
+ */
+static void ahash_done_ctx_src(struct device *jrdev, u32 *desc, u32 err,
+			       void *context)
+{
+	struct ahash_request *req = context;
+	struct ahash_edesc *edesc;
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	int digestsize = crypto_ahash_digestsize(ahash);
+#ifdef DEBUG
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_state *state = ahash_request_ctx(req);
+
+	dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
+#endif
+
+	edesc = (struct ahash_edesc *)((char *)desc -
+		 offsetof(struct ahash_edesc, hw_desc));
+	if (err) {
+		char tmp[CAAM_ERROR_STR_MAX];
+
+		dev_err(jrdev, "%08x: %s\n", err, caam_jr_strstatus(tmp, err));
+	}
+
+	/*
+	 * The submitters using this callback hand the context to the
+	 * device with DMA_TO_DEVICE; the unmap direction must match.
+	 */
+	ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_TO_DEVICE);
+	kfree(edesc);
+
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "ctx@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4,
state->caam_ctx,
+		       ctx->ctx_len, 1);
+	if (req->result)
+		print_hex_dump(KERN_ERR, "result@"xstr(__LINE__)": ",
+			       DUMP_PREFIX_ADDRESS, 16, 4, req->result,
+			       digestsize, 1);
+#endif
+
+	req->base.complete(&req->base, err);
+}
+
+/*
+ * Job completion callback for requests where the hash context is the
+ * output of the job (first update).
+ * NOTE(review): the unmap direction here must match the one used by
+ * map_seq_out_ptr_ctx(), which is not visible in this part of the file.
+ */
+static void ahash_done_ctx_dst(struct device *jrdev, u32 *desc, u32 err,
+			       void *context)
+{
+	struct ahash_request *req = context;
+	struct ahash_edesc *edesc;
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+#ifdef DEBUG
+	struct caam_hash_state *state = ahash_request_ctx(req);
+	int digestsize = crypto_ahash_digestsize(ahash);
+
+	dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
+#endif
+
+	edesc = (struct ahash_edesc *)((char *)desc -
+		 offsetof(struct ahash_edesc, hw_desc));
+	if (err) {
+		char tmp[CAAM_ERROR_STR_MAX];
+
+		dev_err(jrdev, "%08x: %s\n", err, caam_jr_strstatus(tmp, err));
+	}
+
+	ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len, DMA_TO_DEVICE);
+	kfree(edesc);
+
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "ctx@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx,
+		       ctx->ctx_len, 1);
+	if (req->result)
+		print_hex_dump(KERN_ERR, "result@"xstr(__LINE__)": ",
+			       DUMP_PREFIX_ADDRESS, 16, 4, req->result,
+			       digestsize, 1);
+#endif
+
+	req->base.complete(&req->base, err);
+}
+
+/*
+ * submit update job descriptor
+ *
+ * Hashes all whole blocks available in the current buffer plus req->src,
+ * using and updating the running context, and stashes the remaining
+ * partial block in the other buffer. When less than one block is
+ * available the data is only appended to the current buffer.
+ *
+ * Returns -EINPROGRESS when a job was queued, 0 when the data was only
+ * buffered, or a negative error.
+ */
+static int ahash_update_ctx(struct ahash_request *req)
+{
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct device *jrdev = ctx->jrdev;
+	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
+		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
+	u8 *buf = state->current_buf ? state->buf_1 : state->buf_0;
+	int *buflen = state->current_buf ? &state->buflen_1 : &state->buflen_0;
+	u8 *next_buf = state->current_buf ? state->buf_0 : state->buf_1;
+	int *next_buflen = state->current_buf ? &state->buflen_0 :
+			   &state->buflen_1, last_buflen;
+	int in_len = *buflen + req->nbytes, to_hash;
+	u32 *sh_desc = ctx->sh_desc_update, *desc;
+	dma_addr_t ptr = ctx->sh_desc_update_dma;
+	int src_nents, sec4_sg_bytes, sec4_sg_src_index;
+	struct ahash_edesc *edesc;
+	int ret = 0;
+	int sh_len;
+
+	last_buflen = *next_buflen;
+	/* the mask assumes the block size is a power of two */
+	*next_buflen = in_len & (crypto_tfm_alg_blocksize(&ahash->base) - 1);
+	to_hash = in_len - *next_buflen;
+
+	if (to_hash) {
+		src_nents = __sg_count(req->src, req->nbytes - (*next_buflen));
+		/* entry 0: context, optional entry 1: buffered data */
+		sec4_sg_src_index = 1 + (*buflen ? 1 : 0);
+		sec4_sg_bytes = (sec4_sg_src_index + src_nents) *
+				 sizeof(struct sec4_sg_entry);
+
+		/*
+		 * allocate space for base edesc and hw desc commands,
+		 * link tables
+		 */
+		edesc = kmalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN +
+				sec4_sg_bytes, GFP_DMA | flags);
+		if (!edesc) {
+			dev_err(jrdev,
+				"could not allocate extended descriptor\n");
+			return -ENOMEM;
+		}
+
+		edesc->src_nents = src_nents;
+		edesc->sec4_sg_bytes = sec4_sg_bytes;
+		/* no result buffer here; ahash_unmap() tests this field */
+		edesc->dst_dma = 0;
+		edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) +
+				 DESC_JOB_IO_LEN;
+		edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
+						     sec4_sg_bytes,
+						     DMA_TO_DEVICE);
+
+		/* set before first use: desc is passed to the helper below */
+		desc = edesc->hw_desc;
+
+		ctx_map_to_sec4_sg(desc, jrdev, state, ctx->ctx_len,
+				   edesc->sec4_sg, DMA_BIDIRECTIONAL);
+
+		state->buf_dma = try_buf_map_to_sec4_sg(jrdev,
+							edesc->sec4_sg + 1,
+							buf, state->buf_dma,
+							*buflen, last_buflen);
+
+		if (src_nents) {
+			src_map_to_sec4_sg(jrdev, req->src, src_nents,
+					   edesc->sec4_sg + sec4_sg_src_index);
+			if (*next_buflen) {
+				sg_copy_part(next_buf, req->src, to_hash -
+					     *buflen, req->nbytes);
+				state->current_buf = !state->current_buf;
+			}
+		} else {
+			(edesc->sec4_sg + sec4_sg_src_index - 1)->len |=
+							SEC4_SG_LEN_FIN;
+		}
+
+		sh_len = desc_len(sh_desc);
+		init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER |
+				     HDR_REVERSE);
+
+		append_seq_in_ptr(desc, edesc->sec4_sg_dma, ctx->ctx_len +
+				  to_hash, LDST_SGF);
+
+		append_seq_out_ptr(desc, state->ctx_dma, ctx->ctx_len, 0);
+
+#ifdef DEBUG
+		print_hex_dump(KERN_ERR, "jobdesc@"xstr(__LINE__)": ",
+			       DUMP_PREFIX_ADDRESS, 16, 4, desc,
+			       desc_bytes(desc), 1);
+#endif
+
+		ret = caam_jr_enqueue(jrdev, desc, ahash_done_bi, req);
+		if (!ret) {
+			ret = -EINPROGRESS;
+		} else {
+			ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len,
+					DMA_BIDIRECTIONAL);
+			kfree(edesc);
+		}
+	} else if (*next_buflen) {
+		sg_copy(buf + *buflen, req->src, req->nbytes);
+		*buflen = *next_buflen;
+		*next_buflen = last_buflen;
+	}
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "buf@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, buf, *buflen, 1);
+	print_hex_dump(KERN_ERR, "next buf@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, next_buf,
+		       *next_buflen, 1);
+#endif
+
+	return ret;
+}
+
+/*
+ * Submit the final job when a running context exists: the input is the
+ * context followed by whatever is left in the current buffer, the output
+ * is the digest in req->result.
+ *
+ * Returns -EINPROGRESS when the job was queued or a negative error.
+ */
+static int ahash_final_ctx(struct ahash_request *req)
+{
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct device *jrdev = ctx->jrdev;
+	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
+		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
+	u8 *buf = state->current_buf ? state->buf_1 : state->buf_0;
+	int buflen = state->current_buf ? state->buflen_1 : state->buflen_0;
+	int last_buflen = state->current_buf ? state->buflen_0 :
+			  state->buflen_1;
+	u32 *sh_desc = ctx->sh_desc_fin, *desc;
+	dma_addr_t ptr = ctx->sh_desc_fin_dma;
+	int sec4_sg_bytes, sec4_sg_src_index;
+	int digestsize = crypto_ahash_digestsize(ahash);
+	struct ahash_edesc *edesc;
+	int ret = 0;
+	int sh_len;
+
+	/* number of link table entries: context plus optional buffer */
+	sec4_sg_src_index = 1 + (buflen ? 1 : 0);
+	sec4_sg_bytes = sec4_sg_src_index * sizeof(struct sec4_sg_entry);
+
+	/* allocate space for base edesc and hw desc commands, link tables */
+	edesc = kmalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN +
+			sec4_sg_bytes, GFP_DMA | flags);
+	if (!edesc) {
+		dev_err(jrdev, "could not allocate extended descriptor\n");
+		return -ENOMEM;
+	}
+
+	sh_len = desc_len(sh_desc);
+	desc = edesc->hw_desc;
+	init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER | HDR_REVERSE);
+
+	edesc->sec4_sg_bytes = sec4_sg_bytes;
+	edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) +
+			 DESC_JOB_IO_LEN;
+	edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
+					    sec4_sg_bytes, DMA_TO_DEVICE);
+	edesc->src_nents = 0;
+
+	ctx_map_to_sec4_sg(desc, jrdev, state, ctx->ctx_len, edesc->sec4_sg,
+			   DMA_TO_DEVICE);
+
+	state->buf_dma = try_buf_map_to_sec4_sg(jrdev, edesc->sec4_sg + 1,
+						buf, state->buf_dma, buflen,
+						last_buflen);
+	/*
+	 * Flag the last entry of the table. The index is in entries, not
+	 * bytes: adding sec4_sg_bytes to the pointer would write far past
+	 * the end of the allocation.
+	 */
+	(edesc->sec4_sg + sec4_sg_src_index - 1)->len |= SEC4_SG_LEN_FIN;
+
+	append_seq_in_ptr(desc, edesc->sec4_sg_dma, ctx->ctx_len + buflen,
+			  LDST_SGF);
+
+	edesc->dst_dma = map_seq_out_ptr_result(desc, jrdev, req->result,
+						digestsize);
+
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "jobdesc@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
+#endif
+
+	ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_src, req);
+	if (!ret) {
+		ret = -EINPROGRESS;
+	} else {
+		/* same direction as the ctx_map_to_sec4_sg() call above */
+		ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_TO_DEVICE);
+		kfree(edesc);
+	}
+
+	return ret;
+}
+
+static int ahash_finup_ctx(struct ahash_request *req)
+{
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct device *jrdev = ctx->jrdev;
+	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
+		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
+	u8 *buf = state->current_buf ? state->buf_1 : state->buf_0;
+	int buflen = state->current_buf ?
state->buflen_1 : state->buflen_0;
+	int last_buflen = state->current_buf ? state->buflen_0 :
+			  state->buflen_1;
+	u32 *sh_desc = ctx->sh_desc_finup, *desc;
+	dma_addr_t ptr = ctx->sh_desc_finup_dma;
+	int sec4_sg_bytes, sec4_sg_src_index;
+	int src_nents;
+	int digestsize = crypto_ahash_digestsize(ahash);
+	struct ahash_edesc *edesc;
+	int ret = 0;
+	int sh_len;
+
+	src_nents = __sg_count(req->src, req->nbytes);
+	/* entry 0: context, optional entry 1: buffered data, then req->src */
+	sec4_sg_src_index = 1 + (buflen ? 1 : 0);
+	sec4_sg_bytes = (sec4_sg_src_index + src_nents) *
+			 sizeof(struct sec4_sg_entry);
+
+	/* allocate space for base edesc and hw desc commands, link tables */
+	edesc = kmalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN +
+			sec4_sg_bytes, GFP_DMA | flags);
+	if (!edesc) {
+		dev_err(jrdev, "could not allocate extended descriptor\n");
+		return -ENOMEM;
+	}
+
+	sh_len = desc_len(sh_desc);
+	desc = edesc->hw_desc;
+	init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER | HDR_REVERSE);
+
+	edesc->src_nents = src_nents;
+	edesc->sec4_sg_bytes = sec4_sg_bytes;
+	edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) +
+			 DESC_JOB_IO_LEN;
+	edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
+					    sec4_sg_bytes, DMA_TO_DEVICE);
+
+	ctx_map_to_sec4_sg(desc, jrdev, state, ctx->ctx_len, edesc->sec4_sg,
+			   DMA_TO_DEVICE);
+
+	state->buf_dma = try_buf_map_to_sec4_sg(jrdev, edesc->sec4_sg + 1,
+						buf, state->buf_dma, buflen,
+						last_buflen);
+
+	src_map_to_sec4_sg(jrdev, req->src, src_nents, edesc->sec4_sg +
+			   sec4_sg_src_index);
+
+	append_seq_in_ptr(desc, edesc->sec4_sg_dma, ctx->ctx_len +
+			  buflen + req->nbytes, LDST_SGF);
+
+	edesc->dst_dma = map_seq_out_ptr_result(desc, jrdev, req->result,
+						digestsize);
+
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "jobdesc@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
+#endif
+
+	ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_src, req);
+	if (!ret) {
+		ret = -EINPROGRESS;
+	} else {
+		/* same direction as the ctx_map_to_sec4_sg() call above */
+		ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_TO_DEVICE);
+		kfree(edesc);
+	}
+
+	return ret;
+}
+
+/*
+ * One-shot digest of req->src into req->result using the digest shared
+ * descriptor; no running context or buffered data is involved.
+ *
+ * Returns -EINPROGRESS when the job was queued or a negative error.
+ */
+static int ahash_digest(struct ahash_request *req)
+{
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct device *jrdev = ctx->jrdev;
+	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
+		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
+	u32 *sh_desc = ctx->sh_desc_digest, *desc;
+	dma_addr_t ptr = ctx->sh_desc_digest_dma;
+	int digestsize = crypto_ahash_digestsize(ahash);
+	int src_nents, sec4_sg_bytes;
+	dma_addr_t src_dma;
+	struct ahash_edesc *edesc;
+	int ret = 0;
+	u32 options;
+	int sh_len;
+
+	src_nents = sg_count(req->src, req->nbytes);
+	dma_map_sg(jrdev, req->src, src_nents ? : 1, DMA_TO_DEVICE);
+	sec4_sg_bytes = src_nents * sizeof(struct sec4_sg_entry);
+
+	/* allocate space for base edesc and hw desc commands, link tables */
+	edesc = kmalloc(sizeof(struct ahash_edesc) + sec4_sg_bytes +
+			DESC_JOB_IO_LEN, GFP_DMA | flags);
+	if (!edesc) {
+		dev_err(jrdev, "could not allocate extended descriptor\n");
+		/* undo the scatterlist mapping done above */
+		dma_unmap_sg(jrdev, req->src, src_nents ? : 1, DMA_TO_DEVICE);
+		return -ENOMEM;
+	}
+	edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) +
+			  DESC_JOB_IO_LEN;
+	edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
+					    sec4_sg_bytes, DMA_TO_DEVICE);
+	/* ahash_unmap() tests this field to decide on the table unmap */
+	edesc->sec4_sg_bytes = sec4_sg_bytes;
+	edesc->src_nents = src_nents;
+
+	sh_len = desc_len(sh_desc);
+	desc = edesc->hw_desc;
+	init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER | HDR_REVERSE);
+
+	if (src_nents) {
+		sg_to_sec4_sg_last(req->src, src_nents, edesc->sec4_sg, 0);
+		src_dma = edesc->sec4_sg_dma;
+		options = LDST_SGF;
+	} else {
+		src_dma = sg_dma_address(req->src);
+		options = 0;
+	}
+	append_seq_in_ptr(desc, src_dma, req->nbytes, options);
+
+	edesc->dst_dma = map_seq_out_ptr_result(desc, jrdev, req->result,
+						digestsize);
+
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "jobdesc@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
+#endif
+
+	ret = caam_jr_enqueue(jrdev, desc, ahash_done, req);
+	if (!ret) {
+		ret = -EINPROGRESS;
+	} else {
+		ahash_unmap(jrdev, edesc, req, digestsize);
+		kfree(edesc);
+	}
+
+	return ret;
+}
+
+/*
+ * submit ahash final if it is the first job descriptor
+ *
+ * No running context exists yet, so the buffered data is hashed in one go
+ * with the digest shared descriptor.
+ */
+static int ahash_final_no_ctx(struct ahash_request *req)
+{
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct device *jrdev = ctx->jrdev;
+	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
+		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
+	u8 *buf = state->current_buf ? state->buf_1 : state->buf_0;
+	int buflen = state->current_buf ? state->buflen_1 : state->buflen_0;
+	u32 *sh_desc = ctx->sh_desc_digest, *desc;
+	dma_addr_t ptr = ctx->sh_desc_digest_dma;
+	int digestsize = crypto_ahash_digestsize(ahash);
+	struct ahash_edesc *edesc;
+	int ret = 0;
+	int sh_len;
+
+	/* allocate space for base edesc and hw desc commands, link tables */
+	edesc = kmalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN,
+			GFP_DMA | flags);
+	if (!edesc) {
+		dev_err(jrdev, "could not allocate extended descriptor\n");
+		return -ENOMEM;
+	}
+
+	/* no link table is used; ahash_unmap() tests this field */
+	edesc->sec4_sg_bytes = 0;
+
+	sh_len = desc_len(sh_desc);
+	desc = edesc->hw_desc;
+	init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER | HDR_REVERSE);
+
+	state->buf_dma = dma_map_single(jrdev, buf, buflen, DMA_TO_DEVICE);
+
+	append_seq_in_ptr(desc, state->buf_dma, buflen, 0);
+
+	edesc->dst_dma = map_seq_out_ptr_result(desc, jrdev, req->result,
+						digestsize);
+	edesc->src_nents = 0;
+
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "jobdesc@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
+#endif
+
+	ret = caam_jr_enqueue(jrdev, desc, ahash_done, req);
+	if (!ret) {
+		ret = -EINPROGRESS;
+	} else {
+		ahash_unmap(jrdev, edesc, req, digestsize);
+		kfree(edesc);
+	}
+
+	return ret;
+}
+
+/* submit ahash update if it the first job descriptor after update */
+static int ahash_update_no_ctx(struct ahash_request *req)
+{
+	struct
crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct device *jrdev = ctx->jrdev;
+	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
+		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
+	u8 *buf = state->current_buf ? state->buf_1 : state->buf_0;
+	int *buflen = state->current_buf ? &state->buflen_1 : &state->buflen_0;
+	u8 *next_buf = state->current_buf ? state->buf_0 : state->buf_1;
+	int *next_buflen = state->current_buf ? &state->buflen_0 :
+			   &state->buflen_1;
+	int in_len = *buflen + req->nbytes, to_hash;
+	int sec4_sg_bytes, src_nents;
+	struct ahash_edesc *edesc;
+	u32 *desc, *sh_desc = ctx->sh_desc_update_first;
+	dma_addr_t ptr = ctx->sh_desc_update_first_dma;
+	int ret = 0;
+	int sh_len;
+
+	/* the mask assumes the block size is a power of two */
+	*next_buflen = in_len & (crypto_tfm_alg_blocksize(&ahash->base) - 1);
+	to_hash = in_len - *next_buflen;
+
+	if (to_hash) {
+		src_nents = __sg_count(req->src, req->nbytes - (*next_buflen));
+		/* entry 0: buffered data, followed by the req->src entries */
+		sec4_sg_bytes = (1 + src_nents) *
+				sizeof(struct sec4_sg_entry);
+
+		/*
+		 * allocate space for base edesc and hw desc commands,
+		 * link tables
+		 */
+		edesc = kmalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN +
+				sec4_sg_bytes, GFP_DMA | flags);
+		if (!edesc) {
+			dev_err(jrdev,
+				"could not allocate extended descriptor\n");
+			return -ENOMEM;
+		}
+
+		edesc->src_nents = src_nents;
+		edesc->sec4_sg_bytes = sec4_sg_bytes;
+		/* no result buffer here; ahash_unmap() tests this field */
+		edesc->dst_dma = 0;
+		edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) +
+				 DESC_JOB_IO_LEN;
+		edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
+						    sec4_sg_bytes,
+						    DMA_TO_DEVICE);
+
+		state->buf_dma = buf_map_to_sec4_sg(jrdev, edesc->sec4_sg,
+						    buf, *buflen);
+		src_map_to_sec4_sg(jrdev, req->src, src_nents,
+				   edesc->sec4_sg + 1);
+		if (*next_buflen) {
+			sg_copy_part(next_buf, req->src, to_hash - *buflen,
+				    req->nbytes);
+			state->current_buf = !state->current_buf;
+		}
+
+		sh_len = desc_len(sh_desc);
+		desc = edesc->hw_desc;
+		init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER |
+				     HDR_REVERSE);
+
+		append_seq_in_ptr(desc, edesc->sec4_sg_dma, to_hash, LDST_SGF);
+
+		map_seq_out_ptr_ctx(desc, jrdev, state, ctx->ctx_len);
+
+#ifdef DEBUG
+		print_hex_dump(KERN_ERR, "jobdesc@"xstr(__LINE__)": ",
+			       DUMP_PREFIX_ADDRESS, 16, 4, desc,
+			       desc_bytes(desc), 1);
+#endif
+
+		ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_dst, req);
+		if (!ret) {
+			ret = -EINPROGRESS;
+			/* a context now exists: switch to the ctx handlers */
+			state->update = ahash_update_ctx;
+			state->finup = ahash_finup_ctx;
+			state->final = ahash_final_ctx;
+		} else {
+			ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len,
+					DMA_TO_DEVICE);
+			kfree(edesc);
+		}
+	} else if (*next_buflen) {
+		/* still less than one block: keep accumulating in buf */
+		sg_copy(buf + *buflen, req->src, req->nbytes);
+		*buflen = *next_buflen;
+		*next_buflen = 0;
+	}
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "buf@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, buf, *buflen, 1);
+	print_hex_dump(KERN_ERR, "next buf@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, next_buf,
+		       *next_buflen, 1);
+#endif
+
+	return ret;
+}
+
+/*
+ * submit ahash finup if it is the first job descriptor after update
+ *
+ * No running context exists yet: the buffered data followed by req->src is
+ * hashed in one go with the digest shared descriptor and the digest is
+ * written to req->result.
+ */
+static int ahash_finup_no_ctx(struct ahash_request *req)
+{
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct device *jrdev = ctx->jrdev;
+	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
+		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
+	u8 *buf = state->current_buf ? state->buf_1 : state->buf_0;
+	int buflen = state->current_buf ? state->buflen_1 : state->buflen_0;
+	int last_buflen = state->current_buf ? state->buflen_0 :
+			  state->buflen_1;
+	u32 *sh_desc = ctx->sh_desc_digest, *desc;
+	dma_addr_t ptr = ctx->sh_desc_digest_dma;
+	int sec4_sg_bytes, sec4_sg_src_index, src_nents;
+	int digestsize = crypto_ahash_digestsize(ahash);
+	struct ahash_edesc *edesc;
+	int sh_len;
+	int ret = 0;
+
+	src_nents = __sg_count(req->src, req->nbytes);
+	/*
+	 * NOTE(review): the table is sized for two leading entries although
+	 * req->src is placed at index 1 below - confirm which is intended.
+	 */
+	sec4_sg_src_index = 2;
+	sec4_sg_bytes = (sec4_sg_src_index + src_nents) *
+			 sizeof(struct sec4_sg_entry);
+
+	/* allocate space for base edesc and hw desc commands, link tables */
+	edesc = kmalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN +
+			sec4_sg_bytes, GFP_DMA | flags);
+	if (!edesc) {
+		dev_err(jrdev, "could not allocate extended descriptor\n");
+		return -ENOMEM;
+	}
+
+	sh_len = desc_len(sh_desc);
+	desc = edesc->hw_desc;
+	init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER | HDR_REVERSE);
+
+	edesc->src_nents = src_nents;
+	edesc->sec4_sg_bytes = sec4_sg_bytes;
+	edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) +
+			 DESC_JOB_IO_LEN;
+	edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
+					    sec4_sg_bytes, DMA_TO_DEVICE);
+
+	state->buf_dma = try_buf_map_to_sec4_sg(jrdev, edesc->sec4_sg, buf,
+						state->buf_dma, buflen,
+						last_buflen);
+
+	src_map_to_sec4_sg(jrdev, req->src, src_nents, edesc->sec4_sg + 1);
+
+	append_seq_in_ptr(desc, edesc->sec4_sg_dma, buflen +
+			  req->nbytes, LDST_SGF);
+
+	edesc->dst_dma = map_seq_out_ptr_result(desc, jrdev, req->result,
+						digestsize);
+
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "jobdesc@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
+#endif
+
+	ret = caam_jr_enqueue(jrdev, desc, ahash_done, req);
+	if (!ret) {
+		ret = -EINPROGRESS;
+	} else {
+		ahash_unmap(jrdev, edesc, req, digestsize);
+		kfree(edesc);
+	}
+
+	return ret;
+}
+
+/* submit first update job descriptor after init */
+static int ahash_update_first(struct ahash_request *req)
+{
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct caam_hash_ctx *ctx =
crypto_ahash_ctx(ahash); + struct caam_hash_state *state = ahash_request_ctx(req); + struct device *jrdev = ctx->jrdev; + gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | + CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC; + u8 *next_buf = state->buf_0 + state->current_buf * + CAAM_MAX_HASH_BLOCK_SIZE; + int *next_buflen = &state->buflen_0 + state->current_buf; + int to_hash; + u32 *sh_desc = ctx->sh_desc_update_first, *desc; + dma_addr_t ptr = ctx->sh_desc_update_first_dma; + int sec4_sg_bytes, src_nents; + dma_addr_t src_dma; + u32 options; + struct ahash_edesc *edesc; + int ret = 0; + int sh_len; + + *next_buflen = req->nbytes & (crypto_tfm_alg_blocksize(&ahash->base) - + 1); + to_hash = req->nbytes - *next_buflen; + + if (to_hash) { + src_nents = sg_count(req->src, req->nbytes - (*next_buflen)); + dma_map_sg(jrdev, req->src, src_nents ? : 1, DMA_TO_DEVICE); + sec4_sg_bytes = src_nents * sizeof(struct sec4_sg_entry); + + /* + * allocate space for base edesc and hw desc commands, + * link tables + */ + edesc = kmalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN + + sec4_sg_bytes, GFP_DMA | flags); + if (!edesc) { + dev_err(jrdev, + "could not allocate extended descriptor\n"); + return -ENOMEM; + } + + edesc->src_nents = src_nents; + edesc->sec4_sg_bytes = sec4_sg_bytes; + edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) + + DESC_JOB_IO_LEN; + edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, + sec4_sg_bytes, + DMA_TO_DEVICE); + + if (src_nents) { + sg_to_sec4_sg_last(req->src, src_nents, + edesc->sec4_sg, 0); + src_dma = edesc->sec4_sg_dma; + options = LDST_SGF; + } else { + src_dma = sg_dma_address(req->src); + options = 0; + } + + if (*next_buflen) + sg_copy_part(next_buf, req->src, to_hash, req->nbytes); + + sh_len = desc_len(sh_desc); + desc = edesc->hw_desc; + init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER | + HDR_REVERSE); + + append_seq_in_ptr(desc, src_dma, to_hash, options); + + map_seq_out_ptr_ctx(desc, 
jrdev, state, ctx->ctx_len); + +#ifdef DEBUG + print_hex_dump(KERN_ERR, "jobdesc@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, + desc_bytes(desc), 1); +#endif + + ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_dst, + req); + if (!ret) { + ret = -EINPROGRESS; + state->update = ahash_update_ctx; + state->finup = ahash_finup_ctx; + state->final = ahash_final_ctx; + } else { + ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len, + DMA_TO_DEVICE); + kfree(edesc); + } + } else if (*next_buflen) { + state->update = ahash_update_no_ctx; + state->finup = ahash_finup_no_ctx; + state->final = ahash_final_no_ctx; + sg_copy(next_buf, req->src, req->nbytes); + } +#ifdef DEBUG + print_hex_dump(KERN_ERR, "next buf@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, next_buf, + *next_buflen, 1); +#endif + + return ret; +} + +static int ahash_finup_first(struct ahash_request *req) +{ + return ahash_digest(req); +} + +static int ahash_init(struct ahash_request *req) +{ + struct caam_hash_state *state = ahash_request_ctx(req); + + state->update = ahash_update_first; + state->finup = ahash_finup_first; + state->final = ahash_final_no_ctx; + + state->current_buf = 0; + + return 0; +} + +static int ahash_update(struct ahash_request *req) +{ + struct caam_hash_state *state = ahash_request_ctx(req); + + return state->update(req); +} + +static int ahash_finup(struct ahash_request *req) +{ + struct caam_hash_state *state = ahash_request_ctx(req); + + return state->finup(req); +} + +static int ahash_final(struct ahash_request *req) +{ + struct caam_hash_state *state = ahash_request_ctx(req); + + return state->final(req); +} + +static int ahash_export(struct ahash_request *req, void *out) +{ + struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct caam_hash_state *state = ahash_request_ctx(req); + + memcpy(out, ctx, sizeof(struct caam_hash_ctx)); + memcpy(out + sizeof(struct caam_hash_ctx), state, + sizeof(struct 
caam_hash_state)); + return 0; +} +/* Inverse of ahash_export: reload the tfm context, then the request state, from in */ +static int ahash_import(struct ahash_request *req, const void *in) +{ + struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct caam_hash_state *state = ahash_request_ctx(req); + + memcpy(ctx, in, sizeof(struct caam_hash_ctx)); + memcpy(state, in + sizeof(struct caam_hash_ctx), + sizeof(struct caam_hash_state)); + return 0; +} +/* One supported hash: crypto API names, block size, ahash callbacks, alg_type/alg_op values */ +struct caam_hash_template { + char name[CRYPTO_MAX_ALG_NAME]; + char driver_name[CRYPTO_MAX_ALG_NAME]; + unsigned int blocksize; + struct ahash_alg template_ahash; + u32 alg_type; + u32 alg_op; +}; + +/* ahash descriptors */ +static struct caam_hash_template driver_hash[] = { + { + .name = "hmac(sha1)", + .driver_name = "hmac-sha1-caam", + .blocksize = SHA1_BLOCK_SIZE, + .template_ahash = { + .init = ahash_init, + .update = ahash_update, + .final = ahash_final, + .finup = ahash_finup, + .digest = ahash_digest, + .export = ahash_export, + .import = ahash_import, + .setkey = ahash_setkey, + .halg = { + .digestsize = SHA1_DIGEST_SIZE, + }, + }, + .alg_type = OP_ALG_ALGSEL_SHA1, + .alg_op = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC, + }, { + .name = "hmac(sha224)", + .driver_name = "hmac-sha224-caam", + .blocksize = SHA224_BLOCK_SIZE, + .template_ahash = { + .init = ahash_init, + .update = ahash_update, + .final = ahash_final, + .finup = ahash_finup, + .digest = ahash_digest, + .export = ahash_export, + .import = ahash_import, + .setkey = ahash_setkey, + .halg = { + .digestsize = SHA224_DIGEST_SIZE, + }, + }, + .alg_type = OP_ALG_ALGSEL_SHA224, + .alg_op = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC, + }, { + .name = "hmac(sha256)", + .driver_name = "hmac-sha256-caam", + .blocksize = SHA256_BLOCK_SIZE, + .template_ahash = { + .init = ahash_init, + .update = ahash_update, + .final = ahash_final, + .finup = ahash_finup, + .digest = ahash_digest, + .export = ahash_export, + .import = ahash_import, + .setkey = ahash_setkey, + .halg = { + .digestsize =
SHA256_DIGEST_SIZE, + }, + }, + .alg_type = OP_ALG_ALGSEL_SHA256, + .alg_op = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC, + }, { + .name = "hmac(sha384)", + .driver_name = "hmac-sha384-caam", + .blocksize = SHA384_BLOCK_SIZE, + .template_ahash = { + .init = ahash_init, + .update = ahash_update, + .final = ahash_final, + .finup = ahash_finup, + .digest = ahash_digest, + .export = ahash_export, + .import = ahash_import, + .setkey = ahash_setkey, + .halg = { + .digestsize = SHA384_DIGEST_SIZE, + }, + }, + .alg_type = OP_ALG_ALGSEL_SHA384, + .alg_op = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC, + }, { + .name = "hmac(sha512)", + .driver_name = "hmac-sha512-caam", + .blocksize = SHA512_BLOCK_SIZE, + .template_ahash = { + .init = ahash_init, + .update = ahash_update, + .final = ahash_final, + .finup = ahash_finup, + .digest = ahash_digest, + .export = ahash_export, + .import = ahash_import, + .setkey = ahash_setkey, + .halg = { + .digestsize = SHA512_DIGEST_SIZE, + }, + }, + .alg_type = OP_ALG_ALGSEL_SHA512, + .alg_op = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC, + }, { + .name = "hmac(md5)", + .driver_name = "hmac-md5-caam", + .blocksize = MD5_BLOCK_WORDS * 4, + .template_ahash = { + .init = ahash_init, + .update = ahash_update, + .final = ahash_final, + .finup = ahash_finup, + .digest = ahash_digest, + .export = ahash_export, + .import = ahash_import, + .setkey = ahash_setkey, + .halg = { + .digestsize = MD5_DIGEST_SIZE, + }, + }, + .alg_type = OP_ALG_ALGSEL_MD5, + .alg_op = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC, + }, +}; +/* A registered ahash: list linkage, controller device, alg_type/alg_op values, crypto API alg */ +struct caam_hash_alg { + struct list_head entry; + struct device *ctrldev; + int alg_type; + int alg_op; + struct ahash_alg ahash_alg; +}; +/* tfm init: pick a job ring, set alg_type/alg_op and ctx_len, then call ahash_set_sh_desc() */ +static int caam_hash_cra_init(struct crypto_tfm *tfm) +{ + struct crypto_ahash *ahash = __crypto_ahash_cast(tfm); + struct crypto_alg *base = tfm->__crt_alg; + struct hash_alg_common *halg = + container_of(base, struct hash_alg_common, base); + struct ahash_alg *alg = + container_of(halg, struct ahash_alg, halg); + struct
caam_hash_alg *caam_hash = + container_of(alg, struct caam_hash_alg, ahash_alg); + struct caam_hash_ctx *ctx = crypto_tfm_ctx(tfm); + struct caam_drv_private *priv = dev_get_drvdata(caam_hash->ctrldev); + /* Sizes for MDHA running digests: MD5, SHA1, 224, 256, 384, 512 */ + static const u8 runninglen[] = { HASH_MSG_LEN + MD5_DIGEST_SIZE, + HASH_MSG_LEN + SHA1_DIGEST_SIZE, + HASH_MSG_LEN + 32, + HASH_MSG_LEN + SHA256_DIGEST_SIZE, + HASH_MSG_LEN + 64, + HASH_MSG_LEN + SHA512_DIGEST_SIZE }; + int tgt_jr = atomic_inc_return(&priv->tfm_count); + int ret = 0; + + /* + * distribute tfms across job rings to ensure in-order + * crypto request processing per tfm + */ + ctx->jrdev = priv->jrdev[tgt_jr % priv->total_jobrs]; + + /* copy descriptor header template value */ + ctx->alg_type = OP_TYPE_CLASS2_ALG | caam_hash->alg_type; + ctx->alg_op = OP_TYPE_CLASS2_ALG | caam_hash->alg_op; + /* index runninglen[] by the algorithm selector bits of alg_op */ + ctx->ctx_len = runninglen[(ctx->alg_op & OP_ALG_ALGSEL_SUBMASK) >> + OP_ALG_ALGSEL_SHIFT]; + /* each request carries a struct caam_hash_state as its request context */ + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct caam_hash_state)); + + ret = ahash_set_sh_desc(ahash); + + return ret; +} +/* tfm exit: unmap each shared descriptor whose DMA address is non-zero and not a mapping error */ +static void caam_hash_cra_exit(struct crypto_tfm *tfm) +{ + struct caam_hash_ctx *ctx = crypto_tfm_ctx(tfm); + + if (ctx->sh_desc_update_dma && + !dma_mapping_error(ctx->jrdev, ctx->sh_desc_update_dma)) + dma_unmap_single(ctx->jrdev, ctx->sh_desc_update_dma, + desc_bytes(ctx->sh_desc_update), + DMA_TO_DEVICE); + if (ctx->sh_desc_update_first_dma && + !dma_mapping_error(ctx->jrdev, ctx->sh_desc_update_first_dma)) + dma_unmap_single(ctx->jrdev, ctx->sh_desc_update_first_dma, + desc_bytes(ctx->sh_desc_update_first), + DMA_TO_DEVICE); + if (ctx->sh_desc_fin_dma && + !dma_mapping_error(ctx->jrdev, ctx->sh_desc_fin_dma)) + dma_unmap_single(ctx->jrdev, ctx->sh_desc_fin_dma, + desc_bytes(ctx->sh_desc_fin), DMA_TO_DEVICE); + if (ctx->sh_desc_digest_dma && + !dma_mapping_error(ctx->jrdev, ctx->sh_desc_digest_dma)) + dma_unmap_single(ctx->jrdev,
ctx->sh_desc_digest_dma, + desc_bytes(ctx->sh_desc_digest), + DMA_TO_DEVICE); + if (ctx->sh_desc_finup_dma && + !dma_mapping_error(ctx->jrdev, ctx->sh_desc_finup_dma)) + dma_unmap_single(ctx->jrdev, ctx->sh_desc_finup_dma, + desc_bytes(ctx->sh_desc_finup), DMA_TO_DEVICE); +} +/* Module exit: unregister and free every ahash linked on the controller's hash_list */ +static void __exit caam_algapi_hash_exit(void) +{ + struct device_node *dev_node; + struct platform_device *pdev; + struct device *ctrldev; + struct caam_drv_private *priv; + struct caam_hash_alg *t_alg, *n; + + dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec-v4.0"); + if (!dev_node) + return; + + pdev = of_find_device_by_node(dev_node); + of_node_put(dev_node); /* only needed for the lookup; drop it on every path */ + if (!pdev) + return; + + ctrldev = &pdev->dev; + priv = dev_get_drvdata(ctrldev); + /* NULL next: hash_list was presumably never initialised, so nothing to undo */ + if (!priv->hash_list.next) + return; + + list_for_each_entry_safe(t_alg, n, &priv->hash_list, entry) { + crypto_unregister_ahash(&t_alg->ahash_alg); + list_del(&t_alg->entry); + kfree(t_alg); + } +} + +static struct caam_hash_alg * +caam_hash_alloc(struct device *ctrldev, struct caam_hash_template *template) +{ + struct caam_hash_alg *t_alg; + struct ahash_alg *halg; + struct crypto_alg *alg; + + t_alg = kzalloc(sizeof(struct caam_hash_alg), GFP_KERNEL); + if (!t_alg) { + dev_err(ctrldev, "failed to allocate t_alg\n"); + return ERR_PTR(-ENOMEM); + } + + t_alg->ahash_alg = template->template_ahash; + halg = &t_alg->ahash_alg; + alg = &halg->halg.base; + + snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s", template->name); + snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", + template->driver_name); + alg->cra_module = THIS_MODULE; + alg->cra_init = caam_hash_cra_init; + alg->cra_exit = caam_hash_cra_exit; + alg->cra_ctxsize = sizeof(struct caam_hash_ctx); + alg->cra_priority = CAAM_CRA_PRIORITY; + alg->cra_blocksize = template->blocksize; + alg->cra_alignmask = 0; + alg->cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_TYPE_AHASH; + alg->cra_type = &crypto_ahash_type; + + t_alg->alg_type = template->alg_type; + t_alg->alg_op =
template->alg_op; + t_alg->ctrldev = ctrldev; + + return t_alg; +} +/* Module init: find the CAAM controller and register one ahash per driver_hash[] entry */ +static int __init caam_algapi_hash_init(void) +{ + struct device_node *dev_node; + struct platform_device *pdev; + struct device *ctrldev; + struct caam_drv_private *priv; + int i = 0, err = 0; + + dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec-v4.0"); + if (!dev_node) + return -ENODEV; + + pdev = of_find_device_by_node(dev_node); + of_node_put(dev_node); /* only needed for the lookup; drop it on every path */ + if (!pdev) + return -ENODEV; + + ctrldev = &pdev->dev; + priv = dev_get_drvdata(ctrldev); + + INIT_LIST_HEAD(&priv->hash_list); + + atomic_set(&priv->tfm_count, -1); + + /* register crypto algorithms the device supports */ + for (i = 0; i < ARRAY_SIZE(driver_hash); i++) { + /* TODO: check if h/w supports alg */ + struct caam_hash_alg *t_alg; + + t_alg = caam_hash_alloc(ctrldev, &driver_hash[i]); + if (IS_ERR(t_alg)) { + err = PTR_ERR(t_alg); + dev_warn(ctrldev, "%s alg allocation failed\n", + driver_hash[i].driver_name); + continue; + } + + err = crypto_register_ahash(&t_alg->ahash_alg); + if (err) { + dev_warn(ctrldev, "%s alg registration failed\n", + t_alg->ahash_alg.halg.base.cra_driver_name); + kfree(t_alg); + } else + list_add_tail(&t_alg->entry, &priv->hash_list); + } + /* err is overwritten on every attempt, so this reports the last one only */ + return err; +} + +module_init(caam_algapi_hash_init); +module_exit(caam_algapi_hash_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("FSL CAAM support for ahash functions of crypto API"); +MODULE_AUTHOR("Freescale Semiconductor - NMG"); diff --git a/drivers/crypto/caam/compat.h b/drivers/crypto/caam/compat.h index a63bc65..762aeff 100644 --- a/drivers/crypto/caam/compat.h +++ b/drivers/crypto/caam/compat.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -33,5 +34,6 @@ #include #include #include +#include #endif /* !defined(CAAM_COMPAT_H) */ diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h index 462be99..ed2b739 100644 --- a/drivers/crypto/caam/intern.h +++ b/drivers/crypto/caam/intern.h @@ -88,6
+88,8 @@ struct caam_drv_private { atomic_t tfm_count ____cacheline_aligned; /* list of registered crypto algorithms (mk generic context handle?) */ struct list_head alg_list; + /* list of registered hash algorithms (mk generic context handle?) */ + struct list_head hash_list; /* * debugfs entries for developer view into driver/device diff --git a/drivers/crypto/caam/sg_sw_sec4.h b/drivers/crypto/caam/sg_sw_sec4.h index a6ad7a4..2dda9e3 100644 --- a/drivers/crypto/caam/sg_sw_sec4.h +++ b/drivers/crypto/caam/sg_sw_sec4.h @@ -82,3 +82,41 @@ static inline int sg_count(struct scatterlist *sg_list, int nbytes) return sg_nents; } + +/* Copy from len bytes of sg to dest, starting from beginning */ +static inline void sg_copy(u8 *dest, struct scatterlist *sg, unsigned int len) +{ + struct scatterlist *current_sg = sg; + int cpy_index = 0, next_cpy_index = current_sg->length; + + while (next_cpy_index < len) { + memcpy(dest + cpy_index, (u8 *) sg_virt(current_sg), + current_sg->length); + current_sg = scatterwalk_sg_next(current_sg); + cpy_index = next_cpy_index; + next_cpy_index += current_sg->length; + } + if (cpy_index < len) + memcpy(dest + cpy_index, (u8 *) sg_virt(current_sg), + len - cpy_index); +} + +/* Copy sg bytes [to_skip, end) to dest; requires to_skip < end */ +static inline void sg_copy_part(u8 *dest, struct scatterlist *sg, + int to_skip, unsigned int end) +{ + struct scatterlist *current_sg = sg; + int sg_index, cpy_index, sg_off; + + sg_index = current_sg->length; + while (sg_index <= to_skip) { + current_sg = scatterwalk_sg_next(current_sg); + sg_index += current_sg->length; + } + sg_off = current_sg->length - (sg_index - to_skip); + cpy_index = (end < sg_index ? end : sg_index) - to_skip; /* never copy past end */ + memcpy(dest, (u8 *) sg_virt(current_sg) + sg_off, cpy_index); + if (end > sg_index) + sg_copy(dest + cpy_index, scatterwalk_sg_next(current_sg), + end - sg_index); +} -- cgit v0.10.2 From b0e09bae37eeacb213d9baf8fcb4d48934a4ada5 Mon Sep 17 00:00:00 2001 From: Yuan Kang Date: Fri, 22 Jun 2012 19:48:48 -0500 Subject:
crypto: caam - unkeyed ahash support caam supports and registers unkeyed sha algorithms and md5. Signed-off-by: Yuan Kang Signed-off-by: Kim Phillips Signed-off-by: Herbert Xu diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index c0b6d86..7dcf28f 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -1485,6 +1485,8 @@ static int ahash_import(struct ahash_request *req, const void *in) struct caam_hash_template { char name[CRYPTO_MAX_ALG_NAME]; char driver_name[CRYPTO_MAX_ALG_NAME]; + char hmac_name[CRYPTO_MAX_ALG_NAME]; + char hmac_driver_name[CRYPTO_MAX_ALG_NAME]; unsigned int blocksize; struct ahash_alg template_ahash; u32 alg_type; @@ -1494,8 +1496,10 @@ struct caam_hash_template { /* ahash descriptors */ static struct caam_hash_template driver_hash[] = { { - .name = "hmac(sha1)", - .driver_name = "hmac-sha1-caam", + .name = "sha1", + .driver_name = "sha1-caam", + .hmac_name = "hmac(sha1)", + .hmac_driver_name = "hmac-sha1-caam", .blocksize = SHA1_BLOCK_SIZE, .template_ahash = { .init = ahash_init, @@ -1513,8 +1517,10 @@ static struct caam_hash_template driver_hash[] = { .alg_type = OP_ALG_ALGSEL_SHA1, .alg_op = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC, }, { - .name = "hmac(sha224)", - .driver_name = "hmac-sha224-caam", + .name = "sha224", + .driver_name = "sha224-caam", + .hmac_name = "hmac(sha224)", + .hmac_driver_name = "hmac-sha224-caam", .blocksize = SHA224_BLOCK_SIZE, .template_ahash = { .init = ahash_init, @@ -1532,8 +1538,10 @@ static struct caam_hash_template driver_hash[] = { .alg_type = OP_ALG_ALGSEL_SHA224, .alg_op = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC, }, { - .name = "hmac(sha256)", - .driver_name = "hmac-sha256-caam", + .name = "sha256", + .driver_name = "sha256-caam", + .hmac_name = "hmac(sha256)", + .hmac_driver_name = "hmac-sha256-caam", .blocksize = SHA256_BLOCK_SIZE, .template_ahash = { .init = ahash_init, @@ -1551,8 +1559,10 @@ static struct caam_hash_template driver_hash[] = { 
.alg_type = OP_ALG_ALGSEL_SHA256, .alg_op = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC, }, { - .name = "hmac(sha384)", - .driver_name = "hmac-sha384-caam", + .name = "sha384", + .driver_name = "sha384-caam", + .hmac_name = "hmac(sha384)", + .hmac_driver_name = "hmac-sha384-caam", .blocksize = SHA384_BLOCK_SIZE, .template_ahash = { .init = ahash_init, @@ -1570,8 +1580,10 @@ static struct caam_hash_template driver_hash[] = { .alg_type = OP_ALG_ALGSEL_SHA384, .alg_op = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC, }, { - .name = "hmac(sha512)", - .driver_name = "hmac-sha512-caam", + .name = "sha512", + .driver_name = "sha512-caam", + .hmac_name = "hmac(sha512)", + .hmac_driver_name = "hmac-sha512-caam", .blocksize = SHA512_BLOCK_SIZE, .template_ahash = { .init = ahash_init, @@ -1589,8 +1601,10 @@ static struct caam_hash_template driver_hash[] = { .alg_type = OP_ALG_ALGSEL_SHA512, .alg_op = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC, }, { - .name = "hmac(md5)", - .driver_name = "hmac-md5-caam", + .name = "md5", + .driver_name = "md5-caam", + .hmac_name = "hmac(md5)", + .hmac_driver_name = "hmac-md5-caam", .blocksize = MD5_BLOCK_WORDS * 4, .template_ahash = { .init = ahash_init, @@ -1721,7 +1735,8 @@ static void __exit caam_algapi_hash_exit(void) } static struct caam_hash_alg * -caam_hash_alloc(struct device *ctrldev, struct caam_hash_template *template) +caam_hash_alloc(struct device *ctrldev, struct caam_hash_template *template, + bool keyed) { struct caam_hash_alg *t_alg; struct ahash_alg *halg; @@ -1737,9 +1752,17 @@ caam_hash_alloc(struct device *ctrldev, struct caam_hash_template *template) halg = &t_alg->ahash_alg; alg = &halg->halg.base; - snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s", template->name); - snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", - template->driver_name); + if (keyed) { + snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s", + template->hmac_name); + snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", + template->hmac_driver_name); + } 
else { + snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s", + template->name); + snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", + template->driver_name); + } alg->cra_module = THIS_MODULE; alg->cra_init = caam_hash_cra_init; alg->cra_exit = caam_hash_cra_exit; @@ -1786,7 +1809,25 @@ static int __init caam_algapi_hash_init(void) /* TODO: check if h/w supports alg */ struct caam_hash_alg *t_alg; - t_alg = caam_hash_alloc(ctrldev, &driver_hash[i]); + /* register hmac version */ + t_alg = caam_hash_alloc(ctrldev, &driver_hash[i], true); + if (IS_ERR(t_alg)) { + err = PTR_ERR(t_alg); + dev_warn(ctrldev, "%s alg allocation failed\n", + driver_hash[i].driver_name); + continue; + } + + err = crypto_register_ahash(&t_alg->ahash_alg); + if (err) { + dev_warn(ctrldev, "%s alg registration failed\n", + t_alg->ahash_alg.halg.base.cra_driver_name); + kfree(t_alg); + } else + list_add_tail(&t_alg->entry, &priv->hash_list); + + /* register unkeyed version */ + t_alg = caam_hash_alloc(ctrldev, &driver_hash[i], false); if (IS_ERR(t_alg)) { err = PTR_ERR(t_alg); dev_warn(ctrldev, "%s alg allocation failed\n", -- cgit v0.10.2 From 643b39b031f546c7c3c60ef360b8260aa2b32762 Mon Sep 17 00:00:00 2001 From: Yuan Kang Date: Fri, 22 Jun 2012 19:48:49 -0500 Subject: crypto: caam - chaining support support chained scatterlists for aead, ablkcipher and ahash. 
Signed-off-by: Yuan Kang - fix dma unmap leak - un-unlikely src == dst, due to experience with AF_ALG Signed-off-by: Kudupudi Ugendreshwar Signed-off-by: Kim Phillips Signed-off-by: Herbert Xu diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index 5ab480a..0c1ea84 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -654,8 +654,11 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher, /* * aead_edesc - s/w-extended aead descriptor * @assoc_nents: number of segments in associated data (SPI+Seq) scatterlist + * @assoc_chained: if associated data is chained * @src_nents: number of segments in input scatterlist + * @src_chained: if source is chained * @dst_nents: number of segments in output scatterlist + * @dst_chained: if destination is chained * @iv_dma: dma address of iv for checking continuity and link table * @desc: h/w descriptor (variable length; must not exceed MAX_CAAM_DESCSIZE) * @sec4_sg_bytes: length of dma mapped sec4_sg space @@ -664,8 +667,11 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher, */ struct aead_edesc { int assoc_nents; + bool assoc_chained; int src_nents; + bool src_chained; int dst_nents; + bool dst_chained; dma_addr_t iv_dma; int sec4_sg_bytes; dma_addr_t sec4_sg_dma; @@ -676,7 +682,9 @@ struct aead_edesc { /* * ablkcipher_edesc - s/w-extended ablkcipher descriptor * @src_nents: number of segments in input scatterlist + * @src_chained: if source is chained * @dst_nents: number of segments in output scatterlist + * @dst_chained: if destination is chained * @iv_dma: dma address of iv for checking continuity and link table * @desc: h/w descriptor (variable length; must not exceed MAX_CAAM_DESCSIZE) * @sec4_sg_bytes: length of dma mapped sec4_sg space @@ -685,7 +693,9 @@ struct aead_edesc { */ struct ablkcipher_edesc { int src_nents; + bool src_chained; int dst_nents; + bool dst_chained; dma_addr_t iv_dma; int sec4_sg_bytes; dma_addr_t sec4_sg_dma; @@ -694,15
+704,19 @@ struct ablkcipher_edesc { }; static void caam_unmap(struct device *dev, struct scatterlist *src, - struct scatterlist *dst, int src_nents, int dst_nents, + struct scatterlist *dst, int src_nents, + bool src_chained, int dst_nents, bool dst_chained, dma_addr_t iv_dma, int ivsize, dma_addr_t sec4_sg_dma, int sec4_sg_bytes) { - if (unlikely(dst != src)) { - dma_unmap_sg(dev, src, src_nents, DMA_TO_DEVICE); - dma_unmap_sg(dev, dst, dst_nents, DMA_FROM_DEVICE); + if (dst != src) { + dma_unmap_sg_chained(dev, src, src_nents ? : 1, DMA_TO_DEVICE, + src_chained); + dma_unmap_sg_chained(dev, dst, dst_nents ? : 1, DMA_FROM_DEVICE, + dst_chained); } else { - dma_unmap_sg(dev, src, src_nents, DMA_BIDIRECTIONAL); + dma_unmap_sg_chained(dev, src, src_nents ? : 1, + DMA_BIDIRECTIONAL, src_chained); } if (iv_dma) @@ -719,12 +733,13 @@ static void aead_unmap(struct device *dev, struct crypto_aead *aead = crypto_aead_reqtfm(req); int ivsize = crypto_aead_ivsize(aead); - dma_unmap_sg(dev, req->assoc, edesc->assoc_nents, DMA_TO_DEVICE); + dma_unmap_sg_chained(dev, req->assoc, edesc->assoc_nents, + DMA_TO_DEVICE, edesc->assoc_chained); caam_unmap(dev, req->src, req->dst, - edesc->src_nents, edesc->dst_nents, - edesc->iv_dma, ivsize, edesc->sec4_sg_dma, - edesc->sec4_sg_bytes); + edesc->src_nents, edesc->src_chained, edesc->dst_nents, + edesc->dst_chained, edesc->iv_dma, ivsize, + edesc->sec4_sg_dma, edesc->sec4_sg_bytes); } static void ablkcipher_unmap(struct device *dev, @@ -735,9 +750,9 @@ static void ablkcipher_unmap(struct device *dev, int ivsize = crypto_ablkcipher_ivsize(ablkcipher); caam_unmap(dev, req->src, req->dst, - edesc->src_nents, edesc->dst_nents, - edesc->iv_dma, ivsize, edesc->sec4_sg_dma, - edesc->sec4_sg_bytes); + edesc->src_nents, edesc->src_chained, edesc->dst_nents, + edesc->dst_chained, edesc->iv_dma, ivsize, + edesc->sec4_sg_dma, edesc->sec4_sg_bytes); } static void aead_encrypt_done(struct device *jrdev, u32 *desc, u32 err, @@ -1128,25 +1143,26 @@ 
static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, dma_addr_t iv_dma = 0; int sgc; bool all_contig = true; + bool assoc_chained = false, src_chained = false, dst_chained = false; int ivsize = crypto_aead_ivsize(aead); int sec4_sg_index, sec4_sg_len = 0, sec4_sg_bytes; - assoc_nents = sg_count(req->assoc, req->assoclen); - src_nents = sg_count(req->src, req->cryptlen); + assoc_nents = sg_count(req->assoc, req->assoclen, &assoc_chained); + src_nents = sg_count(req->src, req->cryptlen, &src_chained); if (unlikely(req->dst != req->src)) - dst_nents = sg_count(req->dst, req->cryptlen); + dst_nents = sg_count(req->dst, req->cryptlen, &dst_chained); - sgc = dma_map_sg(jrdev, req->assoc, assoc_nents ? : 1, - DMA_BIDIRECTIONAL); + sgc = dma_map_sg_chained(jrdev, req->assoc, assoc_nents ? : 1, + DMA_BIDIRECTIONAL, assoc_chained); if (likely(req->src == req->dst)) { - sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1, - DMA_BIDIRECTIONAL); + sgc = dma_map_sg_chained(jrdev, req->src, src_nents ? : 1, + DMA_BIDIRECTIONAL, src_chained); } else { - sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1, - DMA_TO_DEVICE); - sgc = dma_map_sg(jrdev, req->dst, dst_nents ? : 1, - DMA_FROM_DEVICE); + sgc = dma_map_sg_chained(jrdev, req->src, src_nents ? : 1, + DMA_TO_DEVICE, src_chained); + sgc = dma_map_sg_chained(jrdev, req->dst, dst_nents ? 
: 1, + DMA_FROM_DEVICE, dst_chained); } /* Check if data are contiguous */ @@ -1172,8 +1188,11 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, } edesc->assoc_nents = assoc_nents; + edesc->assoc_chained = assoc_chained; edesc->src_nents = src_nents; + edesc->src_chained = src_chained; edesc->dst_nents = dst_nents; + edesc->dst_chained = dst_chained; edesc->iv_dma = iv_dma; edesc->sec4_sg_bytes = sec4_sg_bytes; edesc->sec4_sg = (void *)edesc + sizeof(struct aead_edesc) + @@ -1307,24 +1326,25 @@ static struct aead_edesc *aead_giv_edesc_alloc(struct aead_givcrypt_request int sgc; u32 contig = GIV_SRC_CONTIG | GIV_DST_CONTIG; int ivsize = crypto_aead_ivsize(aead); + bool assoc_chained = false, src_chained = false, dst_chained = false; int sec4_sg_index, sec4_sg_len = 0, sec4_sg_bytes; - assoc_nents = sg_count(req->assoc, req->assoclen); - src_nents = sg_count(req->src, req->cryptlen); + assoc_nents = sg_count(req->assoc, req->assoclen, &assoc_chained); + src_nents = sg_count(req->src, req->cryptlen, &src_chained); if (unlikely(req->dst != req->src)) - dst_nents = sg_count(req->dst, req->cryptlen); + dst_nents = sg_count(req->dst, req->cryptlen, &dst_chained); - sgc = dma_map_sg(jrdev, req->assoc, assoc_nents ? : 1, - DMA_BIDIRECTIONAL); + sgc = dma_map_sg_chained(jrdev, req->assoc, assoc_nents ? : 1, + DMA_BIDIRECTIONAL, assoc_chained); if (likely(req->src == req->dst)) { - sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1, - DMA_BIDIRECTIONAL); + sgc = dma_map_sg_chained(jrdev, req->src, src_nents ? : 1, + DMA_BIDIRECTIONAL, src_chained); } else { - sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1, - DMA_TO_DEVICE); - sgc = dma_map_sg(jrdev, req->dst, dst_nents ? : 1, - DMA_FROM_DEVICE); + sgc = dma_map_sg_chained(jrdev, req->src, src_nents ? : 1, + DMA_TO_DEVICE, src_chained); + sgc = dma_map_sg_chained(jrdev, req->dst, dst_nents ? 
: 1, + DMA_FROM_DEVICE, dst_chained); } /* Check if data are contiguous */ @@ -1358,8 +1378,11 @@ static struct aead_edesc *aead_giv_edesc_alloc(struct aead_givcrypt_request } edesc->assoc_nents = assoc_nents; + edesc->assoc_chained = assoc_chained; edesc->src_nents = src_nents; + edesc->src_chained = src_chained; edesc->dst_nents = dst_nents; + edesc->dst_chained = dst_chained; edesc->iv_dma = iv_dma; edesc->sec4_sg_bytes = sec4_sg_bytes; edesc->sec4_sg = (void *)edesc + sizeof(struct aead_edesc) + @@ -1459,21 +1482,22 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request bool iv_contig = false; int sgc; int ivsize = crypto_ablkcipher_ivsize(ablkcipher); + bool src_chained = false, dst_chained = false; int sec4_sg_index; - src_nents = sg_count(req->src, req->nbytes); + src_nents = sg_count(req->src, req->nbytes, &src_chained); - if (unlikely(req->dst != req->src)) - dst_nents = sg_count(req->dst, req->nbytes); + if (req->dst != req->src) + dst_nents = sg_count(req->dst, req->nbytes, &dst_chained); if (likely(req->src == req->dst)) { - sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1, - DMA_BIDIRECTIONAL); + sgc = dma_map_sg_chained(jrdev, req->src, src_nents ? : 1, + DMA_BIDIRECTIONAL, src_chained); } else { - sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1, - DMA_TO_DEVICE); - sgc = dma_map_sg(jrdev, req->dst, dst_nents ? : 1, - DMA_FROM_DEVICE); + sgc = dma_map_sg_chained(jrdev, req->src, src_nents ? : 1, + DMA_TO_DEVICE, src_chained); + sgc = dma_map_sg_chained(jrdev, req->dst, dst_nents ? 
: 1, + DMA_FROM_DEVICE, dst_chained); } /* @@ -1497,7 +1521,9 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request } edesc->src_nents = src_nents; + edesc->src_chained = src_chained; edesc->dst_nents = dst_nents; + edesc->dst_chained = dst_chained; edesc->sec4_sg_bytes = sec4_sg_bytes; edesc->sec4_sg = (void *)edesc + sizeof(struct ablkcipher_edesc) + desc_bytes; @@ -1510,7 +1536,7 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request sec4_sg_index += 1 + src_nents; } - if (unlikely(dst_nents)) { + if (dst_nents) { sg_to_sec4_sg_last(req->dst, dst_nents, edesc->sec4_sg + sec4_sg_index, 0); } diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 7dcf28f..895aaf2 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -175,9 +175,10 @@ static inline dma_addr_t buf_map_to_sec4_sg(struct device *jrdev, /* Map req->src and put it in link table */ static inline void src_map_to_sec4_sg(struct device *jrdev, struct scatterlist *src, int src_nents, - struct sec4_sg_entry *sec4_sg) + struct sec4_sg_entry *sec4_sg, + bool chained) { - dma_map_sg(jrdev, src, src_nents, DMA_TO_DEVICE); + dma_map_sg_chained(jrdev, src, src_nents, DMA_TO_DEVICE, chained); sg_to_sec4_sg_last(src, src_nents, sec4_sg, 0); } @@ -563,6 +564,7 @@ badkey: * ahash_edesc - s/w-extended ahash descriptor * @dst_dma: physical mapped address of req->result * @sec4_sg_dma: physical mapped address of h/w link table + * @chained: if source is chained * @src_nents: number of segments in input scatterlist * @sec4_sg_bytes: length of dma mapped sec4_sg space * @sec4_sg: pointer to h/w link table @@ -571,6 +573,7 @@ badkey: struct ahash_edesc { dma_addr_t dst_dma; dma_addr_t sec4_sg_dma; + bool chained; int src_nents; int sec4_sg_bytes; struct sec4_sg_entry *sec4_sg; @@ -582,7 +585,8 @@ static inline void ahash_unmap(struct device *dev, struct ahash_request *req, int dst_len) { if (edesc->src_nents) 
- dma_unmap_sg(dev, req->src, edesc->src_nents, DMA_TO_DEVICE); + dma_unmap_sg_chained(dev, req->src, edesc->src_nents, + DMA_TO_DEVICE, edesc->chained); if (edesc->dst_dma) dma_unmap_single(dev, edesc->dst_dma, dst_len, DMA_FROM_DEVICE); @@ -775,6 +779,7 @@ static int ahash_update_ctx(struct ahash_request *req) dma_addr_t ptr = ctx->sh_desc_update_dma; int src_nents, sec4_sg_bytes, sec4_sg_src_index; struct ahash_edesc *edesc; + bool chained = false; int ret = 0; int sh_len; @@ -783,7 +788,8 @@ static int ahash_update_ctx(struct ahash_request *req) to_hash = in_len - *next_buflen; if (to_hash) { - src_nents = __sg_count(req->src, req->nbytes - (*next_buflen)); + src_nents = __sg_count(req->src, req->nbytes - (*next_buflen), + &chained); sec4_sg_src_index = 1 + (*buflen ? 1 : 0); sec4_sg_bytes = (sec4_sg_src_index + src_nents) * sizeof(struct sec4_sg_entry); @@ -801,6 +807,7 @@ static int ahash_update_ctx(struct ahash_request *req) } edesc->src_nents = src_nents; + edesc->chained = chained; edesc->sec4_sg_bytes = sec4_sg_bytes; edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN; @@ -818,7 +825,8 @@ static int ahash_update_ctx(struct ahash_request *req) if (src_nents) { src_map_to_sec4_sg(jrdev, req->src, src_nents, - edesc->sec4_sg + sec4_sg_src_index); + edesc->sec4_sg + sec4_sg_src_index, + chained); if (*next_buflen) { sg_copy_part(next_buf, req->src, to_hash - *buflen, req->nbytes); @@ -958,10 +966,11 @@ static int ahash_finup_ctx(struct ahash_request *req) int src_nents; int digestsize = crypto_ahash_digestsize(ahash); struct ahash_edesc *edesc; + bool chained = false; int ret = 0; int sh_len; - src_nents = __sg_count(req->src, req->nbytes); + src_nents = __sg_count(req->src, req->nbytes, &chained); sec4_sg_src_index = 1 + (buflen ? 
1 : 0); sec4_sg_bytes = (sec4_sg_src_index + src_nents) * sizeof(struct sec4_sg_entry); @@ -979,6 +988,7 @@ static int ahash_finup_ctx(struct ahash_request *req) init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER | HDR_REVERSE); edesc->src_nents = src_nents; + edesc->chained = chained; edesc->sec4_sg_bytes = sec4_sg_bytes; edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN; @@ -993,7 +1003,7 @@ static int ahash_finup_ctx(struct ahash_request *req) last_buflen); src_map_to_sec4_sg(jrdev, req->src, src_nents, edesc->sec4_sg + - sec4_sg_src_index); + sec4_sg_src_index, chained); append_seq_in_ptr(desc, edesc->sec4_sg_dma, ctx->ctx_len + buflen + req->nbytes, LDST_SGF); @@ -1030,12 +1040,14 @@ static int ahash_digest(struct ahash_request *req) int src_nents, sec4_sg_bytes; dma_addr_t src_dma; struct ahash_edesc *edesc; + bool chained = false; int ret = 0; u32 options; int sh_len; - src_nents = sg_count(req->src, req->nbytes); - dma_map_sg(jrdev, req->src, src_nents ? : 1, DMA_TO_DEVICE); + src_nents = sg_count(req->src, req->nbytes, &chained); + dma_map_sg_chained(jrdev, req->src, src_nents ? 
: 1, DMA_TO_DEVICE, + chained); sec4_sg_bytes = src_nents * sizeof(struct sec4_sg_entry); /* allocate space for base edesc and hw desc commands, link tables */ @@ -1050,6 +1062,7 @@ static int ahash_digest(struct ahash_request *req) edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, sec4_sg_bytes, DMA_TO_DEVICE); edesc->src_nents = src_nents; + edesc->chained = chained; sh_len = desc_len(sh_desc); desc = edesc->hw_desc; @@ -1157,6 +1170,7 @@ static int ahash_update_no_ctx(struct ahash_request *req) struct ahash_edesc *edesc; u32 *desc, *sh_desc = ctx->sh_desc_update_first; dma_addr_t ptr = ctx->sh_desc_update_first_dma; + bool chained = false; int ret = 0; int sh_len; @@ -1164,7 +1178,8 @@ static int ahash_update_no_ctx(struct ahash_request *req) to_hash = in_len - *next_buflen; if (to_hash) { - src_nents = __sg_count(req->src, req->nbytes - (*next_buflen)); + src_nents = __sg_count(req->src, req->nbytes - (*next_buflen), + &chained); sec4_sg_bytes = (1 + src_nents) * sizeof(struct sec4_sg_entry); @@ -1181,6 +1196,7 @@ static int ahash_update_no_ctx(struct ahash_request *req) } edesc->src_nents = src_nents; + edesc->chained = chained; edesc->sec4_sg_bytes = sec4_sg_bytes; edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN; @@ -1191,7 +1207,7 @@ static int ahash_update_no_ctx(struct ahash_request *req) state->buf_dma = buf_map_to_sec4_sg(jrdev, edesc->sec4_sg, buf, *buflen); src_map_to_sec4_sg(jrdev, req->src, src_nents, - edesc->sec4_sg + 1); + edesc->sec4_sg + 1, chained); if (*next_buflen) { sg_copy_part(next_buf, req->src, to_hash - *buflen, req->nbytes); @@ -1258,10 +1274,11 @@ static int ahash_finup_no_ctx(struct ahash_request *req) int sec4_sg_bytes, sec4_sg_src_index, src_nents; int digestsize = crypto_ahash_digestsize(ahash); struct ahash_edesc *edesc; + bool chained = false; int sh_len; int ret = 0; - src_nents = __sg_count(req->src, req->nbytes); + src_nents = __sg_count(req->src, req->nbytes, &chained); 
sec4_sg_src_index = 2; sec4_sg_bytes = (sec4_sg_src_index + src_nents) * sizeof(struct sec4_sg_entry); @@ -1279,6 +1296,7 @@ static int ahash_finup_no_ctx(struct ahash_request *req) init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER | HDR_REVERSE); edesc->src_nents = src_nents; + edesc->chained = chained; edesc->sec4_sg_bytes = sec4_sg_bytes; edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN; @@ -1289,7 +1307,8 @@ static int ahash_finup_no_ctx(struct ahash_request *req) state->buf_dma, buflen, last_buflen); - src_map_to_sec4_sg(jrdev, req->src, src_nents, edesc->sec4_sg + 1); + src_map_to_sec4_sg(jrdev, req->src, src_nents, edesc->sec4_sg + 1, + chained); append_seq_in_ptr(desc, edesc->sec4_sg_dma, buflen + req->nbytes, LDST_SGF); @@ -1332,6 +1351,7 @@ static int ahash_update_first(struct ahash_request *req) dma_addr_t src_dma; u32 options; struct ahash_edesc *edesc; + bool chained = false; int ret = 0; int sh_len; @@ -1340,8 +1360,10 @@ static int ahash_update_first(struct ahash_request *req) to_hash = req->nbytes - *next_buflen; if (to_hash) { - src_nents = sg_count(req->src, req->nbytes - (*next_buflen)); - dma_map_sg(jrdev, req->src, src_nents ? : 1, DMA_TO_DEVICE); + src_nents = sg_count(req->src, req->nbytes - (*next_buflen), + &chained); + dma_map_sg_chained(jrdev, req->src, src_nents ? 
: 1, + DMA_TO_DEVICE, chained); sec4_sg_bytes = src_nents * sizeof(struct sec4_sg_entry); /* @@ -1357,6 +1379,7 @@ static int ahash_update_first(struct ahash_request *req) } edesc->src_nents = src_nents; + edesc->chained = chained; edesc->sec4_sg_bytes = sec4_sg_bytes; edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN; diff --git a/drivers/crypto/caam/sg_sw_sec4.h b/drivers/crypto/caam/sg_sw_sec4.h index 2dda9e3..e0037c8 100644 --- a/drivers/crypto/caam/sg_sw_sec4.h +++ b/drivers/crypto/caam/sg_sw_sec4.h @@ -37,7 +37,7 @@ sg_to_sec4_sg(struct scatterlist *sg, int sg_count, dma_to_sec4_sg_one(sec4_sg_ptr, sg_dma_address(sg), sg_dma_len(sg), offset); sec4_sg_ptr++; - sg = sg_next(sg); + sg = scatterwalk_sg_next(sg); sg_count--; } return sec4_sg_ptr - 1; @@ -56,7 +56,8 @@ static inline void sg_to_sec4_sg_last(struct scatterlist *sg, int sg_count, } /* count number of elements in scatterlist */ -static inline int __sg_count(struct scatterlist *sg_list, int nbytes) +static inline int __sg_count(struct scatterlist *sg_list, int nbytes, + bool *chained) { struct scatterlist *sg = sg_list; int sg_nents = 0; @@ -65,7 +66,7 @@ static inline int __sg_count(struct scatterlist *sg_list, int nbytes) sg_nents++; nbytes -= sg->length; if (!sg_is_last(sg) && (sg + 1)->length == 0) - BUG(); /* Not support chaining */ + *chained = true; sg = scatterwalk_sg_next(sg); } @@ -73,9 +74,10 @@ static inline int __sg_count(struct scatterlist *sg_list, int nbytes) } /* derive number of elements in scatterlist, but return 0 for 1 */ -static inline int sg_count(struct scatterlist *sg_list, int nbytes) +static inline int sg_count(struct scatterlist *sg_list, int nbytes, + bool *chained) { - int sg_nents = __sg_count(sg_list, nbytes); + int sg_nents = __sg_count(sg_list, nbytes, chained); if (likely(sg_nents == 1)) return 0; @@ -83,6 +85,38 @@ static inline int sg_count(struct scatterlist *sg_list, int nbytes) return sg_nents; } +static int dma_map_sg_chained(struct 
device *dev, struct scatterlist *sg, + unsigned int nents, enum dma_data_direction dir, + bool chained) +{ + if (unlikely(chained)) { + int i; + for (i = 0; i < nents; i++) { + dma_map_sg(dev, sg, 1, dir); + sg = scatterwalk_sg_next(sg); + } + } else { + dma_map_sg(dev, sg, nents, dir); + } + return nents; +} + +static int dma_unmap_sg_chained(struct device *dev, struct scatterlist *sg, + unsigned int nents, enum dma_data_direction dir, + bool chained) +{ + if (unlikely(chained)) { + int i; + for (i = 0; i < nents; i++) { + dma_unmap_sg(dev, sg, 1, dir); + sg = scatterwalk_sg_next(sg); + } + } else { + dma_unmap_sg(dev, sg, nents, dir); + } + return nents; +} + /* Copy from len bytes of sg to dest, starting from beginning */ static inline void sg_copy(u8 *dest, struct scatterlist *sg, unsigned int len) { -- cgit v0.10.2 From e24f7c9e87d46fad06bf1097d48f9923acd8e61c Mon Sep 17 00:00:00 2001 From: Yuan Kang Date: Fri, 22 Jun 2012 19:48:50 -0500 Subject: crypto: caam - hwrng support caam_read copies random bytes from two buffers into output. caam rng can fill empty buffer 0xffff bytes at a time, but the buffer sizes are rounded down to multiple of cacheline size. Signed-off-by: Yuan Kang Signed-off-by: Kim Phillips Signed-off-by: Herbert Xu diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig index e61b7f5..502a7f3 100644 --- a/drivers/crypto/caam/Kconfig +++ b/drivers/crypto/caam/Kconfig @@ -82,3 +82,16 @@ config CRYPTO_DEV_FSL_CAAM_AHASH_API To compile this as a module, choose M here: the module will be called caamhash. + +config CRYPTO_DEV_FSL_CAAM_RNG_API + tristate "Register caam device for hwrng API" + depends on CRYPTO_DEV_FSL_CAAM + default y + select CRYPTO_RNG + select HW_RANDOM + help + Selecting this will register the SEC4 hardware rng to + the hw_random API for supplying the kernel entropy pool. + + To compile this as a module, choose M here: the module + will be called caamrng. 
diff --git a/drivers/crypto/caam/Makefile b/drivers/crypto/caam/Makefile index 9ef1cb0..b1eb448 100644 --- a/drivers/crypto/caam/Makefile +++ b/drivers/crypto/caam/Makefile @@ -5,5 +5,6 @@ obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam.o obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API) += caamalg.o obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_AHASH_API) += caamhash.o +obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_RNG_API) += caamrng.o caam-objs := ctrl.o jr.o error.o key_gen.o diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c new file mode 100644 index 0000000..e2bfe16 --- /dev/null +++ b/drivers/crypto/caam/caamrng.c @@ -0,0 +1,309 @@ +/* + * caam - Freescale FSL CAAM support for hw_random + * + * Copyright 2011 Freescale Semiconductor, Inc. + * + * Based on caamalg.c crypto API driver. + * + * relationship between job descriptors to shared descriptors: + * + * --------------- -------------- + * | JobDesc #0 |-------------------->| ShareDesc | + * | *(buffer 0) | |------------->| (generate) | + * --------------- | | (move) | + * | | (store) | + * --------------- | -------------- + * | JobDesc #1 |------| + * | *(buffer 1) | + * --------------- + * + * A job desc looks like this: + * + * --------------------- + * | Header | + * | ShareDesc Pointer | + * | SEQ_OUT_PTR | + * | (output buffer) | + * --------------------- + * + * The SharedDesc never changes, and each job descriptor points to one of two + * buffers for each device, from which the data will be copied into the + * requested destination + */ + +#include +#include +#include + +#include "compat.h" + +#include "regs.h" +#include "intern.h" +#include "desc_constr.h" +#include "jr.h" +#include "error.h" + +/* + * Maximum buffer size: maximum number of random, cache-aligned bytes that + * will be generated and moved to seq out ptr (extlen not allowed) + */ +#define RN_BUF_SIZE (0xffff / L1_CACHE_BYTES * \ + L1_CACHE_BYTES) + +/* length of descriptors */ +#define DESC_JOB_O_LEN (CAAM_CMD_SZ * 2 + CAAM_PTR_SZ * 2) 
+#define DESC_RNG_LEN (10 * CAAM_CMD_SZ) + +/* Buffer, its dma address and lock */ +struct buf_data { + u8 buf[RN_BUF_SIZE]; + dma_addr_t addr; + struct completion filled; + u32 hw_desc[DESC_JOB_O_LEN]; +#define BUF_NOT_EMPTY 0 +#define BUF_EMPTY 1 +#define BUF_PENDING 2 /* Empty, but with job pending --don't submit another */ + atomic_t empty; +}; + +/* rng per-device context */ +struct caam_rng_ctx { + struct device *jrdev; + dma_addr_t sh_desc_dma; + u32 sh_desc[DESC_RNG_LEN]; + unsigned int cur_buf_idx; + int current_buf; + struct buf_data bufs[2]; +}; + +static struct caam_rng_ctx rng_ctx; + +static inline void rng_unmap_buf(struct device *jrdev, struct buf_data *bd) +{ + if (bd->addr) + dma_unmap_single(jrdev, bd->addr, RN_BUF_SIZE, + DMA_FROM_DEVICE); +} + +static inline void rng_unmap_ctx(struct caam_rng_ctx *ctx) +{ + struct device *jrdev = ctx->jrdev; + + if (ctx->sh_desc_dma) + dma_unmap_single(jrdev, ctx->sh_desc_dma, DESC_RNG_LEN, + DMA_TO_DEVICE); + rng_unmap_buf(jrdev, &ctx->bufs[0]); + rng_unmap_buf(jrdev, &ctx->bufs[1]); +} + +static void rng_done(struct device *jrdev, u32 *desc, u32 err, void *context) +{ + struct buf_data *bd; + + bd = (struct buf_data *)((char *)desc - + offsetof(struct buf_data, hw_desc)); + + if (err) { + char tmp[CAAM_ERROR_STR_MAX]; + + dev_err(jrdev, "%08x: %s\n", err, caam_jr_strstatus(tmp, err)); + } + + atomic_set(&bd->empty, BUF_NOT_EMPTY); + complete(&bd->filled); +#ifdef DEBUG + print_hex_dump(KERN_ERR, "rng refreshed buf@: ", + DUMP_PREFIX_ADDRESS, 16, 4, bd->buf, RN_BUF_SIZE, 1); +#endif +} + +static inline int submit_job(struct caam_rng_ctx *ctx, int to_current) +{ + struct buf_data *bd = &ctx->bufs[!(to_current ^ ctx->current_buf)]; + struct device *jrdev = ctx->jrdev; + u32 *desc = bd->hw_desc; + int err; + + dev_dbg(jrdev, "submitting job %d\n", !(to_current ^ ctx->current_buf)); + init_completion(&bd->filled); + err = caam_jr_enqueue(jrdev, desc, rng_done, ctx); + if (err) + complete(&bd->filled); /* don't wait 
on failed job*/ + else + atomic_inc(&bd->empty); /* note if pending */ + + return err; +} + +static int caam_read(struct hwrng *rng, void *data, size_t max, bool wait) +{ + struct caam_rng_ctx *ctx = &rng_ctx; + struct buf_data *bd = &ctx->bufs[ctx->current_buf]; + int next_buf_idx, copied_idx; + int err; + + if (atomic_read(&bd->empty)) { + /* try to submit job if there wasn't one */ + if (atomic_read(&bd->empty) == BUF_EMPTY) { + err = submit_job(ctx, 1); + /* if can't submit job, can't even wait */ + if (err) + return 0; + } + /* no immediate data, so exit if not waiting */ + if (!wait) + return 0; + + /* waiting for pending job */ + if (atomic_read(&bd->empty)) + wait_for_completion(&bd->filled); + } + + next_buf_idx = ctx->cur_buf_idx + max; + dev_dbg(ctx->jrdev, "%s: start reading at buffer %d, idx %d\n", + __func__, ctx->current_buf, ctx->cur_buf_idx); + + /* if enough data in current buffer */ + if (next_buf_idx < RN_BUF_SIZE) { + memcpy(data, bd->buf + ctx->cur_buf_idx, max); + ctx->cur_buf_idx = next_buf_idx; + return max; + } + + /* else, copy what's left... */ + copied_idx = RN_BUF_SIZE - ctx->cur_buf_idx; + memcpy(data, bd->buf + ctx->cur_buf_idx, copied_idx); + ctx->cur_buf_idx = 0; + atomic_set(&bd->empty, BUF_EMPTY); + + /* ...refill... 
*/ + submit_job(ctx, 1); + + /* and use next buffer */ + ctx->current_buf = !ctx->current_buf; + dev_dbg(ctx->jrdev, "switched to buffer %d\n", ctx->current_buf); + + /* since there already is some data read, don't wait */ + return copied_idx + caam_read(rng, data + copied_idx, + max - copied_idx, false); +} + +static inline void rng_create_sh_desc(struct caam_rng_ctx *ctx) +{ + struct device *jrdev = ctx->jrdev; + u32 *desc = ctx->sh_desc; + + init_sh_desc(desc, HDR_SHARE_WAIT); + + /* Propagate errors from shared to job descriptor */ + append_cmd(desc, SET_OK_NO_PROP_ERRORS | CMD_LOAD); + + /* Generate random bytes */ + append_operation(desc, OP_ALG_ALGSEL_RNG | OP_TYPE_CLASS1_ALG); + + /* Store bytes */ + append_seq_fifo_store(desc, RN_BUF_SIZE, FIFOST_TYPE_RNGSTORE); + + ctx->sh_desc_dma = dma_map_single(jrdev, desc, desc_bytes(desc), + DMA_TO_DEVICE); +#ifdef DEBUG + print_hex_dump(KERN_ERR, "rng shdesc@: ", DUMP_PREFIX_ADDRESS, 16, 4, + desc, desc_bytes(desc), 1); +#endif +} + +static inline void rng_create_job_desc(struct caam_rng_ctx *ctx, int buf_id) +{ + struct device *jrdev = ctx->jrdev; + struct buf_data *bd = &ctx->bufs[buf_id]; + u32 *desc = bd->hw_desc; + int sh_len = desc_len(ctx->sh_desc); + + init_job_desc_shared(desc, ctx->sh_desc_dma, sh_len, HDR_SHARE_DEFER | + HDR_REVERSE); + + bd->addr = dma_map_single(jrdev, bd->buf, RN_BUF_SIZE, DMA_FROM_DEVICE); + + append_seq_out_ptr_intlen(desc, bd->addr, RN_BUF_SIZE, 0); +#ifdef DEBUG + print_hex_dump(KERN_ERR, "rng job desc@: ", DUMP_PREFIX_ADDRESS, 16, 4, + desc, desc_bytes(desc), 1); +#endif +} + +static void caam_cleanup(struct hwrng *rng) +{ + int i; + struct buf_data *bd; + + for (i = 0; i < 2; i++) { + bd = &rng_ctx.bufs[i]; + if (atomic_read(&bd->empty) == BUF_PENDING) + wait_for_completion(&bd->filled); + } + + rng_unmap_ctx(&rng_ctx); +} + +static void caam_init_buf(struct caam_rng_ctx *ctx, int buf_id) +{ + struct buf_data *bd = &ctx->bufs[buf_id]; + + rng_create_job_desc(ctx, buf_id); + 
atomic_set(&bd->empty, BUF_EMPTY); + submit_job(ctx, buf_id == ctx->current_buf); + wait_for_completion(&bd->filled); +} + +static void caam_init_rng(struct caam_rng_ctx *ctx, struct device *jrdev) +{ + ctx->jrdev = jrdev; + rng_create_sh_desc(ctx); + ctx->current_buf = 0; + ctx->cur_buf_idx = 0; + caam_init_buf(ctx, 0); + caam_init_buf(ctx, 1); +} + +static struct hwrng caam_rng = { + .name = "rng-caam", + .cleanup = caam_cleanup, + .read = caam_read, +}; + +static void __exit caam_rng_exit(void) +{ + hwrng_unregister(&caam_rng); +} + +static int __init caam_rng_init(void) +{ + struct device_node *dev_node; + struct platform_device *pdev; + struct device *ctrldev; + struct caam_drv_private *priv; + + dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec-v4.0"); + if (!dev_node) + return -ENODEV; + + pdev = of_find_device_by_node(dev_node); + if (!pdev) + return -ENODEV; + + ctrldev = &pdev->dev; + priv = dev_get_drvdata(ctrldev); + of_node_put(dev_node); + + caam_init_rng(&rng_ctx, priv->jrdev[0]); + + dev_info(priv->jrdev[0], "registering rng-caam\n"); + return hwrng_register(&caam_rng); +} + +module_init(caam_rng_init); +module_exit(caam_rng_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("FSL CAAM support for hw_random API"); +MODULE_AUTHOR("Freescale Semiconductor - NMG"); -- cgit v0.10.2 From e13af18a3e33259e264c7fb47b54fbf608137976 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Fri, 22 Jun 2012 19:48:51 -0500 Subject: crypto: caam - assign 40-bit masks on SEC v5.0 and above SEC v4.x were only 36-bit, SEC v5+ are 40-bit capable. Also set a DMA mask for any job ring devices created. 
Signed-off-by: Kim Phillips Signed-off-by: Herbert Xu diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index 77557eb..9a2db9c 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -82,13 +82,18 @@ static int caam_probe(struct platform_device *pdev) /* * Enable DECO watchdogs and, if this is a PHYS_ADDR_T_64BIT kernel, - * 36-bit pointers in master configuration register + * long pointers in master configuration register */ setbits32(&topregs->ctrl.mcr, MCFGR_WDENABLE | (sizeof(dma_addr_t) == sizeof(u64) ? MCFGR_LONG_PTR : 0)); if (sizeof(dma_addr_t) == sizeof(u64)) - dma_set_mask(dev, DMA_BIT_MASK(36)); + if (of_device_is_compatible(nprop, "fsl,sec-v5.0")) + dma_set_mask(dev, DMA_BIT_MASK(40)); + else + dma_set_mask(dev, DMA_BIT_MASK(36)); + else + dma_set_mask(dev, DMA_BIT_MASK(32)); /* * Detect and enable JobRs diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c index 6ce4c41..9f16b2c 100644 --- a/drivers/crypto/caam/jr.c +++ b/drivers/crypto/caam/jr.c @@ -503,6 +503,14 @@ int caam_jr_probe(struct platform_device *pdev, struct device_node *np, dev_set_drvdata(jrdev, jrpriv); ctrlpriv->jrdev[ring] = jrdev; + if (sizeof(dma_addr_t) == sizeof(u64)) + if (of_device_is_compatible(np, "fsl,sec-v5.0-job-ring")) + dma_set_mask(jrdev, DMA_BIT_MASK(40)); + else + dma_set_mask(jrdev, DMA_BIT_MASK(36)); + else + dma_set_mask(jrdev, DMA_BIT_MASK(32)); + /* Identify the interrupt */ jrpriv->irq = of_irq_to_resource(np, 0, NULL); -- cgit v0.10.2 From 281922a1d4f59bdebbe78c1d9f4c50a967eb6cff Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Fri, 22 Jun 2012 19:48:52 -0500 Subject: crypto: caam - add support for SEC v5.x RNG4 The SEC v4.x' RNGB h/w block self-initialized. RNG4, available on SEC versions 5 and beyond, is based on a different standard that requires manual initialization. 
Also update any new errors from the SEC v5.2 reference manual: The SEC v5.2's RNG4 unit reuses some error IDs, thus the addition of rng_err_id_list over the CHA-independent err_id_list. Signed-off-by: Kim Phillips Signed-off-by: Herbert Xu diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index 9a2db9c..ac6abb3 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -2,13 +2,15 @@ * CAAM control-plane driver backend * Controller-level driver, kernel property detection, initialization * - * Copyright 2008-2011 Freescale Semiconductor, Inc. + * Copyright 2008-2012 Freescale Semiconductor, Inc. */ #include "compat.h" #include "regs.h" #include "intern.h" #include "jr.h" +#include "desc_constr.h" +#include "error.h" static int caam_remove(struct platform_device *pdev) { @@ -43,10 +45,120 @@ static int caam_remove(struct platform_device *pdev) return ret; } +/* + * Descriptor to instantiate RNG State Handle 0 in normal mode and + * load the JDKEK, TDKEK and TDSK registers + */ +static void build_instantiation_desc(u32 *desc) +{ + u32 *jump_cmd; + + init_job_desc(desc, 0); + + /* INIT RNG in non-test mode */ + append_operation(desc, OP_TYPE_CLASS1_ALG | OP_ALG_ALGSEL_RNG | + OP_ALG_AS_INIT); + + /* wait for done */ + jump_cmd = append_jump(desc, JUMP_CLASS_CLASS1); + set_jump_tgt_here(desc, jump_cmd); + + /* + * load 1 to clear written reg: + * resets the done interrupt and returns the RNG to idle. 
+ */ + append_load_imm_u32(desc, 1, LDST_SRCDST_WORD_CLRW); + + /* generate secure keys (non-test) */ + append_operation(desc, OP_TYPE_CLASS1_ALG | OP_ALG_ALGSEL_RNG | + OP_ALG_RNG4_SK); +} + +struct instantiate_result { + struct completion completion; + int err; +}; + +static void rng4_init_done(struct device *dev, u32 *desc, u32 err, + void *context) +{ + struct instantiate_result *instantiation = context; + + if (err) { + char tmp[CAAM_ERROR_STR_MAX]; + + dev_err(dev, "%08x: %s\n", err, caam_jr_strstatus(tmp, err)); + } + + instantiation->err = err; + complete(&instantiation->completion); +} + +static int instantiate_rng(struct device *jrdev) +{ + struct instantiate_result instantiation; + + dma_addr_t desc_dma; + u32 *desc; + int ret; + + desc = kmalloc(CAAM_CMD_SZ * 6, GFP_KERNEL | GFP_DMA); + if (!desc) { + dev_err(jrdev, "cannot allocate RNG init descriptor memory\n"); + return -ENOMEM; + } + + build_instantiation_desc(desc); + desc_dma = dma_map_single(jrdev, desc, desc_bytes(desc), DMA_TO_DEVICE); + init_completion(&instantiation.completion); + ret = caam_jr_enqueue(jrdev, desc, rng4_init_done, &instantiation); + if (!ret) { + wait_for_completion_interruptible(&instantiation.completion); + ret = instantiation.err; + if (ret) + dev_err(jrdev, "unable to instantiate RNG\n"); + } + + dma_unmap_single(jrdev, desc_dma, desc_bytes(desc), DMA_TO_DEVICE); + + kfree(desc); + + return ret; +} + +/* + * By default, the TRNG runs for 200 clocks per sample; + * 800 clocks per sample generates better entropy. 
+ */ +static void kick_trng(struct platform_device *pdev) +{ + struct device *ctrldev = &pdev->dev; + struct caam_drv_private *ctrlpriv = dev_get_drvdata(ctrldev); + struct caam_full __iomem *topregs; + struct rng4tst __iomem *r4tst; + u32 val; + + topregs = (struct caam_full __iomem *)ctrlpriv->ctrl; + r4tst = &topregs->ctrl.r4tst[0]; + + /* put RNG4 into program mode */ + setbits32(&r4tst->rtmctl, RTMCTL_PRGM); + /* 800 clocks per sample */ + val = rd_reg32(&r4tst->rtsdctl); + val = (val & ~RTSDCTL_ENT_DLY_MASK) | (800 << RTSDCTL_ENT_DLY_SHIFT); + wr_reg32(&r4tst->rtsdctl, val); + /* min. freq. count */ + wr_reg32(&r4tst->rtfrqmin, 400); + /* max. freq. count */ + wr_reg32(&r4tst->rtfrqmax, 6400); + /* put RNG4 into run mode */ + clrbits32(&r4tst->rtmctl, RTMCTL_PRGM); +} + /* Probe routine for CAAM top (controller) level */ static int caam_probe(struct platform_device *pdev) { - int ring, rspec; + int ret, ring, rspec; struct device *dev; struct device_node *nprop, *np; struct caam_ctrl __iomem *ctrl; @@ -146,6 +258,19 @@ static int caam_probe(struct platform_device *pdev) return -ENOMEM; } + /* + * RNG4 based SECs (v5+) need special initialization prior + * to executing any descriptors + */ + if (of_device_is_compatible(nprop, "fsl,sec-v5.0")) { + kick_trng(pdev); + ret = instantiate_rng(ctrlpriv->jrdev[0]); + if (ret) { + caam_remove(pdev); + return ret; + } + } + /* NOTE: RTIC detection ought to go here, around Si time */ /* Initialize queue allocator lock */ diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h index 3e68506..f7f833b 100644 --- a/drivers/crypto/caam/desc.h +++ b/drivers/crypto/caam/desc.h @@ -1172,6 +1172,11 @@ struct sec4_sg_entry { #define OP_ALG_AAI_GSM (0x10 << OP_ALG_AAI_SHIFT) #define OP_ALG_AAI_EDGE (0x20 << OP_ALG_AAI_SHIFT) +/* RNG4 set */ +#define OP_ALG_RNG4_SHIFT 4 +#define OP_ALG_RNG4_MASK (0x1f3 << OP_ALG_RNG4_SHIFT) + +#define OP_ALG_RNG4_SK (0x100 << OP_ALG_RNG4_SHIFT) #define OP_ALG_AS_SHIFT 2 #define 
OP_ALG_AS_MASK (0x3 << OP_ALG_AS_SHIFT) diff --git a/drivers/crypto/caam/error.c b/drivers/crypto/caam/error.c index 7e2d54b..9955ed9 100644 --- a/drivers/crypto/caam/error.c +++ b/drivers/crypto/caam/error.c @@ -39,18 +39,20 @@ static void report_ccb_status(u32 status, char *outstr) char *cha_id_list[] = { "", "AES", - "DES, 3DES", + "DES", "ARC4", - "MD5, SHA-1, SH-224, SHA-256, SHA-384, SHA-512", + "MDHA", "RNG", "SNOW f8", - "Kasumi f8, f9", - "All Public Key Algorithms", - "CRC", + "Kasumi f8/9", + "PKHA", + "CRCA", "SNOW f9", + "ZUCE", + "ZUCA", }; char *err_id_list[] = { - "None. No error.", + "No error.", "Mode error.", "Data size error.", "Key size error.", @@ -67,6 +69,20 @@ static void report_ccb_status(u32 status, char *outstr) "Invalid CHA combination was selected", "Invalid CHA selected.", }; + char *rng_err_id_list[] = { + "", + "", + "", + "Instantiate", + "Not instantiated", + "Test instantiate", + "Prediction resistance", + "", + "Prediction resistance and test request", + "Uninstantiate", + "", + "Secure key generation", + }; u8 cha_id = (status & JRSTA_CCBERR_CHAID_MASK) >> JRSTA_CCBERR_CHAID_SHIFT; u8 err_id = status & JRSTA_CCBERR_ERRID_MASK; @@ -81,7 +97,13 @@ static void report_ccb_status(u32 status, char *outstr) cha_id, sizeof("ff")); } - if (err_id < ARRAY_SIZE(err_id_list)) { + if ((cha_id << JRSTA_CCBERR_CHAID_SHIFT) == JRSTA_CCBERR_CHAID_RNG && + err_id < ARRAY_SIZE(rng_err_id_list) && + strlen(rng_err_id_list[err_id])) { + /* RNG-only error */ + SPRINTFCAT(outstr, "%s", rng_err_id_list[err_id], + strlen(rng_err_id_list[err_id])); + } else if (err_id < ARRAY_SIZE(err_id_list)) { SPRINTFCAT(outstr, "%s", err_id_list[err_id], strlen(err_id_list[err_id])); } else { @@ -101,10 +123,10 @@ static void report_deco_status(u32 status, char *outstr) u8 value; char *error_text; } desc_error_list[] = { - { 0x00, "None. No error." }, + { 0x00, "No error." }, { 0x01, "SGT Length Error. 
The descriptor is trying to read " "more data than is contained in the SGT table." }, - { 0x02, "Reserved." }, + { 0x02, "SGT Null Entry Error." }, { 0x03, "Job Ring Control Error. There is a bad value in the " "Job Ring Control register." }, { 0x04, "Invalid Descriptor Command. The Descriptor Command " @@ -116,7 +138,7 @@ static void report_deco_status(u32 status, char *outstr) { 0x09, "Invalid OPERATION Command" }, { 0x0A, "Invalid FIFO LOAD Command" }, { 0x0B, "Invalid FIFO STORE Command" }, - { 0x0C, "Invalid MOVE Command" }, + { 0x0C, "Invalid MOVE/MOVE_LEN Command" }, { 0x0D, "Invalid JUMP Command. A nonlocal JUMP Command is " "invalid because the target is not a Job Header " "Command, or the jump is from a Trusted Descriptor to " @@ -166,6 +188,8 @@ static void report_deco_status(u32 status, char *outstr) "(input frame; block ciphers) and IPsec decap (output " "frame, when doing the next header byte update) and " "DCRC (output frame)." }, + { 0x23, "Read Input Frame error" }, + { 0x24, "JDKEK, TDKEK or TDSK not loaded error" }, { 0x80, "DNR (do not run) error" }, { 0x81, "undefined protocol command" }, { 0x82, "invalid setting in PDB" }, diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h index e9f7a70..6d9f1d9 100644 --- a/drivers/crypto/caam/regs.h +++ b/drivers/crypto/caam/regs.h @@ -167,7 +167,7 @@ struct partid { u32 pidr; /* partition ID, DECO */ }; -/* RNG test mode (replicated twice in some configurations) */ +/* RNGB test mode (replicated twice in some configurations) */ /* Padded out to 0x100 */ struct rngtst { u32 mode; /* RTSTMODEx - Test mode */ @@ -200,6 +200,31 @@ struct rngtst { u32 rsvd14[15]; }; +/* RNG4 TRNG test registers */ +struct rng4tst { +#define RTMCTL_PRGM 0x00010000 /* 1 -> program mode, 0 -> run mode */ + u32 rtmctl; /* misc. control register */ + u32 rtscmisc; /* statistical check misc. register */ + u32 rtpkrrng; /* poker range register */ + union { + u32 rtpkrmax; /* PRGM=1: poker max. 
limit register */ + u32 rtpkrsq; /* PRGM=0: poker square calc. result register */ + }; +#define RTSDCTL_ENT_DLY_SHIFT 16 +#define RTSDCTL_ENT_DLY_MASK (0xffff << RTSDCTL_ENT_DLY_SHIFT) + u32 rtsdctl; /* seed control register */ + union { + u32 rtsblim; /* PRGM=1: sparse bit limit register */ + u32 rttotsam; /* PRGM=0: total samples register */ + }; + u32 rtfrqmin; /* frequency count min. limit register */ + union { + u32 rtfrqmax; /* PRGM=1: freq. count max. limit register */ + u32 rtfrqcnt; /* PRGM=0: freq. count register */ + }; + u32 rsvd1[56]; +}; + /* * caam_ctrl - basic core configuration * starts base + 0x0000 padded out to 0x1000 @@ -249,7 +274,10 @@ struct caam_ctrl { /* RNG Test/Verification/Debug Access 600-7ff */ /* (Useful in Test/Debug modes only...) */ - struct rngtst rtst[2]; + union { + struct rngtst rtst[2]; + struct rng4tst r4tst[2]; + }; u32 rsvd9[448]; -- cgit v0.10.2 From 1a076689cda8a1d623dcda170b2dc2b476cc6f1a Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Fri, 22 Jun 2012 19:48:53 -0500 Subject: crypto: caam - disable IRQ coalescing by default It has been observed that in zero-loss benchmarks, when a slow traffic rate is being tested, the IRQ timer coalescing parameter was set too high, and the ethernet controller would start dropping packets because the job ring back half wouldn't be executed in time before the ethernet controller would fill its buffers, thereby significantly reducing the zero-loss performance figures. Empirical testing has shown that the best zero-loss performance is achieved when IRQ coalescing is set to minimum values and/or turned off, since apparently the job ring driver already implements an adequately-performing general-purpose IRQ mitigation strategy in software. Whilst we could go with minimal count (2-8) and timing settings (192-256), we prefer just turning h/w coalescing altogether off to minimize setkey latency (due to split key generation), and for consistent cross-SoC performance (the SEC vs. 
core clock ratio changes). Signed-off-by: Kim Phillips Signed-off-by: Herbert Xu diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig index 502a7f3..65c7668 100644 --- a/drivers/crypto/caam/Kconfig +++ b/drivers/crypto/caam/Kconfig @@ -32,10 +32,13 @@ config CRYPTO_DEV_FSL_CAAM_RINGSIZE config CRYPTO_DEV_FSL_CAAM_INTC bool "Job Ring interrupt coalescing" depends on CRYPTO_DEV_FSL_CAAM - default y + default n help Enable the Job Ring's interrupt coalescing feature. + Note: the driver already provides adequate + interrupt coalescing in software. + config CRYPTO_DEV_FSL_CAAM_INTC_COUNT_THLD int "Job Ring interrupt coalescing count threshold" depends on CRYPTO_DEV_FSL_CAAM_INTC -- cgit v0.10.2 From 4bba1e9f41d68279ff2c17db53fbd379692b10bc Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Fri, 22 Jun 2012 19:48:54 -0500 Subject: crypto: caam - use non-irq versions of spinlocks for job rings The enqueue lock isn't used in any interrupt context, and the dequeue lock isn't used in the h/w interrupt context, only in bh context. Signed-off-by: Kim Phillips Signed-off-by: Herbert Xu diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c index 9f16b2c..11d93f2 100644 --- a/drivers/crypto/caam/jr.c +++ b/drivers/crypto/caam/jr.c @@ -2,7 +2,7 @@ * CAAM/SEC 4.x transport/backend driver * JobR backend functionality * - * Copyright 2008-2011 Freescale Semiconductor, Inc. + * Copyright 2008-2012 Freescale Semiconductor, Inc. 
*/ #include "compat.h" @@ -58,9 +58,8 @@ static void caam_jr_dequeue(unsigned long devarg) void (*usercall)(struct device *dev, u32 *desc, u32 status, void *arg); u32 *userdesc, userstatus; void *userarg; - unsigned long flags; - spin_lock_irqsave(&jrp->outlock, flags); + spin_lock_bh(&jrp->outlock); head = ACCESS_ONCE(jrp->head); sw_idx = tail = jrp->tail; @@ -118,18 +117,18 @@ static void caam_jr_dequeue(unsigned long devarg) /* set done */ wr_reg32(&jrp->rregs->outring_rmvd, 1); - spin_unlock_irqrestore(&jrp->outlock, flags); + spin_unlock_bh(&jrp->outlock); /* Finally, execute user's callback */ usercall(dev, userdesc, userstatus, userarg); - spin_lock_irqsave(&jrp->outlock, flags); + spin_lock_bh(&jrp->outlock); head = ACCESS_ONCE(jrp->head); sw_idx = tail = jrp->tail; } - spin_unlock_irqrestore(&jrp->outlock, flags); + spin_unlock_bh(&jrp->outlock); /* reenable / unmask IRQs */ clrbits32(&jrp->rregs->rconfig_lo, JRCFG_IMSK); @@ -148,23 +147,22 @@ int caam_jr_register(struct device *ctrldev, struct device **rdev) { struct caam_drv_private *ctrlpriv = dev_get_drvdata(ctrldev); struct caam_drv_private_jr *jrpriv = NULL; - unsigned long flags; int ring; /* Lock, if free ring - assign, unlock */ - spin_lock_irqsave(&ctrlpriv->jr_alloc_lock, flags); + spin_lock(&ctrlpriv->jr_alloc_lock); for (ring = 0; ring < ctrlpriv->total_jobrs; ring++) { jrpriv = dev_get_drvdata(ctrlpriv->jrdev[ring]); if (jrpriv->assign == JOBR_UNASSIGNED) { jrpriv->assign = JOBR_ASSIGNED; *rdev = ctrlpriv->jrdev[ring]; - spin_unlock_irqrestore(&ctrlpriv->jr_alloc_lock, flags); + spin_unlock(&ctrlpriv->jr_alloc_lock); return ring; } } /* If assigned, write dev where caller needs it */ - spin_unlock_irqrestore(&ctrlpriv->jr_alloc_lock, flags); + spin_unlock(&ctrlpriv->jr_alloc_lock); *rdev = NULL; return -ENODEV; @@ -182,7 +180,6 @@ int caam_jr_deregister(struct device *rdev) { struct caam_drv_private_jr *jrpriv = dev_get_drvdata(rdev); struct caam_drv_private *ctrlpriv; - unsigned long flags; 
/* Get the owning controller's private space */ ctrlpriv = dev_get_drvdata(jrpriv->parentdev); @@ -195,9 +192,9 @@ int caam_jr_deregister(struct device *rdev) return -EBUSY; /* Release ring */ - spin_lock_irqsave(&ctrlpriv->jr_alloc_lock, flags); + spin_lock(&ctrlpriv->jr_alloc_lock); jrpriv->assign = JOBR_UNASSIGNED; - spin_unlock_irqrestore(&ctrlpriv->jr_alloc_lock, flags); + spin_unlock(&ctrlpriv->jr_alloc_lock); return 0; } @@ -238,7 +235,6 @@ int caam_jr_enqueue(struct device *dev, u32 *desc, { struct caam_drv_private_jr *jrp = dev_get_drvdata(dev); struct caam_jrentry_info *head_entry; - unsigned long flags; int head, tail, desc_size; dma_addr_t desc_dma; @@ -249,14 +245,14 @@ int caam_jr_enqueue(struct device *dev, u32 *desc, return -EIO; } - spin_lock_irqsave(&jrp->inplock, flags); + spin_lock(&jrp->inplock); head = jrp->head; tail = ACCESS_ONCE(jrp->tail); if (!rd_reg32(&jrp->rregs->inpring_avail) || CIRC_SPACE(head, tail, JOBR_DEPTH) <= 0) { - spin_unlock_irqrestore(&jrp->inplock, flags); + spin_unlock(&jrp->inplock); dma_unmap_single(dev, desc_dma, desc_size, DMA_TO_DEVICE); return -EBUSY; } @@ -280,7 +276,7 @@ int caam_jr_enqueue(struct device *dev, u32 *desc, wr_reg32(&jrp->rregs->inpring_jobadd, 1); - spin_unlock_irqrestore(&jrp->inplock, flags); + spin_unlock(&jrp->inplock); return 0; } -- cgit v0.10.2 From a8ea07c21d40cf17dd9cbe3cbf87d477b26c354f Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Fri, 22 Jun 2012 19:48:55 -0500 Subject: crypto: caam - only query h/w in job ring dequeue path Code was needlessly checking the s/w job ring when there would be nothing to process if the h/w's output completion ring were empty anyway. 
Signed-off-by: Kim Phillips Signed-off-by: Herbert Xu diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c index 11d93f2..7ae5e51 100644 --- a/drivers/crypto/caam/jr.c +++ b/drivers/crypto/caam/jr.c @@ -59,15 +59,15 @@ static void caam_jr_dequeue(unsigned long devarg) u32 *userdesc, userstatus; void *userarg; - spin_lock_bh(&jrp->outlock); + while (rd_reg32(&jrp->rregs->outring_used)) { - head = ACCESS_ONCE(jrp->head); - sw_idx = tail = jrp->tail; + head = ACCESS_ONCE(jrp->head); - while (CIRC_CNT(head, tail, JOBR_DEPTH) >= 1 && - rd_reg32(&jrp->rregs->outring_used)) { + spin_lock_bh(&jrp->outlock); + sw_idx = tail = jrp->tail; hw_idx = jrp->out_ring_read_index; + for (i = 0; CIRC_CNT(head, tail + i, JOBR_DEPTH) >= 1; i++) { sw_idx = (tail + i) & (JOBR_DEPTH - 1); @@ -121,15 +121,8 @@ static void caam_jr_dequeue(unsigned long devarg) /* Finally, execute user's callback */ usercall(dev, userdesc, userstatus, userarg); - - spin_lock_bh(&jrp->outlock); - - head = ACCESS_ONCE(jrp->head); - sw_idx = tail = jrp->tail; } - spin_unlock_bh(&jrp->outlock); - /* reenable / unmask IRQs */ clrbits32(&jrp->rregs->rconfig_lo, JRCFG_IMSK); } -- cgit v0.10.2 From 14a8e29cc2012394d3e886b11402eabd49a4d609 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Fri, 22 Jun 2012 19:48:56 -0500 Subject: crypto: caam - consolidate memory barriers from job ring en/dequeue Memory barriers are implied by the i/o register write implementation (at least on Power). So we can remove the redundant wmb() in caam_jr_enqueue, and, in dequeue(), hoist the h/w done notification write up to before we need to increment the head of the ring, and save an smp_mb. 
Signed-off-by: Kim Phillips Signed-off-by: Herbert Xu diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c index 7ae5e51..0adaad1 100644 --- a/drivers/crypto/caam/jr.c +++ b/drivers/crypto/caam/jr.c @@ -94,7 +94,8 @@ static void caam_jr_dequeue(unsigned long devarg) userdesc = jrp->entinfo[sw_idx].desc_addr_virt; userstatus = jrp->outring[hw_idx].jrstatus; - smp_mb(); + /* set done */ + wr_reg32(&jrp->rregs->outring_rmvd, 1); jrp->out_ring_read_index = (jrp->out_ring_read_index + 1) & (JOBR_DEPTH - 1); @@ -114,9 +115,6 @@ static void caam_jr_dequeue(unsigned long devarg) jrp->tail = tail; } - /* set done */ - wr_reg32(&jrp->rregs->outring_rmvd, 1); - spin_unlock_bh(&jrp->outlock); /* Finally, execute user's callback */ @@ -265,8 +263,6 @@ int caam_jr_enqueue(struct device *dev, u32 *desc, (JOBR_DEPTH - 1); jrp->head = (head + 1) & (JOBR_DEPTH - 1); - wmb(); - wr_reg32(&jrp->rregs->inpring_jobadd, 1); spin_unlock(&jrp->inplock); -- cgit v0.10.2 From a0ca6ca022ac197e159bb5d22a08e3c3aebb242c Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Fri, 22 Jun 2012 19:48:57 -0500 Subject: crypto: caam - one tasklet per job ring there is no noticeable benefit for multiple cores to process one job ring's output ring: in fact, we can benefit from cache effects of having the back-half stay on the core that receives a particular ring's interrupts, and further relax general contention and the locking involved with reading outring_used, since tasklets run atomically. 
Signed-off-by: Kim Phillips Signed-off-by: Herbert Xu diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h index ed2b739..5cd4c1b 100644 --- a/drivers/crypto/caam/intern.h +++ b/drivers/crypto/caam/intern.h @@ -43,7 +43,7 @@ struct caam_drv_private_jr { struct device *parentdev; /* points back to controller dev */ int ridx; struct caam_job_ring __iomem *rregs; /* JobR's register space */ - struct tasklet_struct irqtask[NR_CPUS]; + struct tasklet_struct irqtask; int irq; /* One per queue */ int assign; /* busy/free */ diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c index 0adaad1..7074a1a 100644 --- a/drivers/crypto/caam/jr.c +++ b/drivers/crypto/caam/jr.c @@ -43,7 +43,7 @@ static irqreturn_t caam_jr_interrupt(int irq, void *st_dev) wr_reg32(&jrp->rregs->jrintstatus, irqstate); preempt_disable(); - tasklet_schedule(&jrp->irqtask[smp_processor_id()]); + tasklet_schedule(&jrp->irqtask); preempt_enable(); return IRQ_HANDLED; @@ -322,11 +322,9 @@ static int caam_jr_init(struct device *dev) jrp = dev_get_drvdata(dev); - /* Connect job ring interrupt handler. */ - for_each_possible_cpu(i) - tasklet_init(&jrp->irqtask[i], caam_jr_dequeue, - (unsigned long)dev); + tasklet_init(&jrp->irqtask, caam_jr_dequeue, (unsigned long)dev); + /* Connect job ring interrupt handler. 
*/ error = request_irq(jrp->irq, caam_jr_interrupt, IRQF_SHARED, "caam-jobr", dev); if (error) { @@ -416,12 +414,11 @@ int caam_jr_shutdown(struct device *dev) { struct caam_drv_private_jr *jrp = dev_get_drvdata(dev); dma_addr_t inpbusaddr, outbusaddr; - int ret, i; + int ret; ret = caam_reset_hw_jr(dev); - for_each_possible_cpu(i) - tasklet_kill(&jrp->irqtask[i]); + tasklet_kill(&jrp->irqtask); /* Release interrupt */ free_irq(jrp->irq, dev); -- cgit v0.10.2 From b9b0f080fac541d88eee4d5175c3f6470a9e9189 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 26 Jun 2012 18:13:46 +0200 Subject: crypto: arc4 - now arc needs blockcipher support Since commit ce6dd368 ("crypto: arc4 - improve performance by adding ecb(arc4)") we need to pull in a blkcipher. |ERROR: "crypto_blkcipher_type" [crypto/arc4.ko] undefined! |ERROR: "blkcipher_walk_done" [crypto/arc4.ko] undefined! |ERROR: "blkcipher_walk_virt" [crypto/arc4.ko] undefined! Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Herbert Xu diff --git a/crypto/Kconfig b/crypto/Kconfig index 02e3037..a323805 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -604,7 +604,7 @@ config CRYPTO_ANUBIS config CRYPTO_ARC4 tristate "ARC4 cipher algorithm" - select CRYPTO_ALGAPI + select CRYPTO_BLKCIPHER help ARC4 cipher algorithm. -- cgit v0.10.2 From 26c8aaebc188b539a0a9077350009a059464097d Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 27 Jun 2012 13:31:01 +0200 Subject: crypto: algapi - Fix hang on crypto allocation git commit 398710379 (crypto: algapi - Move larval completion into algboss) accidentally replaced a call to complete_all() with a call to complete(). This causes a hang on crypto allocation if we have more than one larval waiter. This patch restores the call to complete_all().
Signed-off-by: Steffen Klassert Signed-off-by: Herbert Xu diff --git a/crypto/algboss.c b/crypto/algboss.c index f97027e..769219b 100644 --- a/crypto/algboss.c +++ b/crypto/algboss.c @@ -87,7 +87,7 @@ static int cryptomgr_probe(void *data) crypto_tmpl_put(tmpl); out: - complete(param->completion); + complete_all(param->completion); kfree(param); module_put_and_exit(0); } -- cgit v0.10.2 From 1af8ea862c9a9a6d5dc100850036cc7a641bb242 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Wed, 11 Jul 2012 11:06:10 +0800 Subject: crypto: caam - Using alloc_coherent for caam job rings The caam job rings (input/output job ring) are allocated using dma_map_single(). These job rings can be visualized as the ring buffers in which the jobs are en-queued/de-queued. The s/w enqueues the jobs in the input job ring, which the h/w dequeues; after processing, it copies the jobs to the output job ring. Software then de-queues the job from the output ring. Using dma_map/unmap_single() is not the preferred way to allocate memory for this type of requirement because it adds unnecessary complexity. For example, if a bounce buffer (SWIOTLB) is used, then making any change in this memory visible to the other processing unit requires dma_unmap_single() or dma_sync_single_for_cpu/device(). dma_unmap_single() cannot be used because it frees the bounce buffer; that would require changing the job rings on a running system, which I seriously doubt is possible, or which would at best be very complex to implement. Using dma_sync_single_for_cpu/device() would also add unnecessary complexity. The simple and preferred way is to use dma_alloc_coherent() for this type of memory requirement. This resolves the Linux boot crash issue when "swiotlb=force" is set in bootargs on systems which have more than 4G of memory.
Signed-off-by: Bharat Bhushan Acked-by: Kim Phillips Signed-off-by: Herbert Xu diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c index 7074a1a..53c8c51 100644 --- a/drivers/crypto/caam/jr.c +++ b/drivers/crypto/caam/jr.c @@ -339,10 +339,11 @@ static int caam_jr_init(struct device *dev) if (error) return error; - jrp->inpring = kzalloc(sizeof(dma_addr_t) * JOBR_DEPTH, - GFP_KERNEL | GFP_DMA); - jrp->outring = kzalloc(sizeof(struct jr_outentry) * - JOBR_DEPTH, GFP_KERNEL | GFP_DMA); + jrp->inpring = dma_alloc_coherent(dev, sizeof(dma_addr_t) * JOBR_DEPTH, + &inpbusaddr, GFP_KERNEL); + + jrp->outring = dma_alloc_coherent(dev, sizeof(struct jr_outentry) * + JOBR_DEPTH, &outbusaddr, GFP_KERNEL); jrp->entinfo = kzalloc(sizeof(struct caam_jrentry_info) * JOBR_DEPTH, GFP_KERNEL); @@ -358,31 +359,6 @@ static int caam_jr_init(struct device *dev) jrp->entinfo[i].desc_addr_dma = !0; /* Setup rings */ - inpbusaddr = dma_map_single(dev, jrp->inpring, - sizeof(dma_addr_t) * JOBR_DEPTH, - DMA_BIDIRECTIONAL); - if (dma_mapping_error(dev, inpbusaddr)) { - dev_err(dev, "caam_jr_init(): can't map input ring\n"); - kfree(jrp->inpring); - kfree(jrp->outring); - kfree(jrp->entinfo); - return -EIO; - } - - outbusaddr = dma_map_single(dev, jrp->outring, - sizeof(struct jr_outentry) * JOBR_DEPTH, - DMA_BIDIRECTIONAL); - if (dma_mapping_error(dev, outbusaddr)) { - dev_err(dev, "caam_jr_init(): can't map output ring\n"); - dma_unmap_single(dev, inpbusaddr, - sizeof(dma_addr_t) * JOBR_DEPTH, - DMA_BIDIRECTIONAL); - kfree(jrp->inpring); - kfree(jrp->outring); - kfree(jrp->entinfo); - return -EIO; - } - jrp->inp_ring_write_index = 0; jrp->out_ring_read_index = 0; jrp->head = 0; @@ -426,13 +402,10 @@ int caam_jr_shutdown(struct device *dev) /* Free rings */ inpbusaddr = rd_reg64(&jrp->rregs->inpring_base); outbusaddr = rd_reg64(&jrp->rregs->outring_base); - dma_unmap_single(dev, outbusaddr, - sizeof(struct jr_outentry) * JOBR_DEPTH, - DMA_BIDIRECTIONAL); - dma_unmap_single(dev, 
inpbusaddr, sizeof(dma_addr_t) * JOBR_DEPTH, - DMA_BIDIRECTIONAL); - kfree(jrp->outring); - kfree(jrp->inpring); + dma_free_coherent(dev, sizeof(dma_addr_t) * JOBR_DEPTH, + jrp->inpring, inpbusaddr); + dma_free_coherent(dev, sizeof(struct jr_outentry) * JOBR_DEPTH, + jrp->outring, outbusaddr); kfree(jrp->entinfo); return ret; -- cgit v0.10.2 From 82c2f9607b8a4667e9d89613478748f4e2b7288b Mon Sep 17 00:00:00 2001 From: Alex Porosanu Date: Wed, 11 Jul 2012 11:06:11 +0800 Subject: crypto: caam - ERA retrieval and printing for SEC device This patch adds support for retrieving and printing of SEC ERA information. It is useful for knowing beforehand what features exist from the SEC point of view on a certain SoC. Only era-s 1 to 4 are currently supported; other eras will appear as unknown. Signed-off-by: Alex Porosanu - rebased onto current cryptodev master - made caam_eras static Signed-off-by: Kim Phillips Signed-off-by: Herbert Xu diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index ac6abb3..414ba20 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -11,6 +11,7 @@ #include "jr.h" #include "desc_constr.h" #include "error.h" +#include "ctrl.h" static int caam_remove(struct platform_device *pdev) { @@ -155,10 +156,44 @@ static void kick_trng(struct platform_device *pdev) clrbits32(&r4tst->rtmctl, RTMCTL_PRGM); } +/** + * caam_get_era() - Return the ERA of the SEC on SoC, based + * on the SEC_VID register. + * Returns the ERA number (1..4) or -ENOTSUPP if the ERA is unknown. 
+ * @caam_id - the value of the SEC_VID register + **/ +int caam_get_era(u64 caam_id) +{ + struct sec_vid *sec_vid = (struct sec_vid *)&caam_id; + static const struct { + u16 ip_id; + u8 maj_rev; + u8 era; + } caam_eras[] = { + {0x0A10, 1, 1}, + {0x0A10, 2, 2}, + {0x0A12, 1, 3}, + {0x0A14, 1, 3}, + {0x0A14, 2, 4}, + {0x0A16, 1, 4}, + {0x0A11, 1, 4} + }; + int i; + + for (i = 0; i < ARRAY_SIZE(caam_eras); i++) + if (caam_eras[i].ip_id == sec_vid->ip_id && + caam_eras[i].maj_rev == sec_vid->maj_rev) + return caam_eras[i].era; + + return -ENOTSUPP; +} +EXPORT_SYMBOL(caam_get_era); + /* Probe routine for CAAM top (controller) level */ static int caam_probe(struct platform_device *pdev) { int ret, ring, rspec; + u64 caam_id; struct device *dev; struct device_node *nprop, *np; struct caam_ctrl __iomem *ctrl; @@ -276,9 +311,11 @@ static int caam_probe(struct platform_device *pdev) /* Initialize queue allocator lock */ spin_lock_init(&ctrlpriv->jr_alloc_lock); + caam_id = rd_reg64(&topregs->ctrl.perfmon.caam_id); + /* Report "alive" for developer to see */ - dev_info(dev, "device ID = 0x%016llx\n", - rd_reg64(&topregs->ctrl.perfmon.caam_id)); + dev_info(dev, "device ID = 0x%016llx (Era %d)\n", caam_id, + caam_get_era(caam_id)); dev_info(dev, "job rings = %d, qi = %d\n", ctrlpriv->total_jobrs, ctrlpriv->qi_present); diff --git a/drivers/crypto/caam/ctrl.h b/drivers/crypto/caam/ctrl.h new file mode 100644 index 0000000..980d44e --- /dev/null +++ b/drivers/crypto/caam/ctrl.h @@ -0,0 +1,13 @@ +/* + * CAAM control-plane driver backend public-level include definitions + * + * Copyright 2012 Freescale Semiconductor, Inc. 
+ */ + +#ifndef CTRL_H +#define CTRL_H + +/* Prototypes for backend-level services exposed to APIs */ +int caam_get_era(u64 caam_id); + +#endif /* CTRL_H */ diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h index 6d9f1d9..3223fc6 100644 --- a/drivers/crypto/caam/regs.h +++ b/drivers/crypto/caam/regs.h @@ -117,6 +117,12 @@ struct jr_outentry { #define CHA_NUM_DECONUM_SHIFT 56 #define CHA_NUM_DECONUM_MASK (0xfull << CHA_NUM_DECONUM_SHIFT) +struct sec_vid { + u16 ip_id; + u8 maj_rev; + u8 min_rev; +}; + struct caam_perfmon { /* Performance Monitor Registers f00-f9f */ u64 req_dequeued; /* PC_REQ_DEQ - Dequeued Requests */ -- cgit v0.10.2 From bf084d8f6eb4ded3f90a6ab79bb682db00ebfbd4 Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Thu, 28 Jun 2012 17:26:02 +0200 Subject: crypto: aesni-intel - fix wrong kfree pointer kfree(new_key_mem) in rfc4106_set_key() should be called on malloced pointer, not on aligned one, otherwise it can cause invalid pointer on free. (Seen at least once when running tcrypt tests with debug kernel.) 
Signed-off-by: Milan Broz Signed-off-by: Herbert Xu diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index d662615..34fdcff 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -529,7 +529,7 @@ static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key, struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm); struct aesni_rfc4106_gcm_ctx *child_ctx = aesni_rfc4106_gcm_ctx_get(cryptd_child); - u8 *new_key_mem = NULL; + u8 *new_key_align, *new_key_mem = NULL; if (key_len < 4) { crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); @@ -553,9 +553,9 @@ static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key, if (!new_key_mem) return -ENOMEM; - new_key_mem = PTR_ALIGN(new_key_mem, AESNI_ALIGN); - memcpy(new_key_mem, key, key_len); - key = new_key_mem; + new_key_align = PTR_ALIGN(new_key_mem, AESNI_ALIGN); + memcpy(new_key_align, key, key_len); + key = new_key_align; } if (!irq_fpu_usable()) -- cgit v0.10.2 From b329669ea0b5b02efd41f94372bcf0e988814af4 Mon Sep 17 00:00:00 2001 From: Jonghwa Lee Date: Fri, 29 Jun 2012 09:43:26 +0900 Subject: hwrng: exynos - Add support for Exynos random number generator This patch adds a driver for the Exynos SoC's PRNG. The Exynos PRNG has 5 seeds and 5 random number outputs. The module is executed under runtime power management control, so it is active only while it is in use; otherwise it is suspended. It was tested on a PQ board with the rngtest program. Signed-off-by: Jonghwa Lee Signed-off-by: Kyungmin Park Reviewed-by: Stephen Boyd Signed-off-by: Herbert Xu diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig index f45dad3..b01d673 100644 --- a/drivers/char/hw_random/Kconfig +++ b/drivers/char/hw_random/Kconfig @@ -263,3 +263,15 @@ config HW_RANDOM_PSERIES module will be called pseries-rng. If unsure, say Y.
+ +config HW_RANDOM_EXYNOS + tristate "EXYNOS HW random number generator support" + depends on HW_RANDOM && HAS_IOMEM && HAVE_CLK + ---help--- + This driver provides kernel-side support for the Random Number + Generator hardware found on EXYNOS SOCs. + + To compile this driver as a module, choose M here: the + module will be called exynos-rng. + + If unsure, say Y. diff --git a/drivers/char/hw_random/Makefile b/drivers/char/hw_random/Makefile index d901dfa..8d6d173 100644 --- a/drivers/char/hw_random/Makefile +++ b/drivers/char/hw_random/Makefile @@ -23,3 +23,4 @@ obj-$(CONFIG_HW_RANDOM_NOMADIK) += nomadik-rng.o obj-$(CONFIG_HW_RANDOM_PICOXCELL) += picoxcell-rng.o obj-$(CONFIG_HW_RANDOM_PPC4XX) += ppc4xx-rng.o obj-$(CONFIG_HW_RANDOM_PSERIES) += pseries-rng.o +obj-$(CONFIG_HW_RANDOM_EXYNOS) += exynos-rng.o diff --git a/drivers/char/hw_random/exynos-rng.c b/drivers/char/hw_random/exynos-rng.c new file mode 100644 index 0000000..232ba9c --- /dev/null +++ b/drivers/char/hw_random/exynos-rng.c @@ -0,0 +1,182 @@ +/* + * exynos-rng.c - Random Number Generator driver for the exynos + * + * Copyright (C) 2012 Samsung Electronics + * Jonghwa Lee + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define EXYNOS_PRNG_STATUS_OFFSET 0x10 +#define EXYNOS_PRNG_SEED_OFFSET 0x140 +#define EXYNOS_PRNG_OUT1_OFFSET 0x160 +#define SEED_SETTING_DONE BIT(1) +#define PRNG_START 0x18 +#define PRNG_DONE BIT(5) +#define EXYNOS_AUTOSUSPEND_DELAY 100 + +struct exynos_rng { + struct device *dev; + struct hwrng rng; + void __iomem *mem; + struct clk *clk; +}; + +static u32 exynos_rng_readl(struct exynos_rng *rng, u32 offset) +{ + return __raw_readl(rng->mem + offset); +} + +static void exynos_rng_writel(struct exynos_rng *rng, u32 val, u32 offset) +{ + __raw_writel(val, rng->mem + offset); +} + +static int exynos_init(struct hwrng *rng) +{ + struct exynos_rng *exynos_rng = container_of(rng, + struct exynos_rng, rng); + int i; + int ret = 0; + + pm_runtime_get_sync(exynos_rng->dev); + + for (i = 0 ; i < 5 ; i++) + exynos_rng_writel(exynos_rng, jiffies, + EXYNOS_PRNG_SEED_OFFSET + 4*i); + + if (!(exynos_rng_readl(exynos_rng, EXYNOS_PRNG_STATUS_OFFSET) + & SEED_SETTING_DONE)) + ret = -EIO; + + pm_runtime_put_noidle(exynos_rng->dev); + + return ret; +} + +static int exynos_read(struct hwrng *rng, void *buf, + size_t max, bool wait) +{ + struct exynos_rng *exynos_rng = container_of(rng, + struct exynos_rng, rng); + u32 *data = buf; + + pm_runtime_get_sync(exynos_rng->dev); + + exynos_rng_writel(exynos_rng, PRNG_START, 0); + + while (!(exynos_rng_readl(exynos_rng, + EXYNOS_PRNG_STATUS_OFFSET) & PRNG_DONE)) + cpu_relax(); + + exynos_rng_writel(exynos_rng, PRNG_DONE, EXYNOS_PRNG_STATUS_OFFSET); + + *data = exynos_rng_readl(exynos_rng, EXYNOS_PRNG_OUT1_OFFSET); + + pm_runtime_mark_last_busy(exynos_rng->dev); + pm_runtime_autosuspend(exynos_rng->dev); + + return 4; +} + 
+static int __devinit exynos_rng_probe(struct platform_device *pdev) +{ + struct exynos_rng *exynos_rng; + + exynos_rng = devm_kzalloc(&pdev->dev, sizeof(struct exynos_rng), + GFP_KERNEL); + if (!exynos_rng) + return -ENOMEM; + + exynos_rng->dev = &pdev->dev; + exynos_rng->rng.name = "exynos"; + exynos_rng->rng.init = exynos_init; + exynos_rng->rng.read = exynos_read; + exynos_rng->clk = devm_clk_get(&pdev->dev, "secss"); + if (IS_ERR(exynos_rng->clk)) { + dev_err(&pdev->dev, "Couldn't get clock.\n"); + return -ENOENT; + } + + exynos_rng->mem = devm_request_and_ioremap(&pdev->dev, + platform_get_resource(pdev, IORESOURCE_MEM, 0)); + if (!exynos_rng->mem) + return -EBUSY; + + platform_set_drvdata(pdev, exynos_rng); + + pm_runtime_set_autosuspend_delay(&pdev->dev, EXYNOS_AUTOSUSPEND_DELAY); + pm_runtime_use_autosuspend(&pdev->dev); + pm_runtime_enable(&pdev->dev); + + return hwrng_register(&exynos_rng->rng); +} + +static int __devexit exynos_rng_remove(struct platform_device *pdev) +{ + struct exynos_rng *exynos_rng = platform_get_drvdata(pdev); + + hwrng_unregister(&exynos_rng->rng); + + return 0; +} + +static int exynos_rng_runtime_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct exynos_rng *exynos_rng = platform_get_drvdata(pdev); + + clk_disable_unprepare(exynos_rng->clk); + + return 0; +} + +static int exynos_rng_runtime_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct exynos_rng *exynos_rng = platform_get_drvdata(pdev); + + return clk_prepare_enable(exynos_rng->clk); +} + + +UNIVERSAL_DEV_PM_OPS(exynos_rng_pm_ops, exynos_rng_runtime_suspend, + exynos_rng_runtime_resume, NULL); + +static struct platform_driver exynos_rng_driver = { + .driver = { + .name = "exynos-rng", + .owner = THIS_MODULE, + .pm = &exynos_rng_pm_ops, + }, + .probe = exynos_rng_probe, + .remove = __devexit_p(exynos_rng_remove), +}; + +module_platform_driver(exynos_rng_driver); + 
+MODULE_DESCRIPTION("EXYNOS 4 H/W Random Number Generator driver"); +MODULE_AUTHOR("Jonghwa Lee "); +MODULE_LICENSE("GPL"); -- cgit v0.10.2 From 6c79294f44fd7d1122cbaabff3b9815b074c0dd0 Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Fri, 29 Jun 2012 22:08:09 +0200 Subject: crypto: testmgr - allow aesni-intel and ghash_clmulni-intel in fips mode Patch 863b557a88f8c033f7419fabafef4712a5055f85 added NULL entries for intel accelerated drivers but did not mark them as fips allowed. This causes a panic when running tests with fips=1. For ghash, the fips_allowed flag was added in patch 18c0ebd2d8194cce4b3f67e2903fa01bea892cbc. Without this patch, "modprobe tcrypt" fails with alg: skcipher: Failed to load transform for cbc-aes-aesni: -2 cbc-aes-aesni: cbc(aes) alg self test failed in fips mode! (panic) Also add the missing cryptd(__driver-cbc-aes-aesni) and cryptd(__driver-gcm-aes-aesni) tests to complement the null tests above; otherwise the system complains with alg: No test for __cbc-aes-aesni (cryptd(__driver-cbc-aes-aesni)) alg: No test for __gcm-aes-aesni (cryptd(__driver-gcm-aes-aesni)) Signed-off-by: Milan Broz Signed-off-by: Paul Wouters Signed-off-by: Herbert Xu diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 36748a5..4308a11 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -1581,6 +1581,7 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "__driver-cbc-aes-aesni", .test = alg_test_null, + .fips_allowed = 1, .suite = { .cipher = { .enc = { @@ -1641,6 +1642,7 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "__driver-ecb-aes-aesni", .test = alg_test_null, + .fips_allowed = 1, .suite = { .cipher = { .enc = { @@ -1701,6 +1703,7 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "__ghash-pclmulqdqni", .test = alg_test_null, + .fips_allowed = 1, .suite = { .hash = { .vecs = NULL, @@ -1866,8 +1869,25 @@ static const struct alg_test_desc alg_test_descs[] = { } } }, { + .alg = "cryptd(__driver-cbc-aes-aesni)", + .test =
alg_test_null, + .fips_allowed = 1, + .suite = { + .cipher = { + .enc = { + .vecs = NULL, + .count = 0 + }, + .dec = { + .vecs = NULL, + .count = 0 + } + } + } + }, { .alg = "cryptd(__driver-ecb-aes-aesni)", .test = alg_test_null, + .fips_allowed = 1, .suite = { .cipher = { .enc = { @@ -1926,8 +1946,25 @@ static const struct alg_test_desc alg_test_descs[] = { } } }, { + .alg = "cryptd(__driver-gcm-aes-aesni)", + .test = alg_test_null, + .fips_allowed = 1, + .suite = { + .cipher = { + .enc = { + .vecs = NULL, + .count = 0 + }, + .dec = { + .vecs = NULL, + .count = 0 + } + } + } + }, { .alg = "cryptd(__ghash-pclmulqdqni)", .test = alg_test_null, + .fips_allowed = 1, .suite = { .hash = { .vecs = NULL, @@ -2043,6 +2080,7 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "ecb(__aes-aesni)", .test = alg_test_null, + .fips_allowed = 1, .suite = { .cipher = { .enc = { -- cgit v0.10.2 From 815e972110052e8da68b5b5298ca2cd69cb7c3c0 Mon Sep 17 00:00:00 2001 From: Nicolas Royer Date: Sun, 1 Jul 2012 19:19:43 +0200 Subject: ARM: AT91SAM9G45: add crypto peripherals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nicolas Royer Acked-by: Nicolas Ferre Acked-by: Eric Bénard Tested-by: Eric Bénard Signed-off-by: Herbert Xu diff --git a/arch/arm/mach-at91/at91sam9g45.c b/arch/arm/mach-at91/at91sam9g45.c index 4792682..da6dc0f 100644 --- a/arch/arm/mach-at91/at91sam9g45.c +++ b/arch/arm/mach-at91/at91sam9g45.c @@ -182,6 +182,13 @@ static struct clk adc_op_clk = { .rate_hz = 13200000, }; +/* AES/TDES/SHA clock - Only for sam9m11/sam9g56 */ +static struct clk aestdessha_clk = { + .name = "aestdessha_clk", + .pmc_mask = 1 << AT91SAM9G45_ID_AESTDESSHA, + .type = CLK_TYPE_PERIPHERAL, +}; + static struct clk *periph_clocks[] __initdata = { &pioA_clk, &pioB_clk, @@ -211,6 +218,7 @@ static struct clk *periph_clocks[] __initdata = { &udphs_clk, &mmc1_clk, &adc_op_clk, + &aestdessha_clk, // irq0 }; @@ -231,6 +239,9 @@ 
static struct clk_lookup periph_clocks_lookups[] = { CLKDEV_CON_DEV_ID("pclk", "ssc.0", &ssc0_clk), CLKDEV_CON_DEV_ID("pclk", "ssc.1", &ssc1_clk), CLKDEV_CON_DEV_ID(NULL, "atmel-trng", &trng_clk), + CLKDEV_CON_DEV_ID(NULL, "atmel_sha", &aestdessha_clk), + CLKDEV_CON_DEV_ID(NULL, "atmel_tdes", &aestdessha_clk), + CLKDEV_CON_DEV_ID(NULL, "atmel_aes", &aestdessha_clk), /* more usart lookup table for DT entries */ CLKDEV_CON_DEV_ID("usart", "ffffee00.serial", &mck), CLKDEV_CON_DEV_ID("usart", "fff8c000.serial", &usart0_clk), @@ -387,7 +398,7 @@ static unsigned int at91sam9g45_default_irq_priority[NR_AIC_IRQS] __initdata = { 3, /* Ethernet */ 0, /* Image Sensor Interface */ 2, /* USB Device High speed port */ - 0, + 0, /* AESTDESSHA Crypto HW Accelerators */ 0, /* Multimedia Card Interface 1 */ 0, 0, /* Advanced Interrupt Controller (IRQ0) */ diff --git a/arch/arm/mach-at91/at91sam9g45_devices.c b/arch/arm/mach-at91/at91sam9g45_devices.c index 933fc9a..7102f62 100644 --- a/arch/arm/mach-at91/at91sam9g45_devices.c +++ b/arch/arm/mach-at91/at91sam9g45_devices.c @@ -18,6 +18,7 @@ #include #include #include +#include #include @@ -1830,6 +1831,130 @@ void __init at91_register_uart(unsigned id, unsigned portnr, unsigned pins) {} void __init at91_add_device_serial(void) {} #endif +/* -------------------------------------------------------------------- + * SHA1/SHA256 + * -------------------------------------------------------------------- */ + +#if defined(CONFIG_CRYPTO_DEV_ATMEL_SHA) || defined(CONFIG_CRYPTO_DEV_ATMEL_SHA_MODULE) +static struct resource sha_resources[] = { + { + .start = AT91SAM9G45_BASE_SHA, + .end = AT91SAM9G45_BASE_SHA + SZ_16K - 1, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = AT91SAM9G45_ID_AESTDESSHA, + .end = AT91SAM9G45_ID_AESTDESSHA, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device at91sam9g45_sha_device = { + .name = "atmel_sha", + .id = -1, + .resource = sha_resources, + .num_resources = ARRAY_SIZE(sha_resources), +}; + 
+static void __init at91_add_device_sha(void) +{ + platform_device_register(&at91sam9g45_sha_device); +} +#else +static void __init at91_add_device_sha(void) {} +#endif + +/* -------------------------------------------------------------------- + * DES/TDES + * -------------------------------------------------------------------- */ + +#if defined(CONFIG_CRYPTO_DEV_ATMEL_TDES) || defined(CONFIG_CRYPTO_DEV_ATMEL_TDES_MODULE) +static struct resource tdes_resources[] = { + [0] = { + .start = AT91SAM9G45_BASE_TDES, + .end = AT91SAM9G45_BASE_TDES + SZ_16K - 1, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = AT91SAM9G45_ID_AESTDESSHA, + .end = AT91SAM9G45_ID_AESTDESSHA, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device at91sam9g45_tdes_device = { + .name = "atmel_tdes", + .id = -1, + .resource = tdes_resources, + .num_resources = ARRAY_SIZE(tdes_resources), +}; + +static void __init at91_add_device_tdes(void) +{ + platform_device_register(&at91sam9g45_tdes_device); +} +#else +static void __init at91_add_device_tdes(void) {} +#endif + +/* -------------------------------------------------------------------- + * AES + * -------------------------------------------------------------------- */ + +#if defined(CONFIG_CRYPTO_DEV_ATMEL_AES) || defined(CONFIG_CRYPTO_DEV_ATMEL_AES_MODULE) +static struct aes_platform_data aes_data; +static u64 aes_dmamask = DMA_BIT_MASK(32); + +static struct resource aes_resources[] = { + [0] = { + .start = AT91SAM9G45_BASE_AES, + .end = AT91SAM9G45_BASE_AES + SZ_16K - 1, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = AT91SAM9G45_ID_AESTDESSHA, + .end = AT91SAM9G45_ID_AESTDESSHA, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device at91sam9g45_aes_device = { + .name = "atmel_aes", + .id = -1, + .dev = { + .dma_mask = &aes_dmamask, + .coherent_dma_mask = DMA_BIT_MASK(32), + .platform_data = &aes_data, + }, + .resource = aes_resources, + .num_resources = ARRAY_SIZE(aes_resources), +}; + +static void __init 
at91_add_device_aes(void) +{ + struct at_dma_slave *atslave; + struct aes_dma_data *alt_atslave; + + alt_atslave = kzalloc(sizeof(struct aes_dma_data), GFP_KERNEL); + + /* DMA TX slave channel configuration */ + atslave = &alt_atslave->txdata; + atslave->dma_dev = &at_hdmac_device.dev; + atslave->cfg = ATC_FIFOCFG_ENOUGHSPACE | ATC_SRC_H2SEL_HW | + ATC_SRC_PER(AT_DMA_ID_AES_RX); + + /* DMA RX slave channel configuration */ + atslave = &alt_atslave->rxdata; + atslave->dma_dev = &at_hdmac_device.dev; + atslave->cfg = ATC_FIFOCFG_ENOUGHSPACE | ATC_DST_H2SEL_HW | + ATC_DST_PER(AT_DMA_ID_AES_TX); + + aes_data.dma_slave = alt_atslave; + platform_device_register(&at91sam9g45_aes_device); +} +#else +static void __init at91_add_device_aes(void) {} +#endif /* -------------------------------------------------------------------- */ /* @@ -1847,6 +1972,9 @@ static int __init at91_add_standard_devices(void) at91_add_device_trng(); at91_add_device_watchdog(); at91_add_device_tc(); + at91_add_device_sha(); + at91_add_device_tdes(); + at91_add_device_aes(); return 0; } diff --git a/arch/arm/mach-at91/include/mach/at91sam9g45.h b/arch/arm/mach-at91/include/mach/at91sam9g45.h index 3a4da24..8eba102 100644 --- a/arch/arm/mach-at91/include/mach/at91sam9g45.h +++ b/arch/arm/mach-at91/include/mach/at91sam9g45.h @@ -136,6 +136,8 @@ #define AT_DMA_ID_SSC1_RX 8 #define AT_DMA_ID_AC97_TX 9 #define AT_DMA_ID_AC97_RX 10 +#define AT_DMA_ID_AES_TX 11 +#define AT_DMA_ID_AES_RX 12 #define AT_DMA_ID_MCI1 13 #endif diff --git a/include/linux/platform_data/atmel-aes.h b/include/linux/platform_data/atmel-aes.h new file mode 100644 index 0000000..e7a1949 --- /dev/null +++ b/include/linux/platform_data/atmel-aes.h @@ -0,0 +1,22 @@ +#ifndef __LINUX_ATMEL_AES_H +#define __LINUX_ATMEL_AES_H + +#include + +/** + * struct aes_dma_data - DMA data for AES + */ +struct aes_dma_data { + struct at_dma_slave txdata; + struct at_dma_slave rxdata; +}; + +/** + * struct aes_platform_data - board-specific AES 
configuration + * @dma_slave: DMA slave interface to use in data transfers. + */ +struct aes_platform_data { + struct aes_dma_data *dma_slave; +}; + +#endif /* __LINUX_ATMEL_AES_H */ -- cgit v0.10.2 From bd3c7b5c2aba0d806285700848f588ca482094d8 Mon Sep 17 00:00:00 2001 From: Nicolas Royer Date: Sun, 1 Jul 2012 19:19:44 +0200 Subject: crypto: atmel - add Atmel AES driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nicolas Royer Acked-by: Nicolas Ferre Acked-by: Eric Bénard Tested-by: Eric Bénard Signed-off-by: Herbert Xu diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index be6b2ba..d04eabe 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -332,4 +332,21 @@ config CRYPTO_DEV_BFIN_CRC Newer Blackfin processors have CRC hardware. Select this if you want to use the Blackfin CRC module. +config CRYPTO_DEV_ATMEL_AES + tristate "Support for Atmel AES hw accelerator" + depends on ARCH_AT91 + select CRYPTO_CBC + select CRYPTO_ECB + select CRYPTO_AES + select CRYPTO_ALGAPI + select CRYPTO_BLKCIPHER + select CONFIG_AT_HDMAC + help + Some Atmel processors have AES hw accelerator. + Select this if you want to use the Atmel module for + AES algorithms. + + To compile this driver as a module, choose M here: the module + will be called atmel-aes. 
+ endif # CRYPTO_HW diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index 1c5a145..44a8147 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -17,3 +17,4 @@ obj-$(CONFIG_CRYPTO_DEV_TEGRA_AES) += tegra-aes.o obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/ obj-$(CONFIG_CRYPTO_DEV_BFIN_CRC) += bfin_crc.o obj-$(CONFIG_CRYPTO_DEV_NX) += nx/ +obj-$(CONFIG_CRYPTO_DEV_ATMEL_AES) += atmel-aes.o diff --git a/drivers/crypto/atmel-aes-regs.h b/drivers/crypto/atmel-aes-regs.h new file mode 100644 index 0000000..2786bb1 --- /dev/null +++ b/drivers/crypto/atmel-aes-regs.h @@ -0,0 +1,62 @@ +#ifndef __ATMEL_AES_REGS_H__ +#define __ATMEL_AES_REGS_H__ + +#define AES_CR 0x00 +#define AES_CR_START (1 << 0) +#define AES_CR_SWRST (1 << 8) +#define AES_CR_LOADSEED (1 << 16) + +#define AES_MR 0x04 +#define AES_MR_CYPHER_DEC (0 << 0) +#define AES_MR_CYPHER_ENC (1 << 0) +#define AES_MR_DUALBUFF (1 << 3) +#define AES_MR_PROCDLY_MASK (0xF << 4) +#define AES_MR_PROCDLY_OFFSET 4 +#define AES_MR_SMOD_MASK (0x3 << 8) +#define AES_MR_SMOD_MANUAL (0x0 << 8) +#define AES_MR_SMOD_AUTO (0x1 << 8) +#define AES_MR_SMOD_IDATAR0 (0x2 << 8) +#define AES_MR_KEYSIZE_MASK (0x3 << 10) +#define AES_MR_KEYSIZE_128 (0x0 << 10) +#define AES_MR_KEYSIZE_192 (0x1 << 10) +#define AES_MR_KEYSIZE_256 (0x2 << 10) +#define AES_MR_OPMOD_MASK (0x7 << 12) +#define AES_MR_OPMOD_ECB (0x0 << 12) +#define AES_MR_OPMOD_CBC (0x1 << 12) +#define AES_MR_OPMOD_OFB (0x2 << 12) +#define AES_MR_OPMOD_CFB (0x3 << 12) +#define AES_MR_OPMOD_CTR (0x4 << 12) +#define AES_MR_LOD (0x1 << 15) +#define AES_MR_CFBS_MASK (0x7 << 16) +#define AES_MR_CFBS_128b (0x0 << 16) +#define AES_MR_CFBS_64b (0x1 << 16) +#define AES_MR_CFBS_32b (0x2 << 16) +#define AES_MR_CFBS_16b (0x3 << 16) +#define AES_MR_CFBS_8b (0x4 << 16) +#define AES_MR_CKEY_MASK (0xF << 20) +#define AES_MR_CKEY_OFFSET 20 +#define AES_MR_CMTYP_MASK (0x1F << 24) +#define AES_MR_CMTYP_OFFSET 24 + +#define AES_IER 0x10 +#define AES_IDR 0x14 +#define AES_IMR 0x18 
+#define AES_ISR 0x1C +#define AES_INT_DATARDY (1 << 0) +#define AES_INT_URAD (1 << 8) +#define AES_ISR_URAT_MASK (0xF << 12) +#define AES_ISR_URAT_IDR_WR_PROC (0x0 << 12) +#define AES_ISR_URAT_ODR_RD_PROC (0x1 << 12) +#define AES_ISR_URAT_MR_WR_PROC (0x2 << 12) +#define AES_ISR_URAT_ODR_RD_SUBK (0x3 << 12) +#define AES_ISR_URAT_MR_WR_SUBK (0x4 << 12) +#define AES_ISR_URAT_WOR_RD (0x5 << 12) + +#define AES_KEYWR(x) (0x20 + ((x) * 0x04)) +#define AES_IDATAR(x) (0x40 + ((x) * 0x04)) +#define AES_ODATAR(x) (0x50 + ((x) * 0x04)) +#define AES_IVR(x) (0x60 + ((x) * 0x04)) + +#define AES_HW_VERSION 0xFC + +#endif /* __ATMEL_AES_REGS_H__ */ diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c new file mode 100644 index 0000000..6bb20ff --- /dev/null +++ b/drivers/crypto/atmel-aes.c @@ -0,0 +1,1206 @@ +/* + * Cryptographic API. + * + * Support for ATMEL AES HW acceleration. + * + * Copyright (c) 2012 Eukréa Electromatique - ATMEL + * Author: Nicolas Royer + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Some ideas are from omap-aes.c driver. 
+ */ + + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "atmel-aes-regs.h" + +#define CFB8_BLOCK_SIZE 1 +#define CFB16_BLOCK_SIZE 2 +#define CFB32_BLOCK_SIZE 4 +#define CFB64_BLOCK_SIZE 8 + +/* AES flags */ +#define AES_FLAGS_MODE_MASK 0x01ff +#define AES_FLAGS_ENCRYPT BIT(0) +#define AES_FLAGS_CBC BIT(1) +#define AES_FLAGS_CFB BIT(2) +#define AES_FLAGS_CFB8 BIT(3) +#define AES_FLAGS_CFB16 BIT(4) +#define AES_FLAGS_CFB32 BIT(5) +#define AES_FLAGS_CFB64 BIT(6) +#define AES_FLAGS_OFB BIT(7) +#define AES_FLAGS_CTR BIT(8) + +#define AES_FLAGS_INIT BIT(16) +#define AES_FLAGS_DMA BIT(17) +#define AES_FLAGS_BUSY BIT(18) + +#define AES_FLAGS_DUALBUFF BIT(24) + +#define ATMEL_AES_QUEUE_LENGTH 1 +#define ATMEL_AES_CACHE_SIZE 0 + +#define ATMEL_AES_DMA_THRESHOLD 16 + + +struct atmel_aes_dev; + +struct atmel_aes_ctx { + struct atmel_aes_dev *dd; + + int keylen; + u32 key[AES_KEYSIZE_256 / sizeof(u32)]; +}; + +struct atmel_aes_reqctx { + unsigned long mode; +}; + +struct atmel_aes_dma { + struct dma_chan *chan; + struct dma_slave_config dma_conf; +}; + +struct atmel_aes_dev { + struct list_head list; + unsigned long phys_base; + void __iomem *io_base; + + struct atmel_aes_ctx *ctx; + struct device *dev; + struct clk *iclk; + int irq; + + unsigned long flags; + int err; + + spinlock_t lock; + struct crypto_queue queue; + + struct tasklet_struct done_task; + struct tasklet_struct queue_task; + + struct ablkcipher_request *req; + size_t total; + + struct scatterlist *in_sg; + unsigned int nb_in_sg; + + struct scatterlist *out_sg; + unsigned int nb_out_sg; + + size_t bufcnt; + + u8 buf_in[ATMEL_AES_DMA_THRESHOLD] __aligned(sizeof(u32)); + int dma_in; + struct atmel_aes_dma dma_lch_in; + + u8 buf_out[ATMEL_AES_DMA_THRESHOLD] 
__aligned(sizeof(u32)); + int dma_out; + struct atmel_aes_dma dma_lch_out; + + u32 hw_version; +}; + +struct atmel_aes_drv { + struct list_head dev_list; + spinlock_t lock; +}; + +static struct atmel_aes_drv atmel_aes = { + .dev_list = LIST_HEAD_INIT(atmel_aes.dev_list), + .lock = __SPIN_LOCK_UNLOCKED(atmel_aes.lock), +}; + +static int atmel_aes_sg_length(struct ablkcipher_request *req, + struct scatterlist *sg) +{ + unsigned int total = req->nbytes; + int sg_nb; + unsigned int len; + struct scatterlist *sg_list; + + sg_nb = 0; + sg_list = sg; + total = req->nbytes; + + while (total) { + len = min(sg_list->length, total); + + sg_nb++; + total -= len; + + sg_list = sg_next(sg_list); + if (!sg_list) + total = 0; + } + + return sg_nb; +} + +static inline u32 atmel_aes_read(struct atmel_aes_dev *dd, u32 offset) +{ + return readl_relaxed(dd->io_base + offset); +} + +static inline void atmel_aes_write(struct atmel_aes_dev *dd, + u32 offset, u32 value) +{ + writel_relaxed(value, dd->io_base + offset); +} + +static void atmel_aes_read_n(struct atmel_aes_dev *dd, u32 offset, + u32 *value, int count) +{ + for (; count--; value++, offset += 4) + *value = atmel_aes_read(dd, offset); +} + +static void atmel_aes_write_n(struct atmel_aes_dev *dd, u32 offset, + u32 *value, int count) +{ + for (; count--; value++, offset += 4) + atmel_aes_write(dd, offset, *value); +} + +static void atmel_aes_dualbuff_test(struct atmel_aes_dev *dd) +{ + atmel_aes_write(dd, AES_MR, AES_MR_DUALBUFF); + + if (atmel_aes_read(dd, AES_MR) & AES_MR_DUALBUFF) + dd->flags |= AES_FLAGS_DUALBUFF; +} + +static struct atmel_aes_dev *atmel_aes_find_dev(struct atmel_aes_ctx *ctx) +{ + struct atmel_aes_dev *aes_dd = NULL; + struct atmel_aes_dev *tmp; + + spin_lock_bh(&atmel_aes.lock); + if (!ctx->dd) { + list_for_each_entry(tmp, &atmel_aes.dev_list, list) { + aes_dd = tmp; + break; + } + ctx->dd = aes_dd; + } else { + aes_dd = ctx->dd; + } + + spin_unlock_bh(&atmel_aes.lock); + + return aes_dd; +} + +static int 
atmel_aes_hw_init(struct atmel_aes_dev *dd)
+{
+	clk_prepare_enable(dd->iclk);
+
+	/* Reset and probe the IP only once per device. */
+	if (!(dd->flags & AES_FLAGS_INIT)) {
+		atmel_aes_write(dd, AES_CR, AES_CR_SWRST);
+		atmel_aes_dualbuff_test(dd);
+		dd->flags |= AES_FLAGS_INIT;
+		dd->err = 0;
+	}
+
+	return 0;
+}
+
+static void atmel_aes_hw_version_init(struct atmel_aes_dev *dd)
+{
+	atmel_aes_hw_init(dd);
+
+	dd->hw_version = atmel_aes_read(dd, AES_HW_VERSION);
+
+	clk_disable_unprepare(dd->iclk);
+}
+
+static void atmel_aes_finish_req(struct atmel_aes_dev *dd, int err)
+{
+	struct ablkcipher_request *req = dd->req;
+
+	clk_disable_unprepare(dd->iclk);
+	dd->flags &= ~AES_FLAGS_BUSY;
+
+	req->base.complete(&req->base, err);
+}
+
+static void atmel_aes_dma_callback(void *data)
+{
+	struct atmel_aes_dev *dd = data;
+
+	/* dma_lch_out - completed */
+	tasklet_schedule(&dd->done_task);
+}
+
+/*
+ * Map the request's source and destination scatterlists and submit one
+ * slave transfer on each DMA channel.  Returns 0 once both transfers are
+ * issued, or -EINVAL after undoing whatever mapping had been set up.
+ */
+static int atmel_aes_crypt_dma(struct atmel_aes_dev *dd)
+{
+	struct dma_async_tx_descriptor *in_desc, *out_desc;
+	int nb_dma_sg_in, nb_dma_sg_out;
+
+	dd->nb_in_sg = atmel_aes_sg_length(dd->req, dd->in_sg);
+	if (!dd->nb_in_sg)
+		goto exit_err;
+
+	nb_dma_sg_in = dma_map_sg(dd->dev, dd->in_sg, dd->nb_in_sg,
+			DMA_TO_DEVICE);
+	if (!nb_dma_sg_in)
+		goto exit_err;
+
+	in_desc = dmaengine_prep_slave_sg(dd->dma_lch_in.chan, dd->in_sg,
+				nb_dma_sg_in, DMA_MEM_TO_DEV,
+				DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+
+	if (!in_desc)
+		goto unmap_in;
+
+	/* callback not needed */
+
+	dd->nb_out_sg = atmel_aes_sg_length(dd->req, dd->out_sg);
+	if (!dd->nb_out_sg)
+		goto unmap_in;
+
+	nb_dma_sg_out = dma_map_sg(dd->dev, dd->out_sg, dd->nb_out_sg,
+			DMA_FROM_DEVICE);
+	/* out_sg was not mapped on failure: only the input needs undoing */
+	if (!nb_dma_sg_out)
+		goto unmap_in;
+
+	out_desc = dmaengine_prep_slave_sg(dd->dma_lch_out.chan, dd->out_sg,
+				nb_dma_sg_out, DMA_DEV_TO_MEM,
+				DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+
+	if (!out_desc)
+		goto unmap_out;
+
+	out_desc->callback = atmel_aes_dma_callback;
+	out_desc->callback_param = dd;
+
+	dd->total -= dd->req->nbytes;
+
+	dmaengine_submit(out_desc);
+	dma_async_issue_pending(dd->dma_lch_out.chan);
+
+	dmaengine_submit(in_desc);
+	dma_async_issue_pending(dd->dma_lch_in.chan);
+
+	return 0;
+
+unmap_out:
+	dma_unmap_sg(dd->dev, dd->out_sg, dd->nb_out_sg,
+		DMA_FROM_DEVICE);
+unmap_in:
+	dma_unmap_sg(dd->dev, dd->in_sg, dd->nb_in_sg,
+		DMA_TO_DEVICE);
+exit_err:
+	return -EINVAL;
+}
+
+/*
+ * PIO path: copy the input into buf_in, enable the DATARDY interrupt and
+ * write the data to the input registers.  Returns -EINVAL when either
+ * scatterlist is empty or nothing could be copied.
+ */
+static int atmel_aes_crypt_cpu_start(struct atmel_aes_dev *dd)
+{
+	dd->flags &= ~AES_FLAGS_DMA;
+
+	/* use cache buffers */
+	dd->nb_in_sg = atmel_aes_sg_length(dd->req, dd->in_sg);
+	if (!dd->nb_in_sg)
+		return -EINVAL;
+
+	/* validate the destination list, not the source list a second time */
+	dd->nb_out_sg = atmel_aes_sg_length(dd->req, dd->out_sg);
+	if (!dd->nb_out_sg)
+		return -EINVAL;
+
+	dd->bufcnt = sg_copy_to_buffer(dd->in_sg, dd->nb_in_sg,
+					dd->buf_in, dd->total);
+
+	if (!dd->bufcnt)
+		return -EINVAL;
+
+	dd->total -= dd->bufcnt;
+
+	atmel_aes_write(dd, AES_IER, AES_INT_DATARDY);
+	atmel_aes_write_n(dd, AES_IDATAR(0), (u32 *) dd->buf_in,
+				dd->bufcnt >> 2);
+
+	return 0;
+}
+
+static int atmel_aes_crypt_dma_start(struct atmel_aes_dev *dd)
+{
+	int err;
+
+	/* bus width follows the CFB segment size; 4 bytes otherwise */
+	if (dd->flags & AES_FLAGS_CFB8) {
+		dd->dma_lch_in.dma_conf.dst_addr_width =
+			DMA_SLAVE_BUSWIDTH_1_BYTE;
+		dd->dma_lch_out.dma_conf.src_addr_width =
+			DMA_SLAVE_BUSWIDTH_1_BYTE;
+	} else if (dd->flags & AES_FLAGS_CFB16) {
+		dd->dma_lch_in.dma_conf.dst_addr_width =
+			DMA_SLAVE_BUSWIDTH_2_BYTES;
+		dd->dma_lch_out.dma_conf.src_addr_width =
+			DMA_SLAVE_BUSWIDTH_2_BYTES;
+	} else {
+		dd->dma_lch_in.dma_conf.dst_addr_width =
+			DMA_SLAVE_BUSWIDTH_4_BYTES;
+		dd->dma_lch_out.dma_conf.src_addr_width =
+			DMA_SLAVE_BUSWIDTH_4_BYTES;
+	}
+
+	dmaengine_slave_config(dd->dma_lch_in.chan, &dd->dma_lch_in.dma_conf);
+	dmaengine_slave_config(dd->dma_lch_out.chan, &dd->dma_lch_out.dma_conf);
+
+	dd->flags |= AES_FLAGS_DMA;
+	err = atmel_aes_crypt_dma(dd);
+
+	return err;
+}
+
+static int atmel_aes_write_ctrl(struct atmel_aes_dev *dd)
+{
+	int err;
+	u32 valcr = 0, valmr = 0;
+
+	err = atmel_aes_hw_init(dd);
+
+	if (err)
+		return err;
+
+	/* MR register must
be set before IV registers */ + if (dd->ctx->keylen == AES_KEYSIZE_128) + valmr |= AES_MR_KEYSIZE_128; + else if (dd->ctx->keylen == AES_KEYSIZE_192) + valmr |= AES_MR_KEYSIZE_192; + else + valmr |= AES_MR_KEYSIZE_256; + + if (dd->flags & AES_FLAGS_CBC) { + valmr |= AES_MR_OPMOD_CBC; + } else if (dd->flags & AES_FLAGS_CFB) { + valmr |= AES_MR_OPMOD_CFB; + if (dd->flags & AES_FLAGS_CFB8) + valmr |= AES_MR_CFBS_8b; + else if (dd->flags & AES_FLAGS_CFB16) + valmr |= AES_MR_CFBS_16b; + else if (dd->flags & AES_FLAGS_CFB32) + valmr |= AES_MR_CFBS_32b; + else if (dd->flags & AES_FLAGS_CFB64) + valmr |= AES_MR_CFBS_64b; + } else if (dd->flags & AES_FLAGS_OFB) { + valmr |= AES_MR_OPMOD_OFB; + } else if (dd->flags & AES_FLAGS_CTR) { + valmr |= AES_MR_OPMOD_CTR; + } else { + valmr |= AES_MR_OPMOD_ECB; + } + + if (dd->flags & AES_FLAGS_ENCRYPT) + valmr |= AES_MR_CYPHER_ENC; + + if (dd->total > ATMEL_AES_DMA_THRESHOLD) { + valmr |= AES_MR_SMOD_IDATAR0; + if (dd->flags & AES_FLAGS_DUALBUFF) + valmr |= AES_MR_DUALBUFF; + } else { + valmr |= AES_MR_SMOD_AUTO; + } + + atmel_aes_write(dd, AES_CR, valcr); + atmel_aes_write(dd, AES_MR, valmr); + + atmel_aes_write_n(dd, AES_KEYWR(0), dd->ctx->key, + dd->ctx->keylen >> 2); + + if (((dd->flags & AES_FLAGS_CBC) || (dd->flags & AES_FLAGS_CFB) || + (dd->flags & AES_FLAGS_OFB) || (dd->flags & AES_FLAGS_CTR)) && + dd->req->info) { + atmel_aes_write_n(dd, AES_IVR(0), dd->req->info, 4); + } + + return 0; +} + +static int atmel_aes_handle_queue(struct atmel_aes_dev *dd, + struct ablkcipher_request *req) +{ + struct crypto_async_request *async_req, *backlog; + struct atmel_aes_ctx *ctx; + struct atmel_aes_reqctx *rctx; + unsigned long flags; + int err, ret = 0; + + spin_lock_irqsave(&dd->lock, flags); + if (req) + ret = ablkcipher_enqueue_request(&dd->queue, req); + if (dd->flags & AES_FLAGS_BUSY) { + spin_unlock_irqrestore(&dd->lock, flags); + return ret; + } + backlog = crypto_get_backlog(&dd->queue); + async_req = 
crypto_dequeue_request(&dd->queue); + if (async_req) + dd->flags |= AES_FLAGS_BUSY; + spin_unlock_irqrestore(&dd->lock, flags); + + if (!async_req) + return ret; + + if (backlog) + backlog->complete(backlog, -EINPROGRESS); + + req = ablkcipher_request_cast(async_req); + + /* assign new request to device */ + dd->req = req; + dd->total = req->nbytes; + dd->in_sg = req->src; + dd->out_sg = req->dst; + + rctx = ablkcipher_request_ctx(req); + ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req)); + rctx->mode &= AES_FLAGS_MODE_MASK; + dd->flags = (dd->flags & ~AES_FLAGS_MODE_MASK) | rctx->mode; + dd->ctx = ctx; + ctx->dd = dd; + + err = atmel_aes_write_ctrl(dd); + if (!err) { + if (dd->total > ATMEL_AES_DMA_THRESHOLD) + err = atmel_aes_crypt_dma_start(dd); + else + err = atmel_aes_crypt_cpu_start(dd); + } + if (err) { + /* aes_task will not finish it, so do it here */ + atmel_aes_finish_req(dd, err); + tasklet_schedule(&dd->queue_task); + } + + return ret; +} + +static int atmel_aes_crypt_dma_stop(struct atmel_aes_dev *dd) +{ + int err = -EINVAL; + + if (dd->flags & AES_FLAGS_DMA) { + dma_unmap_sg(dd->dev, dd->out_sg, + dd->nb_out_sg, DMA_FROM_DEVICE); + dma_unmap_sg(dd->dev, dd->in_sg, + dd->nb_in_sg, DMA_TO_DEVICE); + err = 0; + } + + return err; +} + +static int atmel_aes_crypt(struct ablkcipher_request *req, unsigned long mode) +{ + struct atmel_aes_ctx *ctx = crypto_ablkcipher_ctx( + crypto_ablkcipher_reqtfm(req)); + struct atmel_aes_reqctx *rctx = ablkcipher_request_ctx(req); + struct atmel_aes_dev *dd; + + if (!IS_ALIGNED(req->nbytes, AES_BLOCK_SIZE)) { + pr_err("request size is not exact amount of AES blocks\n"); + return -EINVAL; + } + + dd = atmel_aes_find_dev(ctx); + if (!dd) + return -ENODEV; + + rctx->mode = mode; + + return atmel_aes_handle_queue(dd, req); +} + +static bool atmel_aes_filter(struct dma_chan *chan, void *slave) +{ + struct at_dma_slave *sl = slave; + + if (sl && sl->dma_dev == chan->device->dev) { + chan->private = sl; + return true; + 
} else { + return false; + } +} + +static int atmel_aes_dma_init(struct atmel_aes_dev *dd) +{ + int err = -ENOMEM; + struct aes_platform_data *pdata; + dma_cap_mask_t mask_in, mask_out; + + pdata = dd->dev->platform_data; + + if (pdata && pdata->dma_slave->txdata.dma_dev && + pdata->dma_slave->rxdata.dma_dev) { + + /* Try to grab 2 DMA channels */ + dma_cap_zero(mask_in); + dma_cap_set(DMA_SLAVE, mask_in); + + dd->dma_lch_in.chan = dma_request_channel(mask_in, + atmel_aes_filter, &pdata->dma_slave->rxdata); + if (!dd->dma_lch_in.chan) + goto err_dma_in; + + dd->dma_lch_in.dma_conf.direction = DMA_MEM_TO_DEV; + dd->dma_lch_in.dma_conf.dst_addr = dd->phys_base + + AES_IDATAR(0); + dd->dma_lch_in.dma_conf.src_maxburst = 1; + dd->dma_lch_in.dma_conf.dst_maxburst = 1; + dd->dma_lch_in.dma_conf.device_fc = false; + + dma_cap_zero(mask_out); + dma_cap_set(DMA_SLAVE, mask_out); + dd->dma_lch_out.chan = dma_request_channel(mask_out, + atmel_aes_filter, &pdata->dma_slave->txdata); + if (!dd->dma_lch_out.chan) + goto err_dma_out; + + dd->dma_lch_out.dma_conf.direction = DMA_DEV_TO_MEM; + dd->dma_lch_out.dma_conf.src_addr = dd->phys_base + + AES_ODATAR(0); + dd->dma_lch_out.dma_conf.src_maxburst = 1; + dd->dma_lch_out.dma_conf.dst_maxburst = 1; + dd->dma_lch_out.dma_conf.device_fc = false; + + return 0; + } else { + return -ENODEV; + } + +err_dma_out: + dma_release_channel(dd->dma_lch_in.chan); +err_dma_in: + return err; +} + +static void atmel_aes_dma_cleanup(struct atmel_aes_dev *dd) +{ + dma_release_channel(dd->dma_lch_in.chan); + dma_release_channel(dd->dma_lch_out.chan); +} + +static int atmel_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key, + unsigned int keylen) +{ + struct atmel_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); + + if (keylen != AES_KEYSIZE_128 && keylen != AES_KEYSIZE_192 && + keylen != AES_KEYSIZE_256) { + crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + memcpy(ctx->key, key, keylen); + ctx->keylen = keylen; + 
+ return 0; +} + +static int atmel_aes_ecb_encrypt(struct ablkcipher_request *req) +{ + return atmel_aes_crypt(req, + AES_FLAGS_ENCRYPT); +} + +static int atmel_aes_ecb_decrypt(struct ablkcipher_request *req) +{ + return atmel_aes_crypt(req, + 0); +} + +static int atmel_aes_cbc_encrypt(struct ablkcipher_request *req) +{ + return atmel_aes_crypt(req, + AES_FLAGS_ENCRYPT | AES_FLAGS_CBC); +} + +static int atmel_aes_cbc_decrypt(struct ablkcipher_request *req) +{ + return atmel_aes_crypt(req, + AES_FLAGS_CBC); +} + +static int atmel_aes_ofb_encrypt(struct ablkcipher_request *req) +{ + return atmel_aes_crypt(req, + AES_FLAGS_ENCRYPT | AES_FLAGS_OFB); +} + +static int atmel_aes_ofb_decrypt(struct ablkcipher_request *req) +{ + return atmel_aes_crypt(req, + AES_FLAGS_OFB); +} + +static int atmel_aes_cfb_encrypt(struct ablkcipher_request *req) +{ + return atmel_aes_crypt(req, + AES_FLAGS_ENCRYPT | AES_FLAGS_CFB); +} + +static int atmel_aes_cfb_decrypt(struct ablkcipher_request *req) +{ + return atmel_aes_crypt(req, + AES_FLAGS_CFB); +} + +static int atmel_aes_cfb64_encrypt(struct ablkcipher_request *req) +{ + return atmel_aes_crypt(req, + AES_FLAGS_ENCRYPT | AES_FLAGS_CFB | AES_FLAGS_CFB64); +} + +static int atmel_aes_cfb64_decrypt(struct ablkcipher_request *req) +{ + return atmel_aes_crypt(req, + AES_FLAGS_CFB | AES_FLAGS_CFB64); +} + +static int atmel_aes_cfb32_encrypt(struct ablkcipher_request *req) +{ + return atmel_aes_crypt(req, + AES_FLAGS_ENCRYPT | AES_FLAGS_CFB | AES_FLAGS_CFB32); +} + +static int atmel_aes_cfb32_decrypt(struct ablkcipher_request *req) +{ + return atmel_aes_crypt(req, + AES_FLAGS_CFB | AES_FLAGS_CFB32); +} + +static int atmel_aes_cfb16_encrypt(struct ablkcipher_request *req) +{ + return atmel_aes_crypt(req, + AES_FLAGS_ENCRYPT | AES_FLAGS_CFB | AES_FLAGS_CFB16); +} + +static int atmel_aes_cfb16_decrypt(struct ablkcipher_request *req) +{ + return atmel_aes_crypt(req, + AES_FLAGS_CFB | AES_FLAGS_CFB16); +} + +static int atmel_aes_cfb8_encrypt(struct 
ablkcipher_request *req) +{ + return atmel_aes_crypt(req, + AES_FLAGS_ENCRYPT | AES_FLAGS_CFB | AES_FLAGS_CFB8); +} + +static int atmel_aes_cfb8_decrypt(struct ablkcipher_request *req) +{ + return atmel_aes_crypt(req, + AES_FLAGS_CFB | AES_FLAGS_CFB8); +} + +static int atmel_aes_ctr_encrypt(struct ablkcipher_request *req) +{ + return atmel_aes_crypt(req, + AES_FLAGS_ENCRYPT | AES_FLAGS_CTR); +} + +static int atmel_aes_ctr_decrypt(struct ablkcipher_request *req) +{ + return atmel_aes_crypt(req, + AES_FLAGS_CTR); +} + +static int atmel_aes_cra_init(struct crypto_tfm *tfm) +{ + tfm->crt_ablkcipher.reqsize = sizeof(struct atmel_aes_reqctx); + + return 0; +} + +static void atmel_aes_cra_exit(struct crypto_tfm *tfm) +{ +} + +static struct crypto_alg aes_algs[] = { +{ + .cra_name = "ecb(aes)", + .cra_driver_name = "atmel-ecb-aes", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_aes_ctx), + .cra_alignmask = 0x0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = atmel_aes_cra_init, + .cra_exit = atmel_aes_cra_exit, + .cra_u.ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = atmel_aes_setkey, + .encrypt = atmel_aes_ecb_encrypt, + .decrypt = atmel_aes_ecb_decrypt, + } +}, +{ + .cra_name = "cbc(aes)", + .cra_driver_name = "atmel-cbc-aes", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_aes_ctx), + .cra_alignmask = 0x0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = atmel_aes_cra_init, + .cra_exit = atmel_aes_cra_exit, + .cra_u.ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = atmel_aes_setkey, + .encrypt = atmel_aes_cbc_encrypt, + .decrypt = atmel_aes_cbc_decrypt, + } +}, +{ + 
.cra_name = "ofb(aes)", + .cra_driver_name = "atmel-ofb-aes", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_aes_ctx), + .cra_alignmask = 0x0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = atmel_aes_cra_init, + .cra_exit = atmel_aes_cra_exit, + .cra_u.ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = atmel_aes_setkey, + .encrypt = atmel_aes_ofb_encrypt, + .decrypt = atmel_aes_ofb_decrypt, + } +}, +{ + .cra_name = "cfb(aes)", + .cra_driver_name = "atmel-cfb-aes", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_aes_ctx), + .cra_alignmask = 0x0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = atmel_aes_cra_init, + .cra_exit = atmel_aes_cra_exit, + .cra_u.ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = atmel_aes_setkey, + .encrypt = atmel_aes_cfb_encrypt, + .decrypt = atmel_aes_cfb_decrypt, + } +}, +{ + .cra_name = "cfb32(aes)", + .cra_driver_name = "atmel-cfb32-aes", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = CFB32_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_aes_ctx), + .cra_alignmask = 0x0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = atmel_aes_cra_init, + .cra_exit = atmel_aes_cra_exit, + .cra_u.ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = atmel_aes_setkey, + .encrypt = atmel_aes_cfb32_encrypt, + .decrypt = atmel_aes_cfb32_decrypt, + } +}, +{ + .cra_name = "cfb16(aes)", + .cra_driver_name = "atmel-cfb16-aes", + .cra_priority = 100, + .cra_flags = 
CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= CFB16_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct atmel_aes_ctx),
+	.cra_alignmask		= 0x0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= atmel_aes_cra_init,
+	.cra_exit		= atmel_aes_cra_exit,
+	.cra_u.ablkcipher = {
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= atmel_aes_setkey,
+		.encrypt	= atmel_aes_cfb16_encrypt,
+		.decrypt	= atmel_aes_cfb16_decrypt,
+	}
+},
+{
+	.cra_name		= "cfb8(aes)",
+	.cra_driver_name	= "atmel-cfb8-aes",
+	.cra_priority		= 100,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	/* 8-bit CFB works on one-byte segments, like cfb16/cfb32 above */
+	.cra_blocksize		= CFB8_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct atmel_aes_ctx),
+	.cra_alignmask		= 0x0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= atmel_aes_cra_init,
+	.cra_exit		= atmel_aes_cra_exit,
+	.cra_u.ablkcipher = {
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= atmel_aes_setkey,
+		.encrypt	= atmel_aes_cfb8_encrypt,
+		.decrypt	= atmel_aes_cfb8_decrypt,
+	}
+},
+{
+	.cra_name		= "ctr(aes)",
+	.cra_driver_name	= "atmel-ctr-aes",
+	.cra_priority		= 100,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct atmel_aes_ctx),
+	.cra_alignmask		= 0x0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= atmel_aes_cra_init,
+	.cra_exit		= atmel_aes_cra_exit,
+	.cra_u.ablkcipher = {
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= atmel_aes_setkey,
+		.encrypt	= atmel_aes_ctr_encrypt,
+		.decrypt	= atmel_aes_ctr_decrypt,
+	}
+},
+};
+
+static struct crypto_alg aes_cfb64_alg[] = {
+{
+	.cra_name		= "cfb64(aes)",
+	.cra_driver_name	= "atmel-cfb64-aes",
+	.cra_priority		= 100,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+
.cra_blocksize = CFB64_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_aes_ctx), + .cra_alignmask = 0x0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = atmel_aes_cra_init, + .cra_exit = atmel_aes_cra_exit, + .cra_u.ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = atmel_aes_setkey, + .encrypt = atmel_aes_cfb64_encrypt, + .decrypt = atmel_aes_cfb64_decrypt, + } +}, +}; + +static void atmel_aes_queue_task(unsigned long data) +{ + struct atmel_aes_dev *dd = (struct atmel_aes_dev *)data; + + atmel_aes_handle_queue(dd, NULL); +} + +static void atmel_aes_done_task(unsigned long data) +{ + struct atmel_aes_dev *dd = (struct atmel_aes_dev *) data; + int err; + + if (!(dd->flags & AES_FLAGS_DMA)) { + atmel_aes_read_n(dd, AES_ODATAR(0), (u32 *) dd->buf_out, + dd->bufcnt >> 2); + + if (sg_copy_from_buffer(dd->out_sg, dd->nb_out_sg, + dd->buf_out, dd->bufcnt)) + err = 0; + else + err = -EINVAL; + + goto cpu_end; + } + + err = atmel_aes_crypt_dma_stop(dd); + + err = dd->err ? : err; + + if (dd->total && !err) { + err = atmel_aes_crypt_dma_start(dd); + if (!err) + return; /* DMA started. Not fininishing. 
*/ + } + +cpu_end: + atmel_aes_finish_req(dd, err); + atmel_aes_handle_queue(dd, NULL); +} + +static irqreturn_t atmel_aes_irq(int irq, void *dev_id) +{ + struct atmel_aes_dev *aes_dd = dev_id; + u32 reg; + + reg = atmel_aes_read(aes_dd, AES_ISR); + if (reg & atmel_aes_read(aes_dd, AES_IMR)) { + atmel_aes_write(aes_dd, AES_IDR, reg); + if (AES_FLAGS_BUSY & aes_dd->flags) + tasklet_schedule(&aes_dd->done_task); + else + dev_warn(aes_dd->dev, "AES interrupt when no active requests.\n"); + return IRQ_HANDLED; + } + + return IRQ_NONE; +} + +static void atmel_aes_unregister_algs(struct atmel_aes_dev *dd) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(aes_algs); i++) + crypto_unregister_alg(&aes_algs[i]); + if (dd->hw_version >= 0x130) + crypto_unregister_alg(&aes_cfb64_alg[0]); +} + +static int atmel_aes_register_algs(struct atmel_aes_dev *dd) +{ + int err, i, j; + + for (i = 0; i < ARRAY_SIZE(aes_algs); i++) { + INIT_LIST_HEAD(&aes_algs[i].cra_list); + err = crypto_register_alg(&aes_algs[i]); + if (err) + goto err_aes_algs; + } + + atmel_aes_hw_version_init(dd); + + if (dd->hw_version >= 0x130) { + INIT_LIST_HEAD(&aes_cfb64_alg[0].cra_list); + err = crypto_register_alg(&aes_cfb64_alg[0]); + if (err) + goto err_aes_cfb64_alg; + } + + return 0; + +err_aes_cfb64_alg: + i = ARRAY_SIZE(aes_algs); +err_aes_algs: + for (j = 0; j < i; j++) + crypto_unregister_alg(&aes_algs[j]); + + return err; +} + +static int __devinit atmel_aes_probe(struct platform_device *pdev) +{ + struct atmel_aes_dev *aes_dd; + struct aes_platform_data *pdata; + struct device *dev = &pdev->dev; + struct resource *aes_res; + unsigned long aes_phys_size; + int err; + + pdata = pdev->dev.platform_data; + if (!pdata) { + err = -ENXIO; + goto aes_dd_err; + } + + aes_dd = kzalloc(sizeof(struct atmel_aes_dev), GFP_KERNEL); + if (aes_dd == NULL) { + dev_err(dev, "unable to alloc data struct.\n"); + err = -ENOMEM; + goto aes_dd_err; + } + + aes_dd->dev = dev; + + platform_set_drvdata(pdev, aes_dd); + + 
INIT_LIST_HEAD(&aes_dd->list); + + tasklet_init(&aes_dd->done_task, atmel_aes_done_task, + (unsigned long)aes_dd); + tasklet_init(&aes_dd->queue_task, atmel_aes_queue_task, + (unsigned long)aes_dd); + + crypto_init_queue(&aes_dd->queue, ATMEL_AES_QUEUE_LENGTH); + + aes_dd->irq = -1; + + /* Get the base address */ + aes_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!aes_res) { + dev_err(dev, "no MEM resource info\n"); + err = -ENODEV; + goto res_err; + } + aes_dd->phys_base = aes_res->start; + aes_phys_size = resource_size(aes_res); + + /* Get the IRQ */ + aes_dd->irq = platform_get_irq(pdev, 0); + if (aes_dd->irq < 0) { + dev_err(dev, "no IRQ resource info\n"); + err = aes_dd->irq; + goto aes_irq_err; + } + + err = request_irq(aes_dd->irq, atmel_aes_irq, IRQF_SHARED, "atmel-aes", + aes_dd); + if (err) { + dev_err(dev, "unable to request aes irq.\n"); + goto aes_irq_err; + } + + /* Initializing the clock */ + aes_dd->iclk = clk_get(&pdev->dev, NULL); + if (IS_ERR(aes_dd->iclk)) { + dev_err(dev, "clock intialization failed.\n"); + err = PTR_ERR(aes_dd->iclk); + goto clk_err; + } + + aes_dd->io_base = ioremap(aes_dd->phys_base, aes_phys_size); + if (!aes_dd->io_base) { + dev_err(dev, "can't ioremap\n"); + err = -ENOMEM; + goto aes_io_err; + } + + err = atmel_aes_dma_init(aes_dd); + if (err) + goto err_aes_dma; + + spin_lock(&atmel_aes.lock); + list_add_tail(&aes_dd->list, &atmel_aes.dev_list); + spin_unlock(&atmel_aes.lock); + + err = atmel_aes_register_algs(aes_dd); + if (err) + goto err_algs; + + dev_info(dev, "Atmel AES\n"); + + return 0; + +err_algs: + spin_lock(&atmel_aes.lock); + list_del(&aes_dd->list); + spin_unlock(&atmel_aes.lock); + atmel_aes_dma_cleanup(aes_dd); +err_aes_dma: + iounmap(aes_dd->io_base); +aes_io_err: + clk_put(aes_dd->iclk); +clk_err: + free_irq(aes_dd->irq, aes_dd); +aes_irq_err: +res_err: + tasklet_kill(&aes_dd->done_task); + tasklet_kill(&aes_dd->queue_task); + kfree(aes_dd); + aes_dd = NULL; +aes_dd_err: + dev_err(dev, 
"initialization failed.\n"); + + return err; +} + +static int __devexit atmel_aes_remove(struct platform_device *pdev) +{ + static struct atmel_aes_dev *aes_dd; + + aes_dd = platform_get_drvdata(pdev); + if (!aes_dd) + return -ENODEV; + spin_lock(&atmel_aes.lock); + list_del(&aes_dd->list); + spin_unlock(&atmel_aes.lock); + + atmel_aes_unregister_algs(aes_dd); + + tasklet_kill(&aes_dd->done_task); + tasklet_kill(&aes_dd->queue_task); + + atmel_aes_dma_cleanup(aes_dd); + + iounmap(aes_dd->io_base); + + clk_put(aes_dd->iclk); + + if (aes_dd->irq > 0) + free_irq(aes_dd->irq, aes_dd); + + kfree(aes_dd); + aes_dd = NULL; + + return 0; +} + +static struct platform_driver atmel_aes_driver = { + .probe = atmel_aes_probe, + .remove = __devexit_p(atmel_aes_remove), + .driver = { + .name = "atmel_aes", + .owner = THIS_MODULE, + }, +}; + +module_platform_driver(atmel_aes_driver); + +MODULE_DESCRIPTION("Atmel AES hw acceleration support."); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Nicolas Royer - Eukréa Electromatique"); -- cgit v0.10.2 From 13802005d8f2db244ec1f5d7f6923de8f7a463db Mon Sep 17 00:00:00 2001 From: Nicolas Royer Date: Sun, 1 Jul 2012 19:19:45 +0200 Subject: crypto: atmel - add Atmel DES/TDES driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nicolas Royer Acked-by: Nicolas Ferre Acked-by: Eric Bénard Tested-by: Eric Bénard Signed-off-by: Herbert Xu diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index d04eabe..76d489b 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -349,4 +349,20 @@ config CRYPTO_DEV_ATMEL_AES To compile this driver as a module, choose M here: the module will be called atmel-aes. +config CRYPTO_DEV_ATMEL_TDES + tristate "Support for Atmel DES/TDES hw accelerator" + depends on ARCH_AT91 + select CRYPTO_DES + select CRYPTO_CBC + select CRYPTO_ECB + select CRYPTO_ALGAPI + select CRYPTO_BLKCIPHER + help + Some Atmel processors have DES/TDES hw accelerator. 
+ Select this if you want to use the Atmel module for + DES/TDES algorithms. + + To compile this driver as a module, choose M here: the module + will be called atmel-tdes. + endif # CRYPTO_HW diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index 44a8147..6b8b75f 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -18,3 +18,4 @@ obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/ obj-$(CONFIG_CRYPTO_DEV_BFIN_CRC) += bfin_crc.o obj-$(CONFIG_CRYPTO_DEV_NX) += nx/ obj-$(CONFIG_CRYPTO_DEV_ATMEL_AES) += atmel-aes.o +obj-$(CONFIG_CRYPTO_DEV_ATMEL_TDES) += atmel-tdes.o diff --git a/drivers/crypto/atmel-tdes-regs.h b/drivers/crypto/atmel-tdes-regs.h new file mode 100644 index 0000000..5ac2a90 --- /dev/null +++ b/drivers/crypto/atmel-tdes-regs.h @@ -0,0 +1,89 @@ +#ifndef __ATMEL_TDES_REGS_H__ +#define __ATMEL_TDES_REGS_H__ + +#define TDES_CR 0x00 +#define TDES_CR_START (1 << 0) +#define TDES_CR_SWRST (1 << 8) +#define TDES_CR_LOADSEED (1 << 16) + +#define TDES_MR 0x04 +#define TDES_MR_CYPHER_DEC (0 << 0) +#define TDES_MR_CYPHER_ENC (1 << 0) +#define TDES_MR_TDESMOD_MASK (0x3 << 1) +#define TDES_MR_TDESMOD_DES (0x0 << 1) +#define TDES_MR_TDESMOD_TDES (0x1 << 1) +#define TDES_MR_TDESMOD_XTEA (0x2 << 1) +#define TDES_MR_KEYMOD_3KEY (0 << 4) +#define TDES_MR_KEYMOD_2KEY (1 << 4) +#define TDES_MR_SMOD_MASK (0x3 << 8) +#define TDES_MR_SMOD_MANUAL (0x0 << 8) +#define TDES_MR_SMOD_AUTO (0x1 << 8) +#define TDES_MR_SMOD_PDC (0x2 << 8) +#define TDES_MR_OPMOD_MASK (0x3 << 12) +#define TDES_MR_OPMOD_ECB (0x0 << 12) +#define TDES_MR_OPMOD_CBC (0x1 << 12) +#define TDES_MR_OPMOD_OFB (0x2 << 12) +#define TDES_MR_OPMOD_CFB (0x3 << 12) +#define TDES_MR_LOD (0x1 << 15) +#define TDES_MR_CFBS_MASK (0x3 << 16) +#define TDES_MR_CFBS_64b (0x0 << 16) +#define TDES_MR_CFBS_32b (0x1 << 16) +#define TDES_MR_CFBS_16b (0x2 << 16) +#define TDES_MR_CFBS_8b (0x3 << 16) +#define TDES_MR_CKEY_MASK (0xF << 20) +#define TDES_MR_CKEY_OFFSET 20 +#define TDES_MR_CTYPE_MASK (0x3F << 24) 
+#define TDES_MR_CTYPE_OFFSET 24 + +#define TDES_IER 0x10 +#define TDES_IDR 0x14 +#define TDES_IMR 0x18 +#define TDES_ISR 0x1C +#define TDES_INT_DATARDY (1 << 0) +#define TDES_INT_ENDRX (1 << 1) +#define TDES_INT_ENDTX (1 << 2) +#define TDES_INT_RXBUFF (1 << 3) +#define TDES_INT_TXBUFE (1 << 4) +#define TDES_INT_URAD (1 << 8) +#define TDES_ISR_URAT_MASK (0x3 << 12) +#define TDES_ISR_URAT_IDR (0x0 << 12) +#define TDES_ISR_URAT_ODR (0x1 << 12) +#define TDES_ISR_URAT_MR (0x2 << 12) +#define TDES_ISR_URAT_WO (0x3 << 12) + + +#define TDES_KEY1W1R 0x20 +#define TDES_KEY1W2R 0x24 +#define TDES_KEY2W1R 0x28 +#define TDES_KEY2W2R 0x2C +#define TDES_KEY3W1R 0x30 +#define TDES_KEY3W2R 0x34 +#define TDES_IDATA1R 0x40 +#define TDES_IDATA2R 0x44 +#define TDES_ODATA1R 0x50 +#define TDES_ODATA2R 0x54 +#define TDES_IV1R 0x60 +#define TDES_IV2R 0x64 + +#define TDES_XTEARNDR 0x70 +#define TDES_XTEARNDR_XTEA_RNDS_MASK (0x3F << 0) +#define TDES_XTEARNDR_XTEA_RNDS_OFFSET 0 + +#define TDES_RPR 0x100 +#define TDES_RCR 0x104 +#define TDES_TPR 0x108 +#define TDES_TCR 0x10C +#define TDES_RNPR 0x110 /* was 0x118, which aliased TDES_TNPR */ +#define TDES_RNCR 0x114 /* was 0x11C, which aliased TDES_TNCR */ +#define TDES_TNPR 0x118 +#define TDES_TNCR 0x11C +#define TDES_PTCR 0x120 +#define TDES_PTCR_RXTEN (1 << 0) +#define TDES_PTCR_RXTDIS (1 << 1) +#define TDES_PTCR_TXTEN (1 << 8) +#define TDES_PTCR_TXTDIS (1 << 9) +#define TDES_PTSR 0x124 +#define TDES_PTSR_RXTEN (1 << 0) +#define TDES_PTSR_TXTEN (1 << 8) + +#endif /* __ATMEL_TDES_REGS_H__ */ diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c new file mode 100644 index 0000000..eb2b61e --- /dev/null +++ b/drivers/crypto/atmel-tdes.c @@ -0,0 +1,1215 @@ +/* + * Cryptographic API. + * + * Support for ATMEL DES/TDES HW acceleration. + * + * Copyright (c) 2012 Eukréa Electromatique - ATMEL + * Author: Nicolas Royer + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation.
+ * + * Some ideas are from omap-aes.c drivers. + */ + + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "atmel-tdes-regs.h" + +/* TDES flags */ +#define TDES_FLAGS_MODE_MASK 0x007f +#define TDES_FLAGS_ENCRYPT BIT(0) +#define TDES_FLAGS_CBC BIT(1) +#define TDES_FLAGS_CFB BIT(2) +#define TDES_FLAGS_CFB8 BIT(3) +#define TDES_FLAGS_CFB16 BIT(4) +#define TDES_FLAGS_CFB32 BIT(5) +#define TDES_FLAGS_OFB BIT(6) + +#define TDES_FLAGS_INIT BIT(16) +#define TDES_FLAGS_FAST BIT(17) +#define TDES_FLAGS_BUSY BIT(18) + +#define ATMEL_TDES_QUEUE_LENGTH 1 + +#define CFB8_BLOCK_SIZE 1 +#define CFB16_BLOCK_SIZE 2 +#define CFB32_BLOCK_SIZE 4 +#define CFB64_BLOCK_SIZE 8 + + +struct atmel_tdes_dev; + +struct atmel_tdes_ctx { + struct atmel_tdes_dev *dd; + + int keylen; + u32 key[3*DES_KEY_SIZE / sizeof(u32)]; + unsigned long flags; +}; + +struct atmel_tdes_reqctx { + unsigned long mode; +}; + +struct atmel_tdes_dev { + struct list_head list; + unsigned long phys_base; + void __iomem *io_base; + + struct atmel_tdes_ctx *ctx; + struct device *dev; + struct clk *iclk; + int irq; + + unsigned long flags; + int err; + + spinlock_t lock; + struct crypto_queue queue; + + struct tasklet_struct done_task; + struct tasklet_struct queue_task; + + struct ablkcipher_request *req; + size_t total; + + struct scatterlist *in_sg; + size_t in_offset; + struct scatterlist *out_sg; + size_t out_offset; + + size_t buflen; + size_t dma_size; + + void *buf_in; + int dma_in; + dma_addr_t dma_addr_in; + + void *buf_out; + int dma_out; + dma_addr_t dma_addr_out; +}; + +struct atmel_tdes_drv { + struct list_head dev_list; + spinlock_t lock; +}; + +static struct atmel_tdes_drv atmel_tdes = { + .dev_list = LIST_HEAD_INIT(atmel_tdes.dev_list), + .lock = 
__SPIN_LOCK_UNLOCKED(atmel_tdes.lock), +}; + +static int atmel_tdes_sg_copy(struct scatterlist **sg, size_t *offset, + void *buf, size_t buflen, size_t total, int out) +{ + unsigned int count, off = 0; + + while (buflen && total) { + count = min((*sg)->length - *offset, total); + count = min(count, buflen); + + if (!count) + return off; + + scatterwalk_map_and_copy(buf + off, *sg, *offset, count, out); + + off += count; + buflen -= count; + *offset += count; + total -= count; + + if (*offset == (*sg)->length) { + *sg = sg_next(*sg); + if (*sg) + *offset = 0; + else + total = 0; + } + } + + return off; +} + +static inline u32 atmel_tdes_read(struct atmel_tdes_dev *dd, u32 offset) +{ + return readl_relaxed(dd->io_base + offset); +} + +static inline void atmel_tdes_write(struct atmel_tdes_dev *dd, + u32 offset, u32 value) +{ + writel_relaxed(value, dd->io_base + offset); +} + +static void atmel_tdes_write_n(struct atmel_tdes_dev *dd, u32 offset, + u32 *value, int count) +{ + for (; count--; value++, offset += 4) + atmel_tdes_write(dd, offset, *value); +} + +static struct atmel_tdes_dev *atmel_tdes_find_dev(struct atmel_tdes_ctx *ctx) +{ + struct atmel_tdes_dev *tdes_dd = NULL; + struct atmel_tdes_dev *tmp; + + spin_lock_bh(&atmel_tdes.lock); + if (!ctx->dd) { + list_for_each_entry(tmp, &atmel_tdes.dev_list, list) { + tdes_dd = tmp; + break; + } + ctx->dd = tdes_dd; + } else { + tdes_dd = ctx->dd; + } + spin_unlock_bh(&atmel_tdes.lock); + + return tdes_dd; +} + +static int atmel_tdes_hw_init(struct atmel_tdes_dev *dd) +{ + clk_prepare_enable(dd->iclk); + + if (!(dd->flags & TDES_FLAGS_INIT)) { + atmel_tdes_write(dd, TDES_CR, TDES_CR_SWRST); + dd->flags |= TDES_FLAGS_INIT; + dd->err = 0; + } + + return 0; +} + +static int atmel_tdes_write_ctrl(struct atmel_tdes_dev *dd) +{ + int err; + u32 valcr = 0, valmr = TDES_MR_SMOD_PDC; + + err = atmel_tdes_hw_init(dd); + + if (err) + return err; + + atmel_tdes_write(dd, TDES_PTCR, TDES_PTCR_TXTDIS|TDES_PTCR_RXTDIS); + + /* MR 
register must be set before IV registers */ + if (dd->ctx->keylen > (DES_KEY_SIZE << 1)) { + valmr |= TDES_MR_KEYMOD_3KEY; + valmr |= TDES_MR_TDESMOD_TDES; + } else if (dd->ctx->keylen > DES_KEY_SIZE) { + valmr |= TDES_MR_KEYMOD_2KEY; + valmr |= TDES_MR_TDESMOD_TDES; + } else { + valmr |= TDES_MR_TDESMOD_DES; + } + + if (dd->flags & TDES_FLAGS_CBC) { + valmr |= TDES_MR_OPMOD_CBC; + } else if (dd->flags & TDES_FLAGS_CFB) { + valmr |= TDES_MR_OPMOD_CFB; + + if (dd->flags & TDES_FLAGS_CFB8) + valmr |= TDES_MR_CFBS_8b; + else if (dd->flags & TDES_FLAGS_CFB16) + valmr |= TDES_MR_CFBS_16b; + else if (dd->flags & TDES_FLAGS_CFB32) + valmr |= TDES_MR_CFBS_32b; + } else if (dd->flags & TDES_FLAGS_OFB) { + valmr |= TDES_MR_OPMOD_OFB; + } + + if ((dd->flags & TDES_FLAGS_ENCRYPT) || (dd->flags & TDES_FLAGS_OFB)) + valmr |= TDES_MR_CYPHER_ENC; + + atmel_tdes_write(dd, TDES_CR, valcr); + atmel_tdes_write(dd, TDES_MR, valmr); + + atmel_tdes_write_n(dd, TDES_KEY1W1R, dd->ctx->key, + dd->ctx->keylen >> 2); + + if (((dd->flags & TDES_FLAGS_CBC) || (dd->flags & TDES_FLAGS_CFB) || + (dd->flags & TDES_FLAGS_OFB)) && dd->req->info) { + atmel_tdes_write_n(dd, TDES_IV1R, dd->req->info, 2); + } + + return 0; +} + +static int atmel_tdes_crypt_dma_stop(struct atmel_tdes_dev *dd) +{ + int err = 0; + size_t count; + + atmel_tdes_write(dd, TDES_PTCR, TDES_PTCR_TXTDIS|TDES_PTCR_RXTDIS); + + if (dd->flags & TDES_FLAGS_FAST) { + dma_unmap_sg(dd->dev, dd->out_sg, 1, DMA_FROM_DEVICE); + dma_unmap_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE); + } else { + dma_sync_single_for_cpu(dd->dev, dd->dma_addr_out, + dd->dma_size, DMA_FROM_DEVICE); + + /* buf_out now belongs to the CPU: copy the result to the destination sg */ + count = atmel_tdes_sg_copy(&dd->out_sg, &dd->out_offset, + dd->buf_out, dd->buflen, dd->dma_size, 1); + if (count != dd->dma_size) { + err = -EINVAL; + pr_err("not all data converted: %zu\n", count); + } + } + + return err; +} + +static int atmel_tdes_dma_init(struct atmel_tdes_dev *dd) +{ + int err = -ENOMEM; + + dd->buf_in = (void
*)__get_free_pages(GFP_KERNEL, 0); + dd->buf_out = (void *)__get_free_pages(GFP_KERNEL, 0); + dd->buflen = PAGE_SIZE; + dd->buflen &= ~(DES_BLOCK_SIZE - 1); + + if (!dd->buf_in || !dd->buf_out) { + dev_err(dd->dev, "unable to alloc pages.\n"); + goto err_alloc; + } + + /* MAP here */ + dd->dma_addr_in = dma_map_single(dd->dev, dd->buf_in, + dd->buflen, DMA_TO_DEVICE); + if (dma_mapping_error(dd->dev, dd->dma_addr_in)) { + dev_err(dd->dev, "dma %d bytes error\n", dd->buflen); + err = -EINVAL; + goto err_map_in; + } + + dd->dma_addr_out = dma_map_single(dd->dev, dd->buf_out, + dd->buflen, DMA_FROM_DEVICE); + if (dma_mapping_error(dd->dev, dd->dma_addr_out)) { + dev_err(dd->dev, "dma %d bytes error\n", dd->buflen); + err = -EINVAL; + goto err_map_out; + } + + return 0; + +err_map_out: + dma_unmap_single(dd->dev, dd->dma_addr_in, dd->buflen, + DMA_TO_DEVICE); +err_map_in: + free_page((unsigned long)dd->buf_out); + free_page((unsigned long)dd->buf_in); +err_alloc: + if (err) + pr_err("error: %d\n", err); + return err; +} + +static void atmel_tdes_dma_cleanup(struct atmel_tdes_dev *dd) +{ + dma_unmap_single(dd->dev, dd->dma_addr_out, dd->buflen, + DMA_FROM_DEVICE); + dma_unmap_single(dd->dev, dd->dma_addr_in, dd->buflen, + DMA_TO_DEVICE); + free_page((unsigned long)dd->buf_out); + free_page((unsigned long)dd->buf_in); +} + +static int atmel_tdes_crypt_dma(struct crypto_tfm *tfm, dma_addr_t dma_addr_in, + dma_addr_t dma_addr_out, int length) +{ + struct atmel_tdes_ctx *ctx = crypto_tfm_ctx(tfm); + struct atmel_tdes_dev *dd = ctx->dd; + int len32; + + dd->dma_size = length; + + if (!(dd->flags & TDES_FLAGS_FAST)) { + dma_sync_single_for_device(dd->dev, dma_addr_in, length, + DMA_TO_DEVICE); + } + + if ((dd->flags & TDES_FLAGS_CFB) && (dd->flags & TDES_FLAGS_CFB8)) + len32 = DIV_ROUND_UP(length, sizeof(u8)); + else if ((dd->flags & TDES_FLAGS_CFB) && (dd->flags & TDES_FLAGS_CFB16)) + len32 = DIV_ROUND_UP(length, sizeof(u16)); + else + len32 = DIV_ROUND_UP(length, 
sizeof(u32)); + + atmel_tdes_write(dd, TDES_PTCR, TDES_PTCR_TXTDIS|TDES_PTCR_RXTDIS); + atmel_tdes_write(dd, TDES_TPR, dma_addr_in); + atmel_tdes_write(dd, TDES_TCR, len32); + atmel_tdes_write(dd, TDES_RPR, dma_addr_out); + atmel_tdes_write(dd, TDES_RCR, len32); + + /* Enable Interrupt */ + atmel_tdes_write(dd, TDES_IER, TDES_INT_ENDRX); + + /* Start DMA transfer */ + atmel_tdes_write(dd, TDES_PTCR, TDES_PTCR_TXTEN | TDES_PTCR_RXTEN); + + return 0; +} + +static int atmel_tdes_crypt_dma_start(struct atmel_tdes_dev *dd) +{ + struct crypto_tfm *tfm = crypto_ablkcipher_tfm( + crypto_ablkcipher_reqtfm(dd->req)); + int err, fast = 0, in, out; + size_t count; + dma_addr_t addr_in, addr_out; + + if (sg_is_last(dd->in_sg) && sg_is_last(dd->out_sg)) { + /* check for alignment */ + in = IS_ALIGNED((u32)dd->in_sg->offset, sizeof(u32)); + out = IS_ALIGNED((u32)dd->out_sg->offset, sizeof(u32)); + + fast = in && out; + } + + if (fast) { + count = min(dd->total, sg_dma_len(dd->in_sg)); + count = min(count, sg_dma_len(dd->out_sg)); + + if (count != dd->total) { + pr_err("request length != buffer length\n"); + return -EINVAL; + } + + err = dma_map_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE); + if (!err) { + dev_err(dd->dev, "dma_map_sg() error\n"); + return -EINVAL; + } + + err = dma_map_sg(dd->dev, dd->out_sg, 1, + DMA_FROM_DEVICE); + if (!err) { + dev_err(dd->dev, "dma_map_sg() error\n"); + dma_unmap_sg(dd->dev, dd->in_sg, 1, + DMA_TO_DEVICE); + return -EINVAL; + } + + addr_in = sg_dma_address(dd->in_sg); + addr_out = sg_dma_address(dd->out_sg); + + dd->flags |= TDES_FLAGS_FAST; + + } else { + /* use cache buffers */ + count = atmel_tdes_sg_copy(&dd->in_sg, &dd->in_offset, + dd->buf_in, dd->buflen, dd->total, 0); + + addr_in = dd->dma_addr_in; + addr_out = dd->dma_addr_out; + + dd->flags &= ~TDES_FLAGS_FAST; + + } + + dd->total -= count; + + err = atmel_tdes_crypt_dma(tfm, addr_in, addr_out, count); + if (err) { + dma_unmap_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE); + 
dma_unmap_sg(dd->dev, dd->out_sg, 1, DMA_TO_DEVICE); + } + + return err; +} + + +static void atmel_tdes_finish_req(struct atmel_tdes_dev *dd, int err) +{ + struct ablkcipher_request *req = dd->req; + + clk_disable_unprepare(dd->iclk); + + dd->flags &= ~TDES_FLAGS_BUSY; + + req->base.complete(&req->base, err); +} + +static int atmel_tdes_handle_queue(struct atmel_tdes_dev *dd, + struct ablkcipher_request *req) +{ + struct crypto_async_request *async_req, *backlog; + struct atmel_tdes_ctx *ctx; + struct atmel_tdes_reqctx *rctx; + unsigned long flags; + int err, ret = 0; + + spin_lock_irqsave(&dd->lock, flags); + if (req) + ret = ablkcipher_enqueue_request(&dd->queue, req); + if (dd->flags & TDES_FLAGS_BUSY) { + spin_unlock_irqrestore(&dd->lock, flags); + return ret; + } + backlog = crypto_get_backlog(&dd->queue); + async_req = crypto_dequeue_request(&dd->queue); + if (async_req) + dd->flags |= TDES_FLAGS_BUSY; + spin_unlock_irqrestore(&dd->lock, flags); + + if (!async_req) + return ret; + + if (backlog) + backlog->complete(backlog, -EINPROGRESS); + + req = ablkcipher_request_cast(async_req); + + /* assign new request to device */ + dd->req = req; + dd->total = req->nbytes; + dd->in_offset = 0; + dd->in_sg = req->src; + dd->out_offset = 0; + dd->out_sg = req->dst; + + rctx = ablkcipher_request_ctx(req); + ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req)); + rctx->mode &= TDES_FLAGS_MODE_MASK; + dd->flags = (dd->flags & ~TDES_FLAGS_MODE_MASK) | rctx->mode; + dd->ctx = ctx; + ctx->dd = dd; + + err = atmel_tdes_write_ctrl(dd); + if (!err) + err = atmel_tdes_crypt_dma_start(dd); + if (err) { + /* des_task will not finish it, so do it here */ + atmel_tdes_finish_req(dd, err); + tasklet_schedule(&dd->queue_task); + } + + return ret; +} + + +static int atmel_tdes_crypt(struct ablkcipher_request *req, unsigned long mode) +{ + struct atmel_tdes_ctx *ctx = crypto_ablkcipher_ctx( + crypto_ablkcipher_reqtfm(req)); + struct atmel_tdes_reqctx *rctx = 
ablkcipher_request_ctx(req); + struct atmel_tdes_dev *dd; + + if (mode & TDES_FLAGS_CFB8) { + if (!IS_ALIGNED(req->nbytes, CFB8_BLOCK_SIZE)) { + pr_err("request size is not exact amount of CFB8 blocks\n"); + return -EINVAL; + } + } else if (mode & TDES_FLAGS_CFB16) { + if (!IS_ALIGNED(req->nbytes, CFB16_BLOCK_SIZE)) { + pr_err("request size is not exact amount of CFB16 blocks\n"); + return -EINVAL; + } + } else if (mode & TDES_FLAGS_CFB32) { + if (!IS_ALIGNED(req->nbytes, CFB32_BLOCK_SIZE)) { + pr_err("request size is not exact amount of CFB32 blocks\n"); + return -EINVAL; + } + } else if (!IS_ALIGNED(req->nbytes, DES_BLOCK_SIZE)) { + pr_err("request size is not exact amount of DES blocks\n"); + return -EINVAL; + } + + dd = atmel_tdes_find_dev(ctx); + if (!dd) + return -ENODEV; + + rctx->mode = mode; + + return atmel_tdes_handle_queue(dd, req); +} + +static int atmel_des_setkey(struct crypto_ablkcipher *tfm, const u8 *key, + unsigned int keylen) +{ + u32 tmp[DES_EXPKEY_WORDS]; + int err; + struct crypto_tfm *ctfm = crypto_ablkcipher_tfm(tfm); + + struct atmel_tdes_ctx *ctx = crypto_ablkcipher_ctx(tfm); + + if (keylen != DES_KEY_SIZE) { + crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + err = des_ekey(tmp, key); + if (err == 0 && (ctfm->crt_flags & CRYPTO_TFM_REQ_WEAK_KEY)) { + ctfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY; + return -EINVAL; + } + + memcpy(ctx->key, key, keylen); + ctx->keylen = keylen; + + return 0; +} + +static int atmel_tdes_setkey(struct crypto_ablkcipher *tfm, const u8 *key, + unsigned int keylen) +{ + struct atmel_tdes_ctx *ctx = crypto_ablkcipher_ctx(tfm); + const char *alg_name; + + alg_name = crypto_tfm_alg_name(crypto_ablkcipher_tfm(tfm)); + + /* + * HW bug in cfb 3-keys mode. 
+ */ + if (strstr(alg_name, "cfb") && (keylen != 2*DES_KEY_SIZE)) { + crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } else if ((keylen != 2*DES_KEY_SIZE) && (keylen != 3*DES_KEY_SIZE)) { + crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + memcpy(ctx->key, key, keylen); + ctx->keylen = keylen; + + return 0; +} + +static int atmel_tdes_ecb_encrypt(struct ablkcipher_request *req) +{ + return atmel_tdes_crypt(req, TDES_FLAGS_ENCRYPT); +} + +static int atmel_tdes_ecb_decrypt(struct ablkcipher_request *req) +{ + return atmel_tdes_crypt(req, 0); +} + +static int atmel_tdes_cbc_encrypt(struct ablkcipher_request *req) +{ + return atmel_tdes_crypt(req, TDES_FLAGS_ENCRYPT | TDES_FLAGS_CBC); +} + +static int atmel_tdes_cbc_decrypt(struct ablkcipher_request *req) +{ + return atmel_tdes_crypt(req, TDES_FLAGS_CBC); +} +static int atmel_tdes_cfb_encrypt(struct ablkcipher_request *req) +{ + return atmel_tdes_crypt(req, TDES_FLAGS_ENCRYPT | TDES_FLAGS_CFB); +} + +static int atmel_tdes_cfb_decrypt(struct ablkcipher_request *req) +{ + return atmel_tdes_crypt(req, TDES_FLAGS_CFB); +} + +static int atmel_tdes_cfb8_encrypt(struct ablkcipher_request *req) +{ + return atmel_tdes_crypt(req, TDES_FLAGS_ENCRYPT | TDES_FLAGS_CFB | + TDES_FLAGS_CFB8); +} + +static int atmel_tdes_cfb8_decrypt(struct ablkcipher_request *req) +{ + return atmel_tdes_crypt(req, TDES_FLAGS_CFB | TDES_FLAGS_CFB8); +} + +static int atmel_tdes_cfb16_encrypt(struct ablkcipher_request *req) +{ + return atmel_tdes_crypt(req, TDES_FLAGS_ENCRYPT | TDES_FLAGS_CFB | + TDES_FLAGS_CFB16); +} + +static int atmel_tdes_cfb16_decrypt(struct ablkcipher_request *req) +{ + return atmel_tdes_crypt(req, TDES_FLAGS_CFB | TDES_FLAGS_CFB16); +} + +static int atmel_tdes_cfb32_encrypt(struct ablkcipher_request *req) +{ + return atmel_tdes_crypt(req, TDES_FLAGS_ENCRYPT | TDES_FLAGS_CFB | + TDES_FLAGS_CFB32); +} + +static int atmel_tdes_cfb32_decrypt(struct 
ablkcipher_request *req) +{ + return atmel_tdes_crypt(req, TDES_FLAGS_CFB | TDES_FLAGS_CFB32); +} + +static int atmel_tdes_ofb_encrypt(struct ablkcipher_request *req) +{ + return atmel_tdes_crypt(req, TDES_FLAGS_ENCRYPT | TDES_FLAGS_OFB); +} + +static int atmel_tdes_ofb_decrypt(struct ablkcipher_request *req) +{ + return atmel_tdes_crypt(req, TDES_FLAGS_OFB); +} + +static int atmel_tdes_cra_init(struct crypto_tfm *tfm) +{ + tfm->crt_ablkcipher.reqsize = sizeof(struct atmel_tdes_reqctx); + + return 0; +} + +static void atmel_tdes_cra_exit(struct crypto_tfm *tfm) +{ +} + +static struct crypto_alg tdes_algs[] = { +{ + .cra_name = "ecb(des)", + .cra_driver_name = "atmel-ecb-des", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_tdes_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = atmel_tdes_cra_init, + .cra_exit = atmel_tdes_cra_exit, + .cra_u.ablkcipher = { + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .setkey = atmel_des_setkey, + .encrypt = atmel_tdes_ecb_encrypt, + .decrypt = atmel_tdes_ecb_decrypt, + } +}, +{ + .cra_name = "cbc(des)", + .cra_driver_name = "atmel-cbc-des", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_tdes_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = atmel_tdes_cra_init, + .cra_exit = atmel_tdes_cra_exit, + .cra_u.ablkcipher = { + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = atmel_des_setkey, + .encrypt = atmel_tdes_cbc_encrypt, + .decrypt = atmel_tdes_cbc_decrypt, + } +}, +{ + .cra_name = "cfb(des)", + .cra_driver_name = "atmel-cfb-des", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + 
.cra_blocksize = DES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_tdes_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = atmel_tdes_cra_init, + .cra_exit = atmel_tdes_cra_exit, + .cra_u.ablkcipher = { + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = atmel_des_setkey, + .encrypt = atmel_tdes_cfb_encrypt, + .decrypt = atmel_tdes_cfb_decrypt, + } +}, +{ + .cra_name = "cfb8(des)", + .cra_driver_name = "atmel-cfb8-des", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = CFB8_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_tdes_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = atmel_tdes_cra_init, + .cra_exit = atmel_tdes_cra_exit, + .cra_u.ablkcipher = { + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = atmel_des_setkey, + .encrypt = atmel_tdes_cfb8_encrypt, + .decrypt = atmel_tdes_cfb8_decrypt, + } +}, +{ + .cra_name = "cfb16(des)", + .cra_driver_name = "atmel-cfb16-des", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = CFB16_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_tdes_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = atmel_tdes_cra_init, + .cra_exit = atmel_tdes_cra_exit, + .cra_u.ablkcipher = { + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = atmel_des_setkey, + .encrypt = atmel_tdes_cfb16_encrypt, + .decrypt = atmel_tdes_cfb16_decrypt, + } +}, +{ + .cra_name = "cfb32(des)", + .cra_driver_name = "atmel-cfb32-des", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = CFB32_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_tdes_ctx), + .cra_alignmask = 0, + .cra_type 
= &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = atmel_tdes_cra_init, + .cra_exit = atmel_tdes_cra_exit, + .cra_u.ablkcipher = { + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = atmel_des_setkey, + .encrypt = atmel_tdes_cfb32_encrypt, + .decrypt = atmel_tdes_cfb32_decrypt, + } +}, +{ + .cra_name = "ofb(des)", + .cra_driver_name = "atmel-ofb-des", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_tdes_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = atmel_tdes_cra_init, + .cra_exit = atmel_tdes_cra_exit, + .cra_u.ablkcipher = { + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = atmel_des_setkey, + .encrypt = atmel_tdes_ofb_encrypt, + .decrypt = atmel_tdes_ofb_decrypt, + } +}, +{ + .cra_name = "ecb(des3_ede)", + .cra_driver_name = "atmel-ecb-tdes", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_tdes_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = atmel_tdes_cra_init, + .cra_exit = atmel_tdes_cra_exit, + .cra_u.ablkcipher = { + .min_keysize = 2 * DES_KEY_SIZE, + .max_keysize = 3 * DES_KEY_SIZE, + .setkey = atmel_tdes_setkey, + .encrypt = atmel_tdes_ecb_encrypt, + .decrypt = atmel_tdes_ecb_decrypt, + } +}, +{ + .cra_name = "cbc(des3_ede)", + .cra_driver_name = "atmel-cbc-tdes", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_tdes_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = atmel_tdes_cra_init, + .cra_exit = atmel_tdes_cra_exit, + 
.cra_u.ablkcipher = { + .min_keysize = 2*DES_KEY_SIZE, + .max_keysize = 3*DES_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = atmel_tdes_setkey, + .encrypt = atmel_tdes_cbc_encrypt, + .decrypt = atmel_tdes_cbc_decrypt, + } +}, +{ + .cra_name = "cfb(des3_ede)", + .cra_driver_name = "atmel-cfb-tdes", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_tdes_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = atmel_tdes_cra_init, + .cra_exit = atmel_tdes_cra_exit, + .cra_u.ablkcipher = { + .min_keysize = 2*DES_KEY_SIZE, + .max_keysize = 2*DES_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = atmel_tdes_setkey, + .encrypt = atmel_tdes_cfb_encrypt, + .decrypt = atmel_tdes_cfb_decrypt, + } +}, +{ + .cra_name = "cfb8(des3_ede)", + .cra_driver_name = "atmel-cfb8-tdes", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = CFB8_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_tdes_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = atmel_tdes_cra_init, + .cra_exit = atmel_tdes_cra_exit, + .cra_u.ablkcipher = { + .min_keysize = 2*DES_KEY_SIZE, + .max_keysize = 2*DES_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = atmel_tdes_setkey, + .encrypt = atmel_tdes_cfb8_encrypt, + .decrypt = atmel_tdes_cfb8_decrypt, + } +}, +{ + .cra_name = "cfb16(des3_ede)", + .cra_driver_name = "atmel-cfb16-tdes", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = CFB16_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_tdes_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = atmel_tdes_cra_init, + .cra_exit = atmel_tdes_cra_exit, + .cra_u.ablkcipher = { + .min_keysize = 2*DES_KEY_SIZE, + .max_keysize = 2*DES_KEY_SIZE, + 
.ivsize = DES_BLOCK_SIZE, + .setkey = atmel_tdes_setkey, + .encrypt = atmel_tdes_cfb16_encrypt, + .decrypt = atmel_tdes_cfb16_decrypt, + } +}, +{ + .cra_name = "cfb32(des3_ede)", + .cra_driver_name = "atmel-cfb32-tdes", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = CFB32_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_tdes_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = atmel_tdes_cra_init, + .cra_exit = atmel_tdes_cra_exit, + .cra_u.ablkcipher = { + .min_keysize = 2*DES_KEY_SIZE, + .max_keysize = 2*DES_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = atmel_tdes_setkey, + .encrypt = atmel_tdes_cfb32_encrypt, + .decrypt = atmel_tdes_cfb32_decrypt, + } +}, +{ + .cra_name = "ofb(des3_ede)", + .cra_driver_name = "atmel-ofb-tdes", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_tdes_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = atmel_tdes_cra_init, + .cra_exit = atmel_tdes_cra_exit, + .cra_u.ablkcipher = { + .min_keysize = 2*DES_KEY_SIZE, + .max_keysize = 3*DES_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = atmel_tdes_setkey, + .encrypt = atmel_tdes_ofb_encrypt, + .decrypt = atmel_tdes_ofb_decrypt, + } +}, +}; + +static void atmel_tdes_queue_task(unsigned long data) +{ + struct atmel_tdes_dev *dd = (struct atmel_tdes_dev *)data; + + atmel_tdes_handle_queue(dd, NULL); +} + +static void atmel_tdes_done_task(unsigned long data) +{ + struct atmel_tdes_dev *dd = (struct atmel_tdes_dev *) data; + int err; + + err = atmel_tdes_crypt_dma_stop(dd); + + err = dd->err ? 
: err; + + if (dd->total && !err) { + err = atmel_tdes_crypt_dma_start(dd); + if (!err) + return; + } + + atmel_tdes_finish_req(dd, err); + atmel_tdes_handle_queue(dd, NULL); +} + +static irqreturn_t atmel_tdes_irq(int irq, void *dev_id) +{ + struct atmel_tdes_dev *tdes_dd = dev_id; + u32 reg; + + reg = atmel_tdes_read(tdes_dd, TDES_ISR); + if (reg & atmel_tdes_read(tdes_dd, TDES_IMR)) { + atmel_tdes_write(tdes_dd, TDES_IDR, reg); + if (TDES_FLAGS_BUSY & tdes_dd->flags) + tasklet_schedule(&tdes_dd->done_task); + else + dev_warn(tdes_dd->dev, "TDES interrupt when no active requests.\n"); + return IRQ_HANDLED; + } + + return IRQ_NONE; +} + +static void atmel_tdes_unregister_algs(struct atmel_tdes_dev *dd) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(tdes_algs); i++) + crypto_unregister_alg(&tdes_algs[i]); +} + +static int atmel_tdes_register_algs(struct atmel_tdes_dev *dd) +{ + int err, i, j; + + for (i = 0; i < ARRAY_SIZE(tdes_algs); i++) { + INIT_LIST_HEAD(&tdes_algs[i].cra_list); + err = crypto_register_alg(&tdes_algs[i]); + if (err) + goto err_tdes_algs; + } + + return 0; + +err_tdes_algs: + for (j = 0; j < i; j++) + crypto_unregister_alg(&tdes_algs[j]); + + return err; +} + +static int __devinit atmel_tdes_probe(struct platform_device *pdev) +{ + struct atmel_tdes_dev *tdes_dd; + struct device *dev = &pdev->dev; + struct resource *tdes_res; + unsigned long tdes_phys_size; + int err; + + tdes_dd = kzalloc(sizeof(struct atmel_tdes_dev), GFP_KERNEL); + if (tdes_dd == NULL) { + dev_err(dev, "unable to alloc data struct.\n"); + err = -ENOMEM; + goto tdes_dd_err; + } + + tdes_dd->dev = dev; + + platform_set_drvdata(pdev, tdes_dd); + + INIT_LIST_HEAD(&tdes_dd->list); + + tasklet_init(&tdes_dd->done_task, atmel_tdes_done_task, + (unsigned long)tdes_dd); + tasklet_init(&tdes_dd->queue_task, atmel_tdes_queue_task, + (unsigned long)tdes_dd); + + crypto_init_queue(&tdes_dd->queue, ATMEL_TDES_QUEUE_LENGTH); + + tdes_dd->irq = -1; + + /* Get the base address */ + tdes_res = 
platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!tdes_res) { + dev_err(dev, "no MEM resource info\n"); + err = -ENODEV; + goto res_err; + } + tdes_dd->phys_base = tdes_res->start; + tdes_phys_size = resource_size(tdes_res); + + /* Get the IRQ */ + tdes_dd->irq = platform_get_irq(pdev, 0); + if (tdes_dd->irq < 0) { + dev_err(dev, "no IRQ resource info\n"); + err = tdes_dd->irq; + goto res_err; + } + + err = request_irq(tdes_dd->irq, atmel_tdes_irq, IRQF_SHARED, + "atmel-tdes", tdes_dd); + if (err) { + dev_err(dev, "unable to request tdes irq.\n"); + goto tdes_irq_err; + } + + /* Initializing the clock */ + tdes_dd->iclk = clk_get(&pdev->dev, NULL); + if (IS_ERR(tdes_dd->iclk)) { + dev_err(dev, "clock initialization failed.\n"); + err = PTR_ERR(tdes_dd->iclk); + goto clk_err; + } + + tdes_dd->io_base = ioremap(tdes_dd->phys_base, tdes_phys_size); + if (!tdes_dd->io_base) { + dev_err(dev, "can't ioremap\n"); + err = -ENOMEM; + goto tdes_io_err; + } + + err = atmel_tdes_dma_init(tdes_dd); + if (err) + goto err_tdes_dma; + + spin_lock(&atmel_tdes.lock); + list_add_tail(&tdes_dd->list, &atmel_tdes.dev_list); + spin_unlock(&atmel_tdes.lock); + + err = atmel_tdes_register_algs(tdes_dd); + if (err) + goto err_algs; + + dev_info(dev, "Atmel DES/TDES\n"); + + return 0; + +err_algs: + spin_lock(&atmel_tdes.lock); + list_del(&tdes_dd->list); + spin_unlock(&atmel_tdes.lock); + atmel_tdes_dma_cleanup(tdes_dd); +err_tdes_dma: + iounmap(tdes_dd->io_base); +tdes_io_err: + clk_put(tdes_dd->iclk); +clk_err: + free_irq(tdes_dd->irq, tdes_dd); +tdes_irq_err: +res_err: + tasklet_kill(&tdes_dd->done_task); + tasklet_kill(&tdes_dd->queue_task); + kfree(tdes_dd); + tdes_dd = NULL; +tdes_dd_err: + dev_err(dev, "initialization failed.\n"); + + return err; +} + +static int __devexit atmel_tdes_remove(struct platform_device *pdev) +{ + struct atmel_tdes_dev *tdes_dd; /* per-call pointer; must not be shared between device instances */ + + tdes_dd = platform_get_drvdata(pdev); + if (!tdes_dd) + return -ENODEV; + spin_lock(&atmel_tdes.lock); +
list_del(&tdes_dd->list); + spin_unlock(&atmel_tdes.lock); + + atmel_tdes_unregister_algs(tdes_dd); + + if (tdes_dd->irq >= 0) + free_irq(tdes_dd->irq, tdes_dd); /* stop the handler before tasklets and the register mapping go away */ + + tasklet_kill(&tdes_dd->done_task); + tasklet_kill(&tdes_dd->queue_task); + + atmel_tdes_dma_cleanup(tdes_dd); + + iounmap(tdes_dd->io_base); + + clk_put(tdes_dd->iclk); + + kfree(tdes_dd); + platform_set_drvdata(pdev, NULL); /* do not leave drvdata pointing at freed memory */ + + return 0; +} + +static struct platform_driver atmel_tdes_driver = { + .probe = atmel_tdes_probe, + .remove = __devexit_p(atmel_tdes_remove), + .driver = { + .name = "atmel_tdes", + .owner = THIS_MODULE, + }, +}; + +module_platform_driver(atmel_tdes_driver); + +MODULE_DESCRIPTION("Atmel DES/TDES hw acceleration support."); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Nicolas Royer - Eukréa Electromatique"); -- cgit v0.10.2 From ebc82efa1cd64efba0f41455460411b852b5b89c Mon Sep 17 00:00:00 2001 From: Nicolas Royer Date: Sun, 1 Jul 2012 19:19:46 +0200 Subject: crypto: atmel - add Atmel SHA1/SHA256 driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nicolas Royer Acked-by: Nicolas Ferre Acked-by: Eric Bénard Tested-by: Eric Bénard Signed-off-by: Herbert Xu diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 76d489b..7d74d09 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -365,4 +365,18 @@ config CRYPTO_DEV_ATMEL_TDES To compile this driver as a module, choose M here: the module will be called atmel-tdes. +config CRYPTO_DEV_ATMEL_SHA + tristate "Support for Atmel SHA1/SHA256 hw accelerator" + depends on ARCH_AT91 + select CRYPTO_SHA1 + select CRYPTO_SHA256 + select CRYPTO_ALGAPI + help + Some Atmel processors have SHA1/SHA256 hw accelerator. + Select this if you want to use the Atmel module for + SHA1/SHA256 algorithms. + + To compile this driver as a module, choose M here: the module + will be called atmel-sha.
+ endif # CRYPTO_HW diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index 6b8b75f..880a47b 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -19,3 +19,4 @@ obj-$(CONFIG_CRYPTO_DEV_BFIN_CRC) += bfin_crc.o obj-$(CONFIG_CRYPTO_DEV_NX) += nx/ obj-$(CONFIG_CRYPTO_DEV_ATMEL_AES) += atmel-aes.o obj-$(CONFIG_CRYPTO_DEV_ATMEL_TDES) += atmel-tdes.o +obj-$(CONFIG_CRYPTO_DEV_ATMEL_SHA) += atmel-sha.o diff --git a/drivers/crypto/atmel-sha-regs.h b/drivers/crypto/atmel-sha-regs.h new file mode 100644 index 0000000..dc53a20 --- /dev/null +++ b/drivers/crypto/atmel-sha-regs.h @@ -0,0 +1,46 @@ +#ifndef __ATMEL_SHA_REGS_H__ +#define __ATMEL_SHA_REGS_H__ + +#define SHA_REG_DIGEST(x) (0x80 + ((x) * 0x04)) +#define SHA_REG_DIN(x) (0x40 + ((x) * 0x04)) + +#define SHA_CR 0x00 +#define SHA_CR_START (1 << 0) +#define SHA_CR_FIRST (1 << 4) +#define SHA_CR_SWRST (1 << 8) + +#define SHA_MR 0x04 +#define SHA_MR_MODE_MASK (0x3 << 0) +#define SHA_MR_MODE_MANUAL 0x0 +#define SHA_MR_MODE_AUTO 0x1 +#define SHA_MR_MODE_PDC 0x2 +#define SHA_MR_DUALBUFF (1 << 3) +#define SHA_MR_PROCDLY (1 << 4) +#define SHA_MR_ALGO_SHA1 (0 << 8) +#define SHA_MR_ALGO_SHA256 (1 << 8) + +#define SHA_IER 0x10 +#define SHA_IDR 0x14 +#define SHA_IMR 0x18 +#define SHA_ISR 0x1C +#define SHA_INT_DATARDY (1 << 0) +#define SHA_INT_ENDTX (1 << 1) +#define SHA_INT_TXBUFE (1 << 2) +#define SHA_INT_URAD (1 << 8) +#define SHA_ISR_URAT_MASK (0x7 << 12) +#define SHA_ISR_URAT_IDR (0x0 << 12) +#define SHA_ISR_URAT_ODR (0x1 << 12) +#define SHA_ISR_URAT_MR (0x2 << 12) +#define SHA_ISR_URAT_WO (0x5 << 12) + +#define SHA_TPR 0x108 +#define SHA_TCR 0x10C +#define SHA_TNPR 0x118 +#define SHA_TNCR 0x11C +#define SHA_PTCR 0x120 +#define SHA_PTCR_TXTEN (1 << 8) +#define SHA_PTCR_TXTDIS (1 << 9) +#define SHA_PTSR 0x124 +#define SHA_PTSR_TXTEN (1 << 8) + +#endif /* __ATMEL_SHA_REGS_H__ */ diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c new file mode 100644 index 0000000..f938b9d --- /dev/null 
+++ b/drivers/crypto/atmel-sha.c @@ -0,0 +1,1112 @@ +/* + * Cryptographic API. + * + * Support for ATMEL SHA1/SHA256 HW acceleration. + * + * Copyright (c) 2012 Eukréa Electromatique - ATMEL + * Author: Nicolas Royer + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Some ideas are from omap-sham.c drivers. + */ + + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "atmel-sha-regs.h" + +/* SHA flags */ +#define SHA_FLAGS_BUSY BIT(0) +#define SHA_FLAGS_FINAL BIT(1) +#define SHA_FLAGS_DMA_ACTIVE BIT(2) +#define SHA_FLAGS_OUTPUT_READY BIT(3) +#define SHA_FLAGS_INIT BIT(4) +#define SHA_FLAGS_CPU BIT(5) +#define SHA_FLAGS_DMA_READY BIT(6) + +#define SHA_FLAGS_FINUP BIT(16) +#define SHA_FLAGS_SG BIT(17) +#define SHA_FLAGS_SHA1 BIT(18) +#define SHA_FLAGS_SHA256 BIT(19) +#define SHA_FLAGS_ERROR BIT(20) +#define SHA_FLAGS_PAD BIT(21) + +#define SHA_FLAGS_DUALBUFF BIT(24) + +#define SHA_OP_UPDATE 1 +#define SHA_OP_FINAL 2 + +#define SHA_BUFFER_LEN PAGE_SIZE + +#define ATMEL_SHA_DMA_THRESHOLD 56 + + +struct atmel_sha_dev; + +struct atmel_sha_reqctx { + struct atmel_sha_dev *dd; + unsigned long flags; + unsigned long op; + + u8 digest[SHA256_DIGEST_SIZE] __aligned(sizeof(u32)); + size_t digcnt; + size_t bufcnt; + size_t buflen; + dma_addr_t dma_addr; + + /* walk state */ + struct scatterlist *sg; + unsigned int offset; /* offset in current sg */ + unsigned int total; /* total request */ + + u8 buffer[0] __aligned(sizeof(u32)); +}; + +struct atmel_sha_ctx { + struct atmel_sha_dev *dd; + + unsigned long flags; + + /* fallback stuff */ + struct crypto_shash *fallback; + +}; + +#define 
ATMEL_SHA_QUEUE_LENGTH 1 + +struct atmel_sha_dev { + struct list_head list; + unsigned long phys_base; + struct device *dev; + struct clk *iclk; + int irq; + void __iomem *io_base; + + spinlock_t lock; + int err; + struct tasklet_struct done_task; + + unsigned long flags; + struct crypto_queue queue; + struct ahash_request *req; +}; + +struct atmel_sha_drv { + struct list_head dev_list; + spinlock_t lock; +}; + +static struct atmel_sha_drv atmel_sha = { + .dev_list = LIST_HEAD_INIT(atmel_sha.dev_list), + .lock = __SPIN_LOCK_UNLOCKED(atmel_sha.lock), +}; + +static inline u32 atmel_sha_read(struct atmel_sha_dev *dd, u32 offset) +{ + return readl_relaxed(dd->io_base + offset); +} + +static inline void atmel_sha_write(struct atmel_sha_dev *dd, + u32 offset, u32 value) +{ + writel_relaxed(value, dd->io_base + offset); +} + +static void atmel_sha_dualbuff_test(struct atmel_sha_dev *dd) +{ + atmel_sha_write(dd, SHA_MR, SHA_MR_DUALBUFF); + + if (atmel_sha_read(dd, SHA_MR) & SHA_MR_DUALBUFF) + dd->flags |= SHA_FLAGS_DUALBUFF; +} + +static size_t atmel_sha_append_sg(struct atmel_sha_reqctx *ctx) +{ + size_t count; + + while ((ctx->bufcnt < ctx->buflen) && ctx->total) { + count = min(ctx->sg->length - ctx->offset, ctx->total); + count = min(count, ctx->buflen - ctx->bufcnt); + + if (count <= 0) + break; + + scatterwalk_map_and_copy(ctx->buffer + ctx->bufcnt, ctx->sg, + ctx->offset, count, 0); + + ctx->bufcnt += count; + ctx->offset += count; + ctx->total -= count; + + if (ctx->offset == ctx->sg->length) { + ctx->sg = sg_next(ctx->sg); + if (ctx->sg) + ctx->offset = 0; + else + ctx->total = 0; + } + } + + return 0; +} + +/* + * The purpose of this padding is to ensure that the padded message + * is a multiple of 512 bits. The bit "1" is appended at the end of + * the message followed by "padlen-1" zero bits. Then a 64 bits block + * equals to the message length in bits is appended. 
+ *
+ * padlen is calculated as follows:
+ *  - if message length < 56 bytes then padlen = 56 - message length
+ *  - else padlen = 64 + 56 - message length
+ *
+ * @length is an extra byte count that is added to ctx->bufcnt and
+ * ctx->digcnt when the total message length in bits is computed.
+ */
+static void atmel_sha_fill_padding(struct atmel_sha_reqctx *ctx, int length)
+{
+	unsigned int index, padlen;
+	u64 bits;
+	u64 size;
+
+	/* widen before shifting so the bit count is not truncated to size_t */
+	bits = ((u64)ctx->bufcnt + ctx->digcnt + length) << 3;
+	size = cpu_to_be64(bits);
+
+	index = ctx->bufcnt & 0x3f;
+	padlen = (index < 56) ? (56 - index) : ((64+56) - index);
+	*(ctx->buffer + ctx->bufcnt) = 0x80;
+	memset(ctx->buffer + ctx->bufcnt + 1, 0, padlen-1);
+	memcpy(ctx->buffer + ctx->bufcnt + padlen, &size, 8);
+	ctx->bufcnt += padlen + 8;
+	ctx->flags |= SHA_FLAGS_PAD;
+}
+
+/*
+ * Bind the request to a SHA device (the first one on atmel_sha.dev_list
+ * unless the tfm already has one) and reset the per-request state.
+ * Returns 0, or -ENODEV when no device is available.
+ */
+static int atmel_sha_init(struct ahash_request *req)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct atmel_sha_ctx *tctx = crypto_ahash_ctx(tfm);
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+	struct atmel_sha_dev *dd = NULL;
+	struct atmel_sha_dev *tmp;
+
+	spin_lock_bh(&atmel_sha.lock);
+	if (!tctx->dd) {
+		list_for_each_entry(tmp, &atmel_sha.dev_list, list) {
+			dd = tmp;
+			break;
+		}
+		tctx->dd = dd;
+	} else {
+		dd = tctx->dd;
+	}
+
+	spin_unlock_bh(&atmel_sha.lock);
+
+	/* empty device list: dd is still NULL and must not be dereferenced */
+	if (!dd)
+		return -ENODEV;
+
+	ctx->dd = dd;
+
+	ctx->flags = 0;
+
+	dev_dbg(dd->dev, "init: digest size: %d\n",
+		crypto_ahash_digestsize(tfm));
+
+	if (crypto_ahash_digestsize(tfm) == SHA1_DIGEST_SIZE)
+		ctx->flags |= SHA_FLAGS_SHA1;
+	else if (crypto_ahash_digestsize(tfm) == SHA256_DIGEST_SIZE)
+		ctx->flags |= SHA_FLAGS_SHA256;
+
+	ctx->bufcnt = 0;
+	ctx->digcnt = 0;
+	ctx->buflen = SHA_BUFFER_LEN;
+
+	return 0;
+}
+
+/*
+ * Program SHA_CR/SHA_MR for the next transfer and enable the matching
+ * interrupt: TXBUFE when @dma is non-zero, DATARDY otherwise.
+ */
+static void atmel_sha_write_ctrl(struct atmel_sha_dev *dd, int dma)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
+	u32 valcr = 0, valmr = SHA_MR_MODE_AUTO;
+
+	if (likely(dma)) {
+		atmel_sha_write(dd, SHA_IER, SHA_INT_TXBUFE);
+		valmr = SHA_MR_MODE_PDC;
+		/* OR in DUALBUFF: a plain assignment would drop the PDC mode */
+		if (dd->flags & SHA_FLAGS_DUALBUFF)
+			valmr |= SHA_MR_DUALBUFF;
+	} else {
+		atmel_sha_write(dd, SHA_IER, SHA_INT_DATARDY);
+	}
+
+	if (ctx->flags & SHA_FLAGS_SHA256)
+		valmr |= SHA_MR_ALGO_SHA256;
+
+	/* Setting CR_FIRST only for the first iteration */
+	if (!ctx->digcnt)
+		valcr = SHA_CR_FIRST;
+
+	atmel_sha_write(dd, SHA_CR, valcr);
+	atmel_sha_write(dd, SHA_MR, valmr);
+}
+
+static int atmel_sha_xmit_cpu(struct atmel_sha_dev *dd, const u8 *buf,
+			      size_t length, int final)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
+	int count, len32;
+	const u32 *buffer = (const u32 *)buf;
+
+	dev_dbg(dd->dev, "xmit_cpu: digcnt: %d, length: %d, final: %d\n",
+						ctx->digcnt, length, final);
+
+	atmel_sha_write_ctrl(dd, 0);
+
+	/* should be non-zero before next lines to disable clocks later */
+	ctx->digcnt += length;
+
+	if (final)
+		dd->flags |= SHA_FLAGS_FINAL; /* catch last interrupt */
+
+	len32 = DIV_ROUND_UP(length, sizeof(u32));
+
+	dd->flags |= SHA_FLAGS_CPU;
+
+	for (count = 0; count < len32; count++)
+		atmel_sha_write(dd, SHA_REG_DIN(count), buffer[count]);
+
+	return -EINPROGRESS;
+}
+
+static int atmel_sha_xmit_pdc(struct atmel_sha_dev *dd, dma_addr_t dma_addr1,
+		size_t length1, dma_addr_t dma_addr2, size_t length2, int final)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
+	int len32;
+
+	dev_dbg(dd->dev, "xmit_pdc: digcnt: %d, length: %d, final: %d\n",
+						ctx->digcnt, length1, final);
+
+	len32 = DIV_ROUND_UP(length1, sizeof(u32));
+	atmel_sha_write(dd, SHA_PTCR, SHA_PTCR_TXTDIS);
+	atmel_sha_write(dd, SHA_TPR, dma_addr1);
+	atmel_sha_write(dd, SHA_TCR, len32);
+
+	len32 = DIV_ROUND_UP(length2, sizeof(u32));
+	atmel_sha_write(dd, SHA_TNPR, dma_addr2);
+	atmel_sha_write(dd, SHA_TNCR, len32);
+
+	atmel_sha_write_ctrl(dd, 1);
+
+	/* should be non-zero before next lines to disable clocks later */
+	ctx->digcnt += length1;
+
+	if (final)
+		dd->flags |= SHA_FLAGS_FINAL; /* catch last interrupt */
+
+	dd->flags |= SHA_FLAGS_DMA_ACTIVE;
+
+	/* Start DMA transfer */
+	atmel_sha_write(dd, SHA_PTCR, SHA_PTCR_TXTEN);
+
+	return -EINPROGRESS;
+}
+
+static int atmel_sha_update_cpu(struct atmel_sha_dev *dd)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
+	int bufcnt;
+
+	atmel_sha_append_sg(ctx);
+	atmel_sha_fill_padding(ctx, 0);
+
+	bufcnt = ctx->bufcnt;
+	ctx->bufcnt = 0;
+
+	return atmel_sha_xmit_cpu(dd, ctx->buffer, bufcnt, 1);
+}
+
+static int atmel_sha_xmit_dma_map(struct atmel_sha_dev *dd,
+					struct atmel_sha_reqctx *ctx,
+					size_t length, int final)
+{
+	ctx->dma_addr = dma_map_single(dd->dev, ctx->buffer,
+				ctx->buflen + SHA1_BLOCK_SIZE, DMA_TO_DEVICE);
+	if (dma_mapping_error(dd->dev, ctx->dma_addr)) {
+		dev_err(dd->dev, "dma %u bytes error\n", ctx->buflen +
+				SHA1_BLOCK_SIZE);
+		return -EINVAL;
+	}
+
+	ctx->flags &= ~SHA_FLAGS_SG;
+
+	/* next call does not fail... so no unmap in the case of error */
+	return atmel_sha_xmit_pdc(dd, ctx->dma_addr, length, 0, 0, final);
+}
+
+static int atmel_sha_update_dma_slow(struct atmel_sha_dev *dd)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
+	unsigned int final;
+	size_t count;
+
+	atmel_sha_append_sg(ctx);
+
+	final = (ctx->flags & SHA_FLAGS_FINUP) && !ctx->total;
+
+	dev_dbg(dd->dev, "slow: bufcnt: %u, digcnt: %d, final: %d\n",
+					 ctx->bufcnt, ctx->digcnt, final);
+
+	if (final)
+		atmel_sha_fill_padding(ctx, 0);
+
+	if (final || (ctx->bufcnt == ctx->buflen && ctx->total)) {
+		count = ctx->bufcnt;
+		ctx->bufcnt = 0;
+		return atmel_sha_xmit_dma_map(dd, ctx, count, final);
+	}
+
+	return 0;
+}
+
+static int atmel_sha_update_dma_start(struct atmel_sha_dev *dd)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
+	unsigned int length, final, tail;
+	struct scatterlist *sg;
+	unsigned int count;
+
+	if (!ctx->total)
+		return 0;
+
+	if (ctx->bufcnt || ctx->offset)
+		return atmel_sha_update_dma_slow(dd);
+
+	dev_dbg(dd->dev, "fast: digcnt: %d, bufcnt: %u, total: %u\n",
+			ctx->digcnt, ctx->bufcnt, ctx->total);
+
+	sg = ctx->sg;
+
+	if (!IS_ALIGNED(sg->offset, sizeof(u32)))
+		return atmel_sha_update_dma_slow(dd);
+
+	if (!sg_is_last(sg) && !IS_ALIGNED(sg->length, SHA1_BLOCK_SIZE))
+		/* size is not SHA1_BLOCK_SIZE aligned */
+		return atmel_sha_update_dma_slow(dd);
+
+	length = min(ctx->total, sg->length);
+
+	if (sg_is_last(sg)) {
+		if (!(ctx->flags & SHA_FLAGS_FINUP)) {
+			/* not last sg must be SHA1_BLOCK_SIZE aligned */
+			tail = length & (SHA1_BLOCK_SIZE - 1);
+			length -= tail;
+			if (length == 0) {
+				/* offset where to start slow */
+				ctx->offset = length;
+				return atmel_sha_update_dma_slow(dd);
+			}
+		}
+	}
+
+	ctx->total -= length;
+	ctx->offset = length; /* offset where to start slow */
+
+	final = (ctx->flags & SHA_FLAGS_FINUP) && !ctx->total;
+
+	/* Add padding */
+	if (final) {
+		tail = length & (SHA1_BLOCK_SIZE - 1);
+		length -= tail;
+		ctx->total += tail;
+		ctx->offset = length; /* offset where to start slow */
+
+		sg = ctx->sg;
+		atmel_sha_append_sg(ctx);
+
+		atmel_sha_fill_padding(ctx, length);
+
+		ctx->dma_addr = dma_map_single(dd->dev, ctx->buffer,
+			ctx->buflen + SHA1_BLOCK_SIZE, DMA_TO_DEVICE);
+		if (dma_mapping_error(dd->dev, ctx->dma_addr)) {
+			dev_err(dd->dev, "dma %u bytes error\n",
+				ctx->buflen + SHA1_BLOCK_SIZE);
+			return -EINVAL;
+		}
+
+		if (length == 0) {
+			ctx->flags &= ~SHA_FLAGS_SG;
+			count = ctx->bufcnt;
+			ctx->bufcnt = 0;
+			return atmel_sha_xmit_pdc(dd, ctx->dma_addr, count, 0,
+					0, final);
+		} else {
+			ctx->sg = sg;
+			if (!dma_map_sg(dd->dev, ctx->sg, 1,
+				DMA_TO_DEVICE)) {
+				dev_err(dd->dev, "dma_map_sg error\n");
+				/* undo the buffer mapping made just above */
+				dma_unmap_single(dd->dev, ctx->dma_addr,
+					ctx->buflen + SHA1_BLOCK_SIZE,
+					DMA_TO_DEVICE);
+				return -EINVAL;
+			}
+
+			ctx->flags |= SHA_FLAGS_SG;
+
+			count = ctx->bufcnt;
+			ctx->bufcnt = 0;
+			return atmel_sha_xmit_pdc(dd, sg_dma_address(ctx->sg),
+					length, ctx->dma_addr, count, final);
+		}
+	}
+
+	if (!dma_map_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE)) {
+		dev_err(dd->dev, "dma_map_sg error\n");
+		return -EINVAL;
+	}
+
+	ctx->flags |= SHA_FLAGS_SG;
+
+	/* next call does not fail... so no unmap in the case of error */
+	return atmel_sha_xmit_pdc(dd, sg_dma_address(ctx->sg), length, 0,
+								0, final);
+}
+
+static int atmel_sha_update_dma_stop(struct atmel_sha_dev *dd)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
+
+	if (ctx->flags & SHA_FLAGS_SG) {
+		dma_unmap_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE);
+		if (ctx->sg->length == ctx->offset) {
+			ctx->sg = sg_next(ctx->sg);
+			if (ctx->sg)
+				ctx->offset = 0;
+		}
+		if (ctx->flags & SHA_FLAGS_PAD)
+			dma_unmap_single(dd->dev, ctx->dma_addr,
+				ctx->buflen + SHA1_BLOCK_SIZE, DMA_TO_DEVICE);
+	} else {
+		dma_unmap_single(dd->dev, ctx->dma_addr, ctx->buflen +
+						SHA1_BLOCK_SIZE, DMA_TO_DEVICE);
+	}
+
+	return 0;
+}
+
+static int atmel_sha_update_req(struct atmel_sha_dev *dd)
+{
+	struct ahash_request *req = dd->req;
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+	int err;
+
+	dev_dbg(dd->dev, "update_req: total: %u, digcnt: %d, finup: %d\n",
+		ctx->total, ctx->digcnt, (ctx->flags & SHA_FLAGS_FINUP) != 0);
+
+	if (ctx->flags & SHA_FLAGS_CPU)
+		err = atmel_sha_update_cpu(dd);
+	else
+		err = atmel_sha_update_dma_start(dd);
+
+	/* wait for dma completion before can take more data */
+	dev_dbg(dd->dev, "update: err: %d, digcnt: %d\n",
+			err, ctx->digcnt);
+
+	return err;
+}
+
+static int atmel_sha_final_req(struct atmel_sha_dev *dd)
+{
+	struct ahash_request *req = dd->req;
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+	int err = 0;
+	int count;
+
+	if (ctx->bufcnt >= ATMEL_SHA_DMA_THRESHOLD) {
+		atmel_sha_fill_padding(ctx, 0);
+		count = ctx->bufcnt;
+		ctx->bufcnt = 0;
+		err = atmel_sha_xmit_dma_map(dd, ctx, count, 1);
+	}
+	/* faster to handle last block with cpu */
+	else {
+		atmel_sha_fill_padding(ctx, 0);
+		count = ctx->bufcnt;
+		ctx->bufcnt = 0;
+		err = atmel_sha_xmit_cpu(dd, ctx->buffer, count, 1);
+	}
+
+	dev_dbg(dd->dev, "final_req: err: %d\n", err);
+
+	return err;
+}
+
+static void atmel_sha_copy_hash(struct ahash_request *req)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+	u32 *hash = (u32 *)ctx->digest;
+	int i;
+
+	if (likely(ctx->flags & SHA_FLAGS_SHA1))
+		for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(u32); i++)
+			hash[i] = atmel_sha_read(ctx->dd, SHA_REG_DIGEST(i));
+	else
+		for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(u32); i++)
+			hash[i] = atmel_sha_read(ctx->dd, SHA_REG_DIGEST(i));
+}
+
+static void atmel_sha_copy_ready_hash(struct ahash_request *req)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+
+	if (!req->result)
+		return;
+
+	if (likely(ctx->flags & SHA_FLAGS_SHA1))
+		memcpy(req->result, ctx->digest, SHA1_DIGEST_SIZE);
+	else
+		memcpy(req->result, ctx->digest, SHA256_DIGEST_SIZE);
+}
+
+static int atmel_sha_finish(struct ahash_request *req)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+	struct atmel_sha_dev *dd = ctx->dd;
+	int err = 0;
+
+	if (ctx->digcnt)
+		atmel_sha_copy_ready_hash(req);
+
+	dev_dbg(dd->dev, "digcnt: %d, bufcnt: %d\n", ctx->digcnt,
+		ctx->bufcnt);
+
+	return err;
+}
+
+static void atmel_sha_finish_req(struct ahash_request *req, int err)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+	struct atmel_sha_dev *dd = ctx->dd;
+
+	if (!err) {
+		atmel_sha_copy_hash(req);
+		if (SHA_FLAGS_FINAL & dd->flags)
+			err = atmel_sha_finish(req);
+	} else {
+		ctx->flags |= SHA_FLAGS_ERROR;
+	}
+
+	/* atomic operation is not needed here */
+	dd->flags &= ~(SHA_FLAGS_BUSY | SHA_FLAGS_FINAL | SHA_FLAGS_CPU |
+			SHA_FLAGS_DMA_READY | SHA_FLAGS_OUTPUT_READY);
+
+	clk_disable_unprepare(dd->iclk);
+
+	if (req->base.complete)
+		req->base.complete(&req->base, err);
+
+	/* handle new request */
+	tasklet_schedule(&dd->done_task);
+}
+
+/*
+ * Enable the peripheral clock and, on first use only, soft-reset the SHA
+ * block and run the dual-buffer test. Always returns 0.
+ */
+static int atmel_sha_hw_init(struct atmel_sha_dev *dd)
+{
+	clk_prepare_enable(dd->iclk);
+
+	/* one-time setup: must run while SHA_FLAGS_INIT is still clear */
+	if (!(SHA_FLAGS_INIT & dd->flags)) {
+		atmel_sha_write(dd, SHA_CR, SHA_CR_SWRST);
+		atmel_sha_dualbuff_test(dd);
+		dd->flags |= SHA_FLAGS_INIT;
+		dd->err = 0;
+	}
+
+	return 0;
+}
+
+static int atmel_sha_handle_queue(struct atmel_sha_dev *dd, +
struct ahash_request *req) +{ + struct crypto_async_request *async_req, *backlog; + struct atmel_sha_reqctx *ctx; + unsigned long flags; + int err = 0, ret = 0; + + spin_lock_irqsave(&dd->lock, flags); + if (req) + ret = ahash_enqueue_request(&dd->queue, req); + + if (SHA_FLAGS_BUSY & dd->flags) { + spin_unlock_irqrestore(&dd->lock, flags); + return ret; + } + + backlog = crypto_get_backlog(&dd->queue); + async_req = crypto_dequeue_request(&dd->queue); + if (async_req) + dd->flags |= SHA_FLAGS_BUSY; + + spin_unlock_irqrestore(&dd->lock, flags); + + if (!async_req) + return ret; + + if (backlog) + backlog->complete(backlog, -EINPROGRESS); + + req = ahash_request_cast(async_req); + dd->req = req; + ctx = ahash_request_ctx(req); + + dev_dbg(dd->dev, "handling new req, op: %lu, nbytes: %d\n", + ctx->op, req->nbytes); + + err = atmel_sha_hw_init(dd); + + if (err) + goto err1; + + if (ctx->op == SHA_OP_UPDATE) { + err = atmel_sha_update_req(dd); + if (err != -EINPROGRESS && (ctx->flags & SHA_FLAGS_FINUP)) { + /* no final() after finup() */ + err = atmel_sha_final_req(dd); + } + } else if (ctx->op == SHA_OP_FINAL) { + err = atmel_sha_final_req(dd); + } + +err1: + if (err != -EINPROGRESS) + /* done_task will not finish it, so do it here */ + atmel_sha_finish_req(req, err); + + dev_dbg(dd->dev, "exit, err: %d\n", err); + + return ret; +} + +static int atmel_sha_enqueue(struct ahash_request *req, unsigned int op) +{ + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + struct atmel_sha_ctx *tctx = crypto_tfm_ctx(req->base.tfm); + struct atmel_sha_dev *dd = tctx->dd; + + ctx->op = op; + + return atmel_sha_handle_queue(dd, req); +} + +static int atmel_sha_update(struct ahash_request *req) +{ + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + + if (!req->nbytes) + return 0; + + ctx->total = req->nbytes; + ctx->sg = req->src; + ctx->offset = 0; + + if (ctx->flags & SHA_FLAGS_FINUP) { + if (ctx->bufcnt + ctx->total < ATMEL_SHA_DMA_THRESHOLD) + /* faster to use CPU 
for short transfers */
+			ctx->flags |= SHA_FLAGS_CPU;
+	} else if (ctx->bufcnt + ctx->total < ctx->buflen) {
+		atmel_sha_append_sg(ctx);
+		return 0;
+	}
+	return atmel_sha_enqueue(req, SHA_OP_UPDATE);
+}
+
+/*
+ * Finish a hash. A SHA_OP_FINAL request is queued unless padding has
+ * already been added and no data is buffered, in which case the digest
+ * held in the request context is simply copied out.
+ */
+static int atmel_sha_final(struct ahash_request *req)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+
+	ctx->flags |= SHA_FLAGS_FINUP;
+
+	if (ctx->flags & SHA_FLAGS_ERROR)
+		return 0; /* uncompleted hash is not needed */
+
+	/*
+	 * Always go through the queue when padding still has to be added:
+	 * atmel_sha_handle_queue() takes dd->lock, sets SHA_FLAGS_BUSY and
+	 * points dd->req at this request before atmel_sha_final_req() runs.
+	 * Driving the hardware directly from here would use whatever
+	 * dd->req happened to hold and could collide with a request that
+	 * is still in flight.
+	 */
+	if (ctx->bufcnt || !(ctx->flags & SHA_FLAGS_PAD))
+		return atmel_sha_enqueue(req, SHA_OP_FINAL);
+
+	/* copy ready hash (+ finalize hmac) */
+	return atmel_sha_finish(req);
+}
+
+static int atmel_sha_finup(struct ahash_request *req)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+	int err1, err2;
+
+	ctx->flags |= SHA_FLAGS_FINUP;
+
+	err1 = atmel_sha_update(req);
+	if (err1 == -EINPROGRESS || err1 == -EBUSY)
+		return err1;
+
+	/*
+	 * final() has to be always called to cleanup resources
+	 * even if update() failed, except EINPROGRESS
+	 */
+	err2 = atmel_sha_final(req);
+
+	return err1 ?: err2;
+}
+
+static int atmel_sha_digest(struct ahash_request *req)
+{
+	return atmel_sha_init(req) ?: atmel_sha_finup(req);
+}
+
+static int atmel_sha_cra_init_alg(struct crypto_tfm *tfm, const char *alg_base)
+{
+	struct atmel_sha_ctx *tctx = crypto_tfm_ctx(tfm);
+	const char *alg_name = crypto_tfm_alg_name(tfm);
+
+	/* Allocate a fallback and abort if it failed.
*/ + tctx->fallback = crypto_alloc_shash(alg_name, 0, + CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(tctx->fallback)) { + pr_err("atmel-sha: fallback driver '%s' could not be loaded.\n", + alg_name); + return PTR_ERR(tctx->fallback); + } + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct atmel_sha_reqctx) + + SHA_BUFFER_LEN + SHA256_BLOCK_SIZE); + + return 0; +} + +static int atmel_sha_cra_init(struct crypto_tfm *tfm) +{ + return atmel_sha_cra_init_alg(tfm, NULL); +} + +static void atmel_sha_cra_exit(struct crypto_tfm *tfm) +{ + struct atmel_sha_ctx *tctx = crypto_tfm_ctx(tfm); + + crypto_free_shash(tctx->fallback); + tctx->fallback = NULL; +} + +static struct ahash_alg sha_algs[] = { +{ + .init = atmel_sha_init, + .update = atmel_sha_update, + .final = atmel_sha_final, + .finup = atmel_sha_finup, + .digest = atmel_sha_digest, + .halg = { + .digestsize = SHA1_DIGEST_SIZE, + .base = { + .cra_name = "sha1", + .cra_driver_name = "atmel-sha1", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_sha_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = atmel_sha_cra_init, + .cra_exit = atmel_sha_cra_exit, + } + } +}, +{ + .init = atmel_sha_init, + .update = atmel_sha_update, + .final = atmel_sha_final, + .finup = atmel_sha_finup, + .digest = atmel_sha_digest, + .halg = { + .digestsize = SHA256_DIGEST_SIZE, + .base = { + .cra_name = "sha256", + .cra_driver_name = "atmel-sha256", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_sha_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = atmel_sha_cra_init, + .cra_exit = atmel_sha_cra_exit, + } + } +}, +}; + +static void atmel_sha_done_task(unsigned long data) +{ + struct atmel_sha_dev *dd = (struct atmel_sha_dev *)data; + int err = 0; + + if (!(SHA_FLAGS_BUSY & 
dd->flags)) { + atmel_sha_handle_queue(dd, NULL); + return; + } + + if (SHA_FLAGS_CPU & dd->flags) { + if (SHA_FLAGS_OUTPUT_READY & dd->flags) { + dd->flags &= ~SHA_FLAGS_OUTPUT_READY; + goto finish; + } + } else if (SHA_FLAGS_DMA_READY & dd->flags) { + if (SHA_FLAGS_DMA_ACTIVE & dd->flags) { + dd->flags &= ~SHA_FLAGS_DMA_ACTIVE; + atmel_sha_update_dma_stop(dd); + if (dd->err) { + err = dd->err; + goto finish; + } + } + if (SHA_FLAGS_OUTPUT_READY & dd->flags) { + /* hash or semi-hash ready */ + dd->flags &= ~(SHA_FLAGS_DMA_READY | + SHA_FLAGS_OUTPUT_READY); + err = atmel_sha_update_dma_start(dd); + if (err != -EINPROGRESS) + goto finish; + } + } + return; + +finish: + /* finish curent request */ + atmel_sha_finish_req(dd->req, err); +} + +static irqreturn_t atmel_sha_irq(int irq, void *dev_id) +{ + struct atmel_sha_dev *sha_dd = dev_id; + u32 reg; + + reg = atmel_sha_read(sha_dd, SHA_ISR); + if (reg & atmel_sha_read(sha_dd, SHA_IMR)) { + atmel_sha_write(sha_dd, SHA_IDR, reg); + if (SHA_FLAGS_BUSY & sha_dd->flags) { + sha_dd->flags |= SHA_FLAGS_OUTPUT_READY; + if (!(SHA_FLAGS_CPU & sha_dd->flags)) + sha_dd->flags |= SHA_FLAGS_DMA_READY; + tasklet_schedule(&sha_dd->done_task); + } else { + dev_warn(sha_dd->dev, "SHA interrupt when no active requests.\n"); + } + return IRQ_HANDLED; + } + + return IRQ_NONE; +} + +static void atmel_sha_unregister_algs(struct atmel_sha_dev *dd) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(sha_algs); i++) + crypto_unregister_ahash(&sha_algs[i]); +} + +static int atmel_sha_register_algs(struct atmel_sha_dev *dd) +{ + int err, i, j; + + for (i = 0; i < ARRAY_SIZE(sha_algs); i++) { + err = crypto_register_ahash(&sha_algs[i]); + if (err) + goto err_sha_algs; + } + + return 0; + +err_sha_algs: + for (j = 0; j < i; j++) + crypto_unregister_ahash(&sha_algs[j]); + + return err; +} + +static int __devinit atmel_sha_probe(struct platform_device *pdev) +{ + struct atmel_sha_dev *sha_dd; + struct device *dev = &pdev->dev; + struct resource *sha_res; + 
unsigned long sha_phys_size;
+	int err;
+
+	sha_dd = kzalloc(sizeof(struct atmel_sha_dev), GFP_KERNEL);
+	if (sha_dd == NULL) {
+		dev_err(dev, "unable to alloc data struct.\n");
+		err = -ENOMEM;
+		goto sha_dd_err;
+	}
+
+	sha_dd->dev = dev;
+
+	platform_set_drvdata(pdev, sha_dd);
+
+	INIT_LIST_HEAD(&sha_dd->list);
+	/*
+	 * sha_dd->lock is taken in atmel_sha_handle_queue(); a lock living in
+	 * kzalloc'ed memory has to be initialised explicitly before use.
+	 */
+	spin_lock_init(&sha_dd->lock);
+
+	tasklet_init(&sha_dd->done_task, atmel_sha_done_task,
+					(unsigned long)sha_dd);
+
+	crypto_init_queue(&sha_dd->queue, ATMEL_SHA_QUEUE_LENGTH);
+
+	/* marks "no IRQ obtained yet"; overwritten by platform_get_irq() */
+	sha_dd->irq = -1;
+
+	/* Get the base address */
+	sha_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!sha_res) {
+		dev_err(dev, "no MEM resource info\n");
+		err = -ENODEV;
+		goto res_err;
+	}
+	sha_dd->phys_base = sha_res->start;
+	sha_phys_size = resource_size(sha_res);
+
+	/* Get the IRQ */
+	sha_dd->irq = platform_get_irq(pdev, 0);
+	if (sha_dd->irq < 0) {
+		dev_err(dev, "no IRQ resource info\n");
+		err = sha_dd->irq;
+		goto res_err;
+	}
+
+	err = request_irq(sha_dd->irq, atmel_sha_irq, IRQF_SHARED, "atmel-sha",
+						sha_dd);
+	if (err) {
+		dev_err(dev, "unable to request sha irq.\n");
+		goto res_err;
+	}
+
+	/* Initializing the clock */
+	sha_dd->iclk = clk_get(&pdev->dev, NULL);
+	if (IS_ERR(sha_dd->iclk)) {
+		dev_err(dev, "clock intialization failed.\n");
+		err = PTR_ERR(sha_dd->iclk);
+		goto clk_err;
+	}
+
+	sha_dd->io_base = ioremap(sha_dd->phys_base, sha_phys_size);
+	if (!sha_dd->io_base) {
+		dev_err(dev, "can't ioremap\n");
+		err = -ENOMEM;
+		goto sha_io_err;
+	}
+
+	/* publish the device so atmel_sha_init() can find it */
+	spin_lock(&atmel_sha.lock);
+	list_add_tail(&sha_dd->list, &atmel_sha.dev_list);
+	spin_unlock(&atmel_sha.lock);
+
+	err = atmel_sha_register_algs(sha_dd);
+	if (err)
+		goto err_algs;
+
+	dev_info(dev, "Atmel SHA1/SHA256\n");
+
+	return 0;
+
+	/* error unwinding: each label releases what was acquired before it */
+err_algs:
+	spin_lock(&atmel_sha.lock);
+	list_del(&sha_dd->list);
+	spin_unlock(&atmel_sha.lock);
+	iounmap(sha_dd->io_base);
+sha_io_err:
+	clk_put(sha_dd->iclk);
+clk_err:
+	free_irq(sha_dd->irq, sha_dd);
+res_err:
+	tasklet_kill(&sha_dd->done_task);
+	kfree(sha_dd);
+	sha_dd =
NULL; +sha_dd_err: + dev_err(dev, "initialization failed.\n"); + + return err; +} + +static int __devexit atmel_sha_remove(struct platform_device *pdev) +{ + static struct atmel_sha_dev *sha_dd; + + sha_dd = platform_get_drvdata(pdev); + if (!sha_dd) + return -ENODEV; + spin_lock(&atmel_sha.lock); + list_del(&sha_dd->list); + spin_unlock(&atmel_sha.lock); + + atmel_sha_unregister_algs(sha_dd); + + tasklet_kill(&sha_dd->done_task); + + iounmap(sha_dd->io_base); + + clk_put(sha_dd->iclk); + + if (sha_dd->irq >= 0) + free_irq(sha_dd->irq, sha_dd); + + kfree(sha_dd); + sha_dd = NULL; + + return 0; +} + +static struct platform_driver atmel_sha_driver = { + .probe = atmel_sha_probe, + .remove = __devexit_p(atmel_sha_remove), + .driver = { + .name = "atmel_sha", + .owner = THIS_MODULE, + }, +}; + +module_platform_driver(atmel_sha_driver); + +MODULE_DESCRIPTION("Atmel SHA1/SHA256 hw acceleration support."); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Nicolas Royer - Eukréa Electromatique"); -- cgit v0.10.2 From de197533485c09598215802b0e401a688e172573 Mon Sep 17 00:00:00 2001 From: Nicolas Royer Date: Sun, 1 Jul 2012 19:19:47 +0200 Subject: crypto: atmel - add new tests to tcrypt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - set sg buffers size equals to message size - add cfb & ofb tests for AES, DES & TDES Signed-off-by: Nicolas Royer Acked-by: Nicolas Ferre Acked-by: Eric Bénard Tested-by: Eric Bénard Signed-off-by: Herbert Xu diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 58ad4fe..5cf2ccb 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -809,7 +809,7 @@ static void test_acipher_speed(const char *algo, int enc, unsigned int sec, struct cipher_speed_template *template, unsigned int tcount, u8 *keysize) { - unsigned int ret, i, j, iv_len; + unsigned int ret, i, j, k, iv_len; struct tcrypt_result tresult; const char *key; char iv[128]; @@ -883,11 +883,23 @@ static void test_acipher_speed(const char *algo, int enc, 
unsigned int sec, } sg_init_table(sg, TVMEMSIZE); - sg_set_buf(sg, tvmem[0] + *keysize, + + k = *keysize + *b_size; + if (k > PAGE_SIZE) { + sg_set_buf(sg, tvmem[0] + *keysize, PAGE_SIZE - *keysize); - for (j = 1; j < TVMEMSIZE; j++) { - sg_set_buf(sg + j, tvmem[j], PAGE_SIZE); - memset(tvmem[j], 0xff, PAGE_SIZE); + k -= PAGE_SIZE; + j = 1; + while (k > PAGE_SIZE) { + sg_set_buf(sg + j, tvmem[j], PAGE_SIZE); + memset(tvmem[j], 0xff, PAGE_SIZE); + j++; + k -= PAGE_SIZE; + } + sg_set_buf(sg + j, tvmem[j], k); + memset(tvmem[j], 0xff, k); + } else { + sg_set_buf(sg, tvmem[0] + *keysize, *b_size); } iv_len = crypto_ablkcipher_ivsize(tfm); @@ -1520,6 +1532,14 @@ static int do_test(int m) speed_template_16_24_32); test_acipher_speed("ctr(aes)", DECRYPT, sec, NULL, 0, speed_template_16_24_32); + test_acipher_speed("cfb(aes)", ENCRYPT, sec, NULL, 0, + speed_template_16_24_32); + test_acipher_speed("cfb(aes)", DECRYPT, sec, NULL, 0, + speed_template_16_24_32); + test_acipher_speed("ofb(aes)", ENCRYPT, sec, NULL, 0, + speed_template_16_24_32); + test_acipher_speed("ofb(aes)", DECRYPT, sec, NULL, 0, + speed_template_16_24_32); break; case 501: @@ -1535,6 +1555,18 @@ static int do_test(int m) test_acipher_speed("cbc(des3_ede)", DECRYPT, sec, des3_speed_template, DES3_SPEED_VECTORS, speed_template_24); + test_acipher_speed("cfb(des3_ede)", ENCRYPT, sec, + des3_speed_template, DES3_SPEED_VECTORS, + speed_template_24); + test_acipher_speed("cfb(des3_ede)", DECRYPT, sec, + des3_speed_template, DES3_SPEED_VECTORS, + speed_template_24); + test_acipher_speed("ofb(des3_ede)", ENCRYPT, sec, + des3_speed_template, DES3_SPEED_VECTORS, + speed_template_24); + test_acipher_speed("ofb(des3_ede)", DECRYPT, sec, + des3_speed_template, DES3_SPEED_VECTORS, + speed_template_24); break; case 502: @@ -1546,6 +1578,14 @@ static int do_test(int m) speed_template_8); test_acipher_speed("cbc(des)", DECRYPT, sec, NULL, 0, speed_template_8); + test_acipher_speed("cfb(des)", ENCRYPT, sec, NULL, 0, + 
speed_template_8); + test_acipher_speed("cfb(des)", DECRYPT, sec, NULL, 0, + speed_template_8); + test_acipher_speed("ofb(des)", ENCRYPT, sec, NULL, 0, + speed_template_8); + test_acipher_speed("ofb(des)", DECRYPT, sec, NULL, 0, + speed_template_8); break; case 503: -- cgit v0.10.2 From d1a0eb98e7a8b7e244e31430fdb1a752243e2698 Mon Sep 17 00:00:00 2001 From: Horia Geanta Date: Tue, 3 Jul 2012 19:16:51 +0300 Subject: crypto: talitos - move talitos structures to header file This patch moves the talitos structure definitions from the C file to its header file so that they can be shared as needed. Signed-off-by: Sandeep Malik Signed-off-by: Kim Phillips Signed-off-by: Horia Geanta Signed-off-by: Herbert Xu diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 921039e..bb3e63f 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -53,117 +53,6 @@ #include "talitos.h" -#define TALITOS_TIMEOUT 100000 -#define TALITOS_MAX_DATA_LEN 65535 - -#define DESC_TYPE(desc_hdr) ((be32_to_cpu(desc_hdr) >> 3) & 0x1f) -#define PRIMARY_EU(desc_hdr) ((be32_to_cpu(desc_hdr) >> 28) & 0xf) -#define SECONDARY_EU(desc_hdr) ((be32_to_cpu(desc_hdr) >> 16) & 0xf) - -/* descriptor pointer entry */ -struct talitos_ptr { - __be16 len; /* length */ - u8 j_extent; /* jump to sg link table and/or extent */ - u8 eptr; /* extended address */ - __be32 ptr; /* address */ -}; - -static const struct talitos_ptr zero_entry = { - .len = 0, - .j_extent = 0, - .eptr = 0, - .ptr = 0 -}; - -/* descriptor */ -struct talitos_desc { - __be32 hdr; /* header high bits */ - __be32 hdr_lo; /* header low bits */ - struct talitos_ptr ptr[7]; /* ptr/len pair array */ -}; - -/** - * talitos_request - descriptor submission request - * @desc: descriptor pointer (kernel virtual) - * @dma_desc: descriptor's physical bus address - * @callback: whom to call when descriptor processing is done - * @context: caller context (optional) - */ -struct talitos_request { - struct talitos_desc *desc; - 
dma_addr_t dma_desc; - void (*callback) (struct device *dev, struct talitos_desc *desc, - void *context, int error); - void *context; -}; - -/* per-channel fifo management */ -struct talitos_channel { - void __iomem *reg; - - /* request fifo */ - struct talitos_request *fifo; - - /* number of requests pending in channel h/w fifo */ - atomic_t submit_count ____cacheline_aligned; - - /* request submission (head) lock */ - spinlock_t head_lock ____cacheline_aligned; - /* index to next free descriptor request */ - int head; - - /* request release (tail) lock */ - spinlock_t tail_lock ____cacheline_aligned; - /* index to next in-progress/done descriptor request */ - int tail; -}; - -struct talitos_private { - struct device *dev; - struct platform_device *ofdev; - void __iomem *reg; - int irq[2]; - - /* SEC global registers lock */ - spinlock_t reg_lock ____cacheline_aligned; - - /* SEC version geometry (from device tree node) */ - unsigned int num_channels; - unsigned int chfifo_len; - unsigned int exec_units; - unsigned int desc_types; - - /* SEC Compatibility info */ - unsigned long features; - - /* - * length of the request fifo - * fifo_len is chfifo_len rounded up to next power of 2 - * so we can use bitwise ops to wrap - */ - unsigned int fifo_len; - - struct talitos_channel *chan; - - /* next channel to be assigned next incoming descriptor */ - atomic_t last_chan ____cacheline_aligned; - - /* request callback tasklet */ - struct tasklet_struct done_task[2]; - - /* list of registered algorithms */ - struct list_head alg_list; - - /* hwrng device */ - struct hwrng rng; -}; - -/* .features flag */ -#define TALITOS_FTR_SRC_LINK_TBL_LEN_INCLUDES_EXTENT 0x00000001 -#define TALITOS_FTR_HW_AUTH_CHECK 0x00000002 -#define TALITOS_FTR_SHA224_HWINIT 0x00000004 -#define TALITOS_FTR_HMAC_OK 0x00000008 - static void to_talitos_ptr(struct talitos_ptr *talitos_ptr, dma_addr_t dma_addr) { talitos_ptr->ptr = cpu_to_be32(lower_32_bits(dma_addr)); diff --git 
a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h index 3c17395..9835e3a 100644 --- a/drivers/crypto/talitos.h +++ b/drivers/crypto/talitos.h @@ -28,6 +28,117 @@ * */ +#define TALITOS_TIMEOUT 100000 +#define TALITOS_MAX_DATA_LEN 65535 + +#define DESC_TYPE(desc_hdr) ((be32_to_cpu(desc_hdr) >> 3) & 0x1f) +#define PRIMARY_EU(desc_hdr) ((be32_to_cpu(desc_hdr) >> 28) & 0xf) +#define SECONDARY_EU(desc_hdr) ((be32_to_cpu(desc_hdr) >> 16) & 0xf) + +/* descriptor pointer entry */ +struct talitos_ptr { + __be16 len; /* length */ + u8 j_extent; /* jump to sg link table and/or extent */ + u8 eptr; /* extended address */ + __be32 ptr; /* address */ +}; + +static const struct talitos_ptr zero_entry = { + .len = 0, + .j_extent = 0, + .eptr = 0, + .ptr = 0 +}; + +/* descriptor */ +struct talitos_desc { + __be32 hdr; /* header high bits */ + __be32 hdr_lo; /* header low bits */ + struct talitos_ptr ptr[7]; /* ptr/len pair array */ +}; + +/** + * talitos_request - descriptor submission request + * @desc: descriptor pointer (kernel virtual) + * @dma_desc: descriptor's physical bus address + * @callback: whom to call when descriptor processing is done + * @context: caller context (optional) + */ +struct talitos_request { + struct talitos_desc *desc; + dma_addr_t dma_desc; + void (*callback) (struct device *dev, struct talitos_desc *desc, + void *context, int error); + void *context; +}; + +/* per-channel fifo management */ +struct talitos_channel { + void __iomem *reg; + + /* request fifo */ + struct talitos_request *fifo; + + /* number of requests pending in channel h/w fifo */ + atomic_t submit_count ____cacheline_aligned; + + /* request submission (head) lock */ + spinlock_t head_lock ____cacheline_aligned; + /* index to next free descriptor request */ + int head; + + /* request release (tail) lock */ + spinlock_t tail_lock ____cacheline_aligned; + /* index to next in-progress/done descriptor request */ + int tail; +}; + +struct talitos_private { + struct device *dev; + struct 
platform_device *ofdev; + void __iomem *reg; + int irq[2]; + + /* SEC global registers lock */ + spinlock_t reg_lock ____cacheline_aligned; + + /* SEC version geometry (from device tree node) */ + unsigned int num_channels; + unsigned int chfifo_len; + unsigned int exec_units; + unsigned int desc_types; + + /* SEC Compatibility info */ + unsigned long features; + + /* + * length of the request fifo + * fifo_len is chfifo_len rounded up to next power of 2 + * so we can use bitwise ops to wrap + */ + unsigned int fifo_len; + + struct talitos_channel *chan; + + /* next channel to be assigned next incoming descriptor */ + atomic_t last_chan ____cacheline_aligned; + + /* request callback tasklet */ + struct tasklet_struct done_task[2]; + + /* list of registered algorithms */ + struct list_head alg_list; + + /* hwrng device */ + struct hwrng rng; +}; + +/* .features flag */ +#define TALITOS_FTR_SRC_LINK_TBL_LEN_INCLUDES_EXTENT 0x00000001 +#define TALITOS_FTR_HW_AUTH_CHECK 0x00000002 +#define TALITOS_FTR_SHA224_HWINIT 0x00000004 +#define TALITOS_FTR_HMAC_OK 0x00000008 + /* * TALITOS_xxx_LO addresses point to the low data bits (32-63) of the register */ -- cgit v0.10.2 From 865d506155b117edc7e668ced373030ce7108ce9 Mon Sep 17 00:00:00 2001 From: Horia Geanta Date: Tue, 3 Jul 2012 19:16:52 +0300 Subject: crypto: talitos - export the talitos_submit function This patch exports the talitos_submit function so that it can be used by other entities as needed. Signed-off-by: Sandeep Malik Signed-off-by: Kim Phillips Signed-off-by: Horia Geanta Signed-off-by: Herbert Xu diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index bb3e63f..2561aea 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -192,11 +192,11 @@ static int init_device(struct device *dev) * callback must check err and feedback in descriptor header * for device processing status. 
*/ -static int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc, - void (*callback)(struct device *dev, - struct talitos_desc *desc, - void *context, int error), - void *context) +int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc, + void (*callback)(struct device *dev, + struct talitos_desc *desc, + void *context, int error), + void *context) { struct talitos_private *priv = dev_get_drvdata(dev); struct talitos_request *request; @@ -237,6 +237,7 @@ static int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc, return -EINPROGRESS; } +EXPORT_SYMBOL(talitos_submit); /* * process what was done, notify callback of error if not diff --git a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h index 9835e3a..41b80f6 100644 --- a/drivers/crypto/talitos.h +++ b/drivers/crypto/talitos.h @@ -133,6 +133,12 @@ struct talitos_private { struct hwrng rng; }; +extern int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc, + void (*callback)(struct device *dev, + struct talitos_desc *desc, + void *context, int error), + void *context); + /* .features flag */ #define TALITOS_FTR_SRC_LINK_TBL_LEN_INCLUDES_EXTENT 0x00000001 #define TALITOS_FTR_HW_AUTH_CHECK 0x00000002 -- cgit v0.10.2 From 357fb60502ede168fa1b76b996298e0045e24b59 Mon Sep 17 00:00:00 2001 From: Horia Geanta Date: Tue, 3 Jul 2012 19:16:53 +0300 Subject: crypto: talitos - add sha224, sha384 and sha512 to existing AEAD algorithms With this, now all combinations of CBC: AES, 3DES-EDE with HMAC: SHA-1, SHA-224, SHA-256, SHA-384, SHA-512 are supported. 
Signed-off-by: Horia Geanta Signed-off-by: Herbert Xu diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 2561aea..efff788 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -623,7 +623,7 @@ static void talitos_unregister_rng(struct device *dev) * crypto alg */ #define TALITOS_CRA_PRIORITY 3000 -#define TALITOS_MAX_KEY_SIZE 64 +#define TALITOS_MAX_KEY_SIZE 96 #define TALITOS_MAX_IV_LENGTH 16 /* max of AES_BLOCK_SIZE, DES3_EDE_BLOCK_SIZE */ #define MD5_BLOCK_SIZE 64 @@ -1956,6 +1956,59 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_SHA1_HMAC, }, + { .type = CRYPTO_ALG_TYPE_AEAD, + .alg.crypto = { + .cra_name = "authenc(hmac(sha224),cbc(aes))", + .cra_driver_name = "authenc-hmac-sha224-cbc-aes-talitos", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC, + .cra_type = &crypto_aead_type, + .cra_aead = { + .setkey = aead_setkey, + .setauthsize = aead_setauthsize, + .encrypt = aead_encrypt, + .decrypt = aead_decrypt, + .givencrypt = aead_givencrypt, + .geniv = "", + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA224_DIGEST_SIZE, + } + }, + .desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP | + DESC_HDR_SEL0_AESU | + DESC_HDR_MODE0_AESU_CBC | + DESC_HDR_SEL1_MDEUA | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEU_SHA224_HMAC, + }, + { .type = CRYPTO_ALG_TYPE_AEAD, + .alg.crypto = { + .cra_name = "authenc(hmac(sha224),cbc(des3_ede))", + .cra_driver_name = "authenc-hmac-sha224-cbc-3des-talitos", + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC, + .cra_type = &crypto_aead_type, + .cra_aead = { + .setkey = aead_setkey, + .setauthsize = aead_setauthsize, + .encrypt = aead_encrypt, + .decrypt = aead_decrypt, + .givencrypt = aead_givencrypt, + .geniv = "", + .ivsize = DES3_EDE_BLOCK_SIZE, + .maxauthsize = SHA224_DIGEST_SIZE, + } + }, + .desc_hdr_template = 
DESC_HDR_TYPE_IPSEC_ESP | + DESC_HDR_SEL0_DEU | + DESC_HDR_MODE0_DEU_CBC | + DESC_HDR_MODE0_DEU_3DES | + DESC_HDR_SEL1_MDEUA | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEU_SHA224_HMAC, + }, { .type = CRYPTO_ALG_TYPE_AEAD, .alg.crypto = { .cra_name = "authenc(hmac(sha256),cbc(aes))", @@ -2011,6 +2064,112 @@ static struct talitos_alg_template driver_algs[] = { }, { .type = CRYPTO_ALG_TYPE_AEAD, .alg.crypto = { + .cra_name = "authenc(hmac(sha384),cbc(aes))", + .cra_driver_name = "authenc-hmac-sha384-cbc-aes-talitos", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC, + .cra_type = &crypto_aead_type, + .cra_aead = { + .setkey = aead_setkey, + .setauthsize = aead_setauthsize, + .encrypt = aead_encrypt, + .decrypt = aead_decrypt, + .givencrypt = aead_givencrypt, + .geniv = "", + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA384_DIGEST_SIZE, + } + }, + .desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP | + DESC_HDR_SEL0_AESU | + DESC_HDR_MODE0_AESU_CBC | + DESC_HDR_SEL1_MDEUB | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEUB_SHA384_HMAC, + }, + { .type = CRYPTO_ALG_TYPE_AEAD, + .alg.crypto = { + .cra_name = "authenc(hmac(sha384),cbc(des3_ede))", + .cra_driver_name = "authenc-hmac-sha384-cbc-3des-talitos", + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC, + .cra_type = &crypto_aead_type, + .cra_aead = { + .setkey = aead_setkey, + .setauthsize = aead_setauthsize, + .encrypt = aead_encrypt, + .decrypt = aead_decrypt, + .givencrypt = aead_givencrypt, + .geniv = "", + .ivsize = DES3_EDE_BLOCK_SIZE, + .maxauthsize = SHA384_DIGEST_SIZE, + } + }, + .desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP | + DESC_HDR_SEL0_DEU | + DESC_HDR_MODE0_DEU_CBC | + DESC_HDR_MODE0_DEU_3DES | + DESC_HDR_SEL1_MDEUB | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEUB_SHA384_HMAC, + }, + { .type = CRYPTO_ALG_TYPE_AEAD, + .alg.crypto 
= { + .cra_name = "authenc(hmac(sha512),cbc(aes))", + .cra_driver_name = "authenc-hmac-sha512-cbc-aes-talitos", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC, + .cra_type = &crypto_aead_type, + .cra_aead = { + .setkey = aead_setkey, + .setauthsize = aead_setauthsize, + .encrypt = aead_encrypt, + .decrypt = aead_decrypt, + .givencrypt = aead_givencrypt, + .geniv = "", + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA512_DIGEST_SIZE, + } + }, + .desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP | + DESC_HDR_SEL0_AESU | + DESC_HDR_MODE0_AESU_CBC | + DESC_HDR_SEL1_MDEUB | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEUB_SHA512_HMAC, + }, + { .type = CRYPTO_ALG_TYPE_AEAD, + .alg.crypto = { + .cra_name = "authenc(hmac(sha512),cbc(des3_ede))", + .cra_driver_name = "authenc-hmac-sha512-cbc-3des-talitos", + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC, + .cra_type = &crypto_aead_type, + .cra_aead = { + .setkey = aead_setkey, + .setauthsize = aead_setauthsize, + .encrypt = aead_encrypt, + .decrypt = aead_decrypt, + .givencrypt = aead_givencrypt, + .geniv = "", + .ivsize = DES3_EDE_BLOCK_SIZE, + .maxauthsize = SHA512_DIGEST_SIZE, + } + }, + .desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP | + DESC_HDR_SEL0_DEU | + DESC_HDR_MODE0_DEU_CBC | + DESC_HDR_MODE0_DEU_3DES | + DESC_HDR_SEL1_MDEUB | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEUB_SHA512_HMAC, + }, + { .type = CRYPTO_ALG_TYPE_AEAD, + .alg.crypto = { .cra_name = "authenc(hmac(md5),cbc(aes))", .cra_driver_name = "authenc-hmac-md5-cbc-aes-talitos", .cra_blocksize = AES_BLOCK_SIZE, diff --git a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h index 41b80f6..61a1405 100644 --- a/drivers/crypto/talitos.h +++ b/drivers/crypto/talitos.h @@ -326,6 +326,12 @@ extern int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc, DESC_HDR_MODE1_MDEU_HMAC) #define 
DESC_HDR_MODE1_MDEU_SHA1_HMAC (DESC_HDR_MODE1_MDEU_SHA1 | \ DESC_HDR_MODE1_MDEU_HMAC) +#define DESC_HDR_MODE1_MDEU_SHA224_HMAC (DESC_HDR_MODE1_MDEU_SHA224 | \ + DESC_HDR_MODE1_MDEU_HMAC) +#define DESC_HDR_MODE1_MDEUB_SHA384_HMAC (DESC_HDR_MODE1_MDEUB_SHA384 | \ + DESC_HDR_MODE1_MDEU_HMAC) +#define DESC_HDR_MODE1_MDEUB_SHA512_HMAC (DESC_HDR_MODE1_MDEUB_SHA512 | \ + DESC_HDR_MODE1_MDEU_HMAC) /* direction of overall data flow (DIR) */ #define DESC_HDR_DIR_INBOUND cpu_to_be32(0x00000002) -- cgit v0.10.2 From e46e9a46386bca8e80a6467b5c643dc494861896 Mon Sep 17 00:00:00 2001 From: Horia Geanta Date: Tue, 3 Jul 2012 19:16:54 +0300 Subject: crypto: testmgr - add aead cbc aes hmac sha1,256,512 test vectors Test vectors were generated starting from existing CBC(AES) test vectors (RFC3602, NIST SP800-38A) and adding HMAC(SHA*) computed with Crypto++ and double-checked with HashCalc. Signed-off-by: Horia Geanta Signed-off-by: Herbert Xu diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 4308a11..a2ca743 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -1721,6 +1721,42 @@ static const struct alg_test_desc alg_test_descs[] = { } } }, { + .alg = "authenc(hmac(sha1),cbc(aes))", + .test = alg_test_aead, + .fips_allowed = 1, + .suite = { + .aead = { + .enc = { + .vecs = hmac_sha1_aes_cbc_enc_tv_template, + .count = HMAC_SHA1_AES_CBC_ENC_TEST_VECTORS + } + } + } + }, { + .alg = "authenc(hmac(sha256),cbc(aes))", + .test = alg_test_aead, + .fips_allowed = 1, + .suite = { + .aead = { + .enc = { + .vecs = hmac_sha256_aes_cbc_enc_tv_template, + .count = HMAC_SHA256_AES_CBC_ENC_TEST_VECTORS + } + } + } + }, { + .alg = "authenc(hmac(sha512),cbc(aes))", + .test = alg_test_aead, + .fips_allowed = 1, + .suite = { + .aead = { + .enc = { + .vecs = hmac_sha512_aes_cbc_enc_tv_template, + .count = HMAC_SHA512_AES_CBC_ENC_TEST_VECTORS + } + } + } + }, { .alg = "cbc(aes)", .test = alg_test_skcipher, .fips_allowed = 1, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 4d84fe4..f8179e0 
100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -6975,6 +6975,9 @@ static struct cipher_testvec cast6_dec_tv_template[] = { #define AES_DEC_TEST_VECTORS 3 #define AES_CBC_ENC_TEST_VECTORS 4 #define AES_CBC_DEC_TEST_VECTORS 4 +#define HMAC_SHA1_AES_CBC_ENC_TEST_VECTORS 7 +#define HMAC_SHA256_AES_CBC_ENC_TEST_VECTORS 7 +#define HMAC_SHA512_AES_CBC_ENC_TEST_VECTORS 7 #define AES_LRW_ENC_TEST_VECTORS 8 #define AES_LRW_DEC_TEST_VECTORS 8 #define AES_XTS_ENC_TEST_VECTORS 5 @@ -7232,6 +7235,837 @@ static struct cipher_testvec aes_cbc_dec_tv_template[] = { }, }; +static struct aead_testvec hmac_sha1_aes_cbc_enc_tv_template[] = { + { /* RFC 3602 Case 1 */ +#ifdef __LITTLE_ENDIAN + .key = "\x08\x00" /* rta length */ + "\x01\x00" /* rta type */ +#else + .key = "\x00\x08" /* rta length */ + "\x00\x01" /* rta type */ +#endif + "\x00\x00\x00\x10" /* enc key length */ + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00" + "\x06\xa9\x21\x40\x36\xb8\xa1\x5b" + "\x51\x2e\x03\xd5\x34\x12\x00\x06", + .klen = 8 + 20 + 16, + .iv = "\x3d\xaf\xba\x42\x9d\x9e\xb4\x30" + "\xb4\x22\xda\x80\x2c\x9f\xac\x41", + .input = "Single block msg", + .ilen = 16, + .result = "\xe3\x53\x77\x9c\x10\x79\xae\xb8" + "\x27\x08\x94\x2d\xbe\x77\x18\x1a" + "\x1b\x13\xcb\xaf\x89\x5e\xe1\x2c" + "\x13\xc5\x2e\xa3\xcc\xed\xdc\xb5" + "\x03\x71\xa2\x06", + .rlen = 16 + 20, + }, { /* RFC 3602 Case 2 */ +#ifdef __LITTLE_ENDIAN + .key = "\x08\x00" /* rta length */ + "\x01\x00" /* rta type */ +#else + .key = "\x00\x08" /* rta length */ + "\x00\x01" /* rta type */ +#endif + "\x00\x00\x00\x10" /* enc key length */ + "\x20\x21\x22\x23\x24\x25\x26\x27" + "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" + "\x30\x31\x32\x33" + "\xc2\x86\x69\x6d\x88\x7c\x9a\xa0" + "\x61\x1b\xbb\x3e\x20\x25\xa4\x5a", + .klen = 8 + 20 + 16, + .iv = "\x56\x2e\x17\x99\x6d\x09\x3d\x28" + "\xdd\xb3\xba\x69\x5a\x2e\x6f\x58", + .input = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + 
"\x10\x11\x12\x13\x14\x15\x16\x17" + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", + .ilen = 32, + .result = "\xd2\x96\xcd\x94\xc2\xcc\xcf\x8a" + "\x3a\x86\x30\x28\xb5\xe1\xdc\x0a" + "\x75\x86\x60\x2d\x25\x3c\xff\xf9" + "\x1b\x82\x66\xbe\xa6\xd6\x1a\xb1" + "\xad\x9b\x4c\x5c\x85\xe1\xda\xae" + "\xee\x81\x4e\xd7\xdb\x74\xcf\x58" + "\x65\x39\xf8\xde", + .rlen = 32 + 20, + }, { /* RFC 3602 Case 3 */ +#ifdef __LITTLE_ENDIAN + .key = "\x08\x00" /* rta length */ + "\x01\x00" /* rta type */ +#else + .key = "\x00\x08" /* rta length */ + "\x00\x01" /* rta type */ +#endif + "\x00\x00\x00\x10" /* enc key length */ + "\x11\x22\x33\x44\x55\x66\x77\x88" + "\x99\xaa\xbb\xcc\xdd\xee\xff\x11" + "\x22\x33\x44\x55" + "\x6c\x3e\xa0\x47\x76\x30\xce\x21" + "\xa2\xce\x33\x4a\xa7\x46\xc2\xcd", + .klen = 8 + 20 + 16, + .iv = "\xc7\x82\xdc\x4c\x09\x8c\x66\xcb" + "\xd9\xcd\x27\xd8\x25\x68\x2c\x81", + .input = "This is a 48-byte message (exactly 3 AES blocks)", + .ilen = 48, + .result = "\xd0\xa0\x2b\x38\x36\x45\x17\x53" + "\xd4\x93\x66\x5d\x33\xf0\xe8\x86" + "\x2d\xea\x54\xcd\xb2\x93\xab\xc7" + "\x50\x69\x39\x27\x67\x72\xf8\xd5" + "\x02\x1c\x19\x21\x6b\xad\x52\x5c" + "\x85\x79\x69\x5d\x83\xba\x26\x84" + "\xc2\xec\x0c\xf8\x7f\x05\xba\xca" + "\xff\xee\x4c\xd0\x93\xe6\x36\x7f" + "\x8d\x62\xf2\x1e", + .rlen = 48 + 20, + }, { /* RFC 3602 Case 4 */ +#ifdef __LITTLE_ENDIAN + .key = "\x08\x00" /* rta length */ + "\x01\x00" /* rta type */ +#else + .key = "\x00\x08" /* rta length */ + "\x00\x01" /* rta type */ +#endif + "\x00\x00\x00\x10" /* enc key length */ + "\x11\x22\x33\x44\x55\x66\x77\x88" + "\x99\xaa\xbb\xcc\xdd\xee\xff\x11" + "\x22\x33\x44\x55" + "\x56\xe4\x7a\x38\xc5\x59\x89\x74" + "\xbc\x46\x90\x3d\xba\x29\x03\x49", + .klen = 8 + 20 + 16, + .iv = "\x8c\xe8\x2e\xef\xbe\xa0\xda\x3c" + "\x44\x69\x9e\xd7\xdb\x51\xb7\xd9", + .input = "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" + "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" + "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" + "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" + 
"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" + "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" + "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" + "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf", + .ilen = 64, + .result = "\xc3\x0e\x32\xff\xed\xc0\x77\x4e" + "\x6a\xff\x6a\xf0\x86\x9f\x71\xaa" + "\x0f\x3a\xf0\x7a\x9a\x31\xa9\xc6" + "\x84\xdb\x20\x7e\xb0\xef\x8e\x4e" + "\x35\x90\x7a\xa6\x32\xc3\xff\xdf" + "\x86\x8b\xb7\xb2\x9d\x3d\x46\xad" + "\x83\xce\x9f\x9a\x10\x2e\xe9\x9d" + "\x49\xa5\x3e\x87\xf4\xc3\xda\x55" + "\x1c\x45\x57\xa9\x56\xcb\xa9\x2d" + "\x18\xac\xf1\xc7\x5d\xd1\xcd\x0d" + "\x1d\xbe\xc6\xe9", + .rlen = 64 + 20, + }, { /* RFC 3602 Case 5 */ +#ifdef __LITTLE_ENDIAN + .key = "\x08\x00" /* rta length */ + "\x01\x00" /* rta type */ +#else + .key = "\x00\x08" /* rta length */ + "\x00\x01" /* rta type */ +#endif + "\x00\x00\x00\x10" /* enc key length */ + "\x11\x22\x33\x44\x55\x66\x77\x88" + "\x99\xaa\xbb\xcc\xdd\xee\xff\x11" + "\x22\x33\x44\x55" + "\x90\xd3\x82\xb4\x10\xee\xba\x7a" + "\xd9\x38\xc4\x6c\xec\x1a\x82\xbf", + .klen = 8 + 20 + 16, + .iv = "\xe9\x6e\x8c\x08\xab\x46\x57\x63" + "\xfd\x09\x8d\x45\xdd\x3f\xf8\x93", + .assoc = "\x00\x00\x43\x21\x00\x00\x00\x01", + .alen = 8, + .input = "\x08\x00\x0e\xbd\xa7\x0a\x00\x00" + "\x8e\x9c\x08\x3d\xb9\x5b\x07\x00" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + "\x10\x11\x12\x13\x14\x15\x16\x17" + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + "\x20\x21\x22\x23\x24\x25\x26\x27" + "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" + "\x30\x31\x32\x33\x34\x35\x36\x37" + "\x01\x02\x03\x04\x05\x06\x07\x08" + "\x09\x0a\x0b\x0c\x0d\x0e\x0e\x01", + .ilen = 80, + .result = "\xf6\x63\xc2\x5d\x32\x5c\x18\xc6" + "\xa9\x45\x3e\x19\x4e\x12\x08\x49" + "\xa4\x87\x0b\x66\xcc\x6b\x99\x65" + "\x33\x00\x13\xb4\x89\x8d\xc8\x56" + "\xa4\x69\x9e\x52\x3a\x55\xdb\x08" + "\x0b\x59\xec\x3a\x8e\x4b\x7e\x52" + "\x77\x5b\x07\xd1\xdb\x34\xed\x9c" + "\x53\x8a\xb5\x0c\x55\x1b\x87\x4a" + "\xa2\x69\xad\xd0\x47\xad\x2d\x59" + "\x13\xac\x19\xb7\xcf\xba\xd4\xa6" + "\x58\xc6\x84\x75\xe4\xe9\x6b\x0c" + 
"\xe1\xc5\x0b\x73\x4d\x82\x55\xa8" + "\x85\xe1\x59\xf7", + .rlen = 80 + 20, + }, { /* NIST SP800-38A F.2.3 CBC-AES192.Encrypt */ +#ifdef __LITTLE_ENDIAN + .key = "\x08\x00" /* rta length */ + "\x01\x00" /* rta type */ +#else + .key = "\x00\x08" /* rta length */ + "\x00\x01" /* rta type */ +#endif + "\x00\x00\x00\x18" /* enc key length */ + "\x11\x22\x33\x44\x55\x66\x77\x88" + "\x99\xaa\xbb\xcc\xdd\xee\xff\x11" + "\x22\x33\x44\x55" + "\x8e\x73\xb0\xf7\xda\x0e\x64\x52" + "\xc8\x10\xf3\x2b\x80\x90\x79\xe5" + "\x62\xf8\xea\xd2\x52\x2c\x6b\x7b", + .klen = 8 + 20 + 24, + .iv = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + .input = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96" + "\xe9\x3d\x7e\x11\x73\x93\x17\x2a" + "\xae\x2d\x8a\x57\x1e\x03\xac\x9c" + "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51" + "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11" + "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef" + "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17" + "\xad\x2b\x41\x7b\xe6\x6c\x37\x10", + .ilen = 64, + .result = "\x4f\x02\x1d\xb2\x43\xbc\x63\x3d" + "\x71\x78\x18\x3a\x9f\xa0\x71\xe8" + "\xb4\xd9\xad\xa9\xad\x7d\xed\xf4" + "\xe5\xe7\x38\x76\x3f\x69\x14\x5a" + "\x57\x1b\x24\x20\x12\xfb\x7a\xe0" + "\x7f\xa9\xba\xac\x3d\xf1\x02\xe0" + "\x08\xb0\xe2\x79\x88\x59\x88\x81" + "\xd9\x20\xa9\xe6\x4f\x56\x15\xcd" + "\x73\xe3\x19\x3f\x8b\xc9\xc6\xf4" + "\x5a\xf1\x5b\xa8\x98\x07\xc5\x36" + "\x47\x4c\xfc\x36", + .rlen = 64 + 20, + }, { /* NIST SP800-38A F.2.5 CBC-AES256.Encrypt */ +#ifdef __LITTLE_ENDIAN + .key = "\x08\x00" /* rta length */ + "\x01\x00" /* rta type */ +#else + .key = "\x00\x08" /* rta length */ + "\x00\x01" /* rta type */ +#endif + "\x00\x00\x00\x20" /* enc key length */ + "\x11\x22\x33\x44\x55\x66\x77\x88" + "\x99\xaa\xbb\xcc\xdd\xee\xff\x11" + "\x22\x33\x44\x55" + "\x60\x3d\xeb\x10\x15\xca\x71\xbe" + "\x2b\x73\xae\xf0\x85\x7d\x77\x81" + "\x1f\x35\x2c\x07\x3b\x61\x08\xd7" + "\x2d\x98\x10\xa3\x09\x14\xdf\xf4", + .klen = 8 + 20 + 32, + .iv = "\x00\x01\x02\x03\x04\x05\x06\x07" + 
"\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + .input = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96" + "\xe9\x3d\x7e\x11\x73\x93\x17\x2a" + "\xae\x2d\x8a\x57\x1e\x03\xac\x9c" + "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51" + "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11" + "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef" + "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17" + "\xad\x2b\x41\x7b\xe6\x6c\x37\x10", + .ilen = 64, + .result = "\xf5\x8c\x4c\x04\xd6\xe5\xf1\xba" + "\x77\x9e\xab\xfb\x5f\x7b\xfb\xd6" + "\x9c\xfc\x4e\x96\x7e\xdb\x80\x8d" + "\x67\x9f\x77\x7b\xc6\x70\x2c\x7d" + "\x39\xf2\x33\x69\xa9\xd9\xba\xcf" + "\xa5\x30\xe2\x63\x04\x23\x14\x61" + "\xb2\xeb\x05\xe2\xc3\x9b\xe9\xfc" + "\xda\x6c\x19\x07\x8c\x6a\x9d\x1b" + "\xa3\xe8\x9b\x17\xe3\xf4\x7f\xde" + "\x1b\x9f\xc6\x81\x26\x43\x4a\x87" + "\x51\xee\xd6\x4e", + .rlen = 64 + 20, + }, +}; + +static struct aead_testvec hmac_sha256_aes_cbc_enc_tv_template[] = { + { /* RFC 3602 Case 1 */ +#ifdef __LITTLE_ENDIAN + .key = "\x08\x00" /* rta length */ + "\x01\x00" /* rta type */ +#else + .key = "\x00\x08" /* rta length */ + "\x00\x01" /* rta type */ +#endif + "\x00\x00\x00\x10" /* enc key length */ + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x06\xa9\x21\x40\x36\xb8\xa1\x5b" + "\x51\x2e\x03\xd5\x34\x12\x00\x06", + .klen = 8 + 32 + 16, + .iv = "\x3d\xaf\xba\x42\x9d\x9e\xb4\x30" + "\xb4\x22\xda\x80\x2c\x9f\xac\x41", + .input = "Single block msg", + .ilen = 16, + .result = "\xe3\x53\x77\x9c\x10\x79\xae\xb8" + "\x27\x08\x94\x2d\xbe\x77\x18\x1a" + "\xcc\xde\x2d\x6a\xae\xf1\x0b\xcc" + "\x38\x06\x38\x51\xb4\xb8\xf3\x5b" + "\x5c\x34\xa6\xa3\x6e\x0b\x05\xe5" + "\x6a\x6d\x44\xaa\x26\xa8\x44\xa5", + .rlen = 16 + 32, + }, { /* RFC 3602 Case 2 */ +#ifdef __LITTLE_ENDIAN + .key = "\x08\x00" /* rta length */ + "\x01\x00" /* rta type */ +#else + .key = "\x00\x08" /* rta length */ + "\x00\x01" /* rta type */ +#endif + "\x00\x00\x00\x10" /* enc key length */ + "\x20\x21\x22\x23\x24\x25\x26\x27" + 
"\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" + "\x30\x31\x32\x33\x34\x35\x36\x37" + "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" + "\xc2\x86\x69\x6d\x88\x7c\x9a\xa0" + "\x61\x1b\xbb\x3e\x20\x25\xa4\x5a", + .klen = 8 + 32 + 16, + .iv = "\x56\x2e\x17\x99\x6d\x09\x3d\x28" + "\xdd\xb3\xba\x69\x5a\x2e\x6f\x58", + .input = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + "\x10\x11\x12\x13\x14\x15\x16\x17" + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", + .ilen = 32, + .result = "\xd2\x96\xcd\x94\xc2\xcc\xcf\x8a" + "\x3a\x86\x30\x28\xb5\xe1\xdc\x0a" + "\x75\x86\x60\x2d\x25\x3c\xff\xf9" + "\x1b\x82\x66\xbe\xa6\xd6\x1a\xb1" + "\xf5\x33\x53\xf3\x68\x85\x2a\x99" + "\x0e\x06\x58\x8f\xba\xf6\x06\xda" + "\x49\x69\x0d\x5b\xd4\x36\x06\x62" + "\x35\x5e\x54\x58\x53\x4d\xdf\xbf", + .rlen = 32 + 32, + }, { /* RFC 3602 Case 3 */ +#ifdef __LITTLE_ENDIAN + .key = "\x08\x00" /* rta length */ + "\x01\x00" /* rta type */ +#else + .key = "\x00\x08" /* rta length */ + "\x00\x01" /* rta type */ +#endif + "\x00\x00\x00\x10" /* enc key length */ + "\x11\x22\x33\x44\x55\x66\x77\x88" + "\x99\xaa\xbb\xcc\xdd\xee\xff\x11" + "\x22\x33\x44\x55\x66\x77\x88\x99" + "\xaa\xbb\xcc\xdd\xee\xff\x11\x22" + "\x6c\x3e\xa0\x47\x76\x30\xce\x21" + "\xa2\xce\x33\x4a\xa7\x46\xc2\xcd", + .klen = 8 + 32 + 16, + .iv = "\xc7\x82\xdc\x4c\x09\x8c\x66\xcb" + "\xd9\xcd\x27\xd8\x25\x68\x2c\x81", + .input = "This is a 48-byte message (exactly 3 AES blocks)", + .ilen = 48, + .result = "\xd0\xa0\x2b\x38\x36\x45\x17\x53" + "\xd4\x93\x66\x5d\x33\xf0\xe8\x86" + "\x2d\xea\x54\xcd\xb2\x93\xab\xc7" + "\x50\x69\x39\x27\x67\x72\xf8\xd5" + "\x02\x1c\x19\x21\x6b\xad\x52\x5c" + "\x85\x79\x69\x5d\x83\xba\x26\x84" + "\x68\xb9\x3e\x90\x38\xa0\x88\x01" + "\xe7\xc6\xce\x10\x31\x2f\x9b\x1d" + "\x24\x78\xfb\xbe\x02\xe0\x4f\x40" + "\x10\xbd\xaa\xc6\xa7\x79\xe0\x1a", + .rlen = 48 + 32, + }, { /* RFC 3602 Case 4 */ +#ifdef __LITTLE_ENDIAN + .key = "\x08\x00" /* rta length */ + "\x01\x00" /* rta type */ +#else + .key = "\x00\x08" /* rta length */ + 
"\x00\x01" /* rta type */ +#endif + "\x00\x00\x00\x10" /* enc key length */ + "\x11\x22\x33\x44\x55\x66\x77\x88" + "\x99\xaa\xbb\xcc\xdd\xee\xff\x11" + "\x22\x33\x44\x55\x66\x77\x88\x99" + "\xaa\xbb\xcc\xdd\xee\xff\x11\x22" + "\x56\xe4\x7a\x38\xc5\x59\x89\x74" + "\xbc\x46\x90\x3d\xba\x29\x03\x49", + .klen = 8 + 32 + 16, + .iv = "\x8c\xe8\x2e\xef\xbe\xa0\xda\x3c" + "\x44\x69\x9e\xd7\xdb\x51\xb7\xd9", + .input = "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" + "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" + "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" + "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" + "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" + "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" + "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" + "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf", + .ilen = 64, + .result = "\xc3\x0e\x32\xff\xed\xc0\x77\x4e" + "\x6a\xff\x6a\xf0\x86\x9f\x71\xaa" + "\x0f\x3a\xf0\x7a\x9a\x31\xa9\xc6" + "\x84\xdb\x20\x7e\xb0\xef\x8e\x4e" + "\x35\x90\x7a\xa6\x32\xc3\xff\xdf" + "\x86\x8b\xb7\xb2\x9d\x3d\x46\xad" + "\x83\xce\x9f\x9a\x10\x2e\xe9\x9d" + "\x49\xa5\x3e\x87\xf4\xc3\xda\x55" + "\x7a\x1b\xd4\x3c\xdb\x17\x95\xe2" + "\xe0\x93\xec\xc9\x9f\xf7\xce\xd8" + "\x3f\x54\xe2\x49\x39\xe3\x71\x25" + "\x2b\x6c\xe9\x5d\xec\xec\x2b\x64", + .rlen = 64 + 32, + }, { /* RFC 3602 Case 5 */ +#ifdef __LITTLE_ENDIAN + .key = "\x08\x00" /* rta length */ + "\x01\x00" /* rta type */ +#else + .key = "\x00\x08" /* rta length */ + "\x00\x01" /* rta type */ +#endif + "\x00\x00\x00\x10" /* enc key length */ + "\x11\x22\x33\x44\x55\x66\x77\x88" + "\x99\xaa\xbb\xcc\xdd\xee\xff\x11" + "\x22\x33\x44\x55\x66\x77\x88\x99" + "\xaa\xbb\xcc\xdd\xee\xff\x11\x22" + "\x90\xd3\x82\xb4\x10\xee\xba\x7a" + "\xd9\x38\xc4\x6c\xec\x1a\x82\xbf", + .klen = 8 + 32 + 16, + .iv = "\xe9\x6e\x8c\x08\xab\x46\x57\x63" + "\xfd\x09\x8d\x45\xdd\x3f\xf8\x93", + .assoc = "\x00\x00\x43\x21\x00\x00\x00\x01", + .alen = 8, + .input = "\x08\x00\x0e\xbd\xa7\x0a\x00\x00" + "\x8e\x9c\x08\x3d\xb9\x5b\x07\x00" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + "\x10\x11\x12\x13\x14\x15\x16\x17" + 
"\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + "\x20\x21\x22\x23\x24\x25\x26\x27" + "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" + "\x30\x31\x32\x33\x34\x35\x36\x37" + "\x01\x02\x03\x04\x05\x06\x07\x08" + "\x09\x0a\x0b\x0c\x0d\x0e\x0e\x01", + .ilen = 80, + .result = "\xf6\x63\xc2\x5d\x32\x5c\x18\xc6" + "\xa9\x45\x3e\x19\x4e\x12\x08\x49" + "\xa4\x87\x0b\x66\xcc\x6b\x99\x65" + "\x33\x00\x13\xb4\x89\x8d\xc8\x56" + "\xa4\x69\x9e\x52\x3a\x55\xdb\x08" + "\x0b\x59\xec\x3a\x8e\x4b\x7e\x52" + "\x77\x5b\x07\xd1\xdb\x34\xed\x9c" + "\x53\x8a\xb5\x0c\x55\x1b\x87\x4a" + "\xa2\x69\xad\xd0\x47\xad\x2d\x59" + "\x13\xac\x19\xb7\xcf\xba\xd4\xa6" + "\xbb\xd4\x0f\xbe\xa3\x3b\x4c\xb8" + "\x3a\xd2\xe1\x03\x86\xa5\x59\xb7" + "\x73\xc3\x46\x20\x2c\xb1\xef\x68" + "\xbb\x8a\x32\x7e\x12\x8c\x69\xcf", + .rlen = 80 + 32, + }, { /* NIST SP800-38A F.2.3 CBC-AES192.Encrypt */ +#ifdef __LITTLE_ENDIAN + .key = "\x08\x00" /* rta length */ + "\x01\x00" /* rta type */ +#else + .key = "\x00\x08" /* rta length */ + "\x00\x01" /* rta type */ +#endif + "\x00\x00\x00\x18" /* enc key length */ + "\x11\x22\x33\x44\x55\x66\x77\x88" + "\x99\xaa\xbb\xcc\xdd\xee\xff\x11" + "\x22\x33\x44\x55\x66\x77\x88\x99" + "\xaa\xbb\xcc\xdd\xee\xff\x11\x22" + "\x8e\x73\xb0\xf7\xda\x0e\x64\x52" + "\xc8\x10\xf3\x2b\x80\x90\x79\xe5" + "\x62\xf8\xea\xd2\x52\x2c\x6b\x7b", + .klen = 8 + 32 + 24, + .iv = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + .input = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96" + "\xe9\x3d\x7e\x11\x73\x93\x17\x2a" + "\xae\x2d\x8a\x57\x1e\x03\xac\x9c" + "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51" + "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11" + "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef" + "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17" + "\xad\x2b\x41\x7b\xe6\x6c\x37\x10", + .ilen = 64, + .result = "\x4f\x02\x1d\xb2\x43\xbc\x63\x3d" + "\x71\x78\x18\x3a\x9f\xa0\x71\xe8" + "\xb4\xd9\xad\xa9\xad\x7d\xed\xf4" + "\xe5\xe7\x38\x76\x3f\x69\x14\x5a" + "\x57\x1b\x24\x20\x12\xfb\x7a\xe0" + "\x7f\xa9\xba\xac\x3d\xf1\x02\xe0" + "\x08\xb0\xe2\x79\x88\x59\x88\x81" 
+ "\xd9\x20\xa9\xe6\x4f\x56\x15\xcd" + "\x2f\xee\x5f\xdb\x66\xfe\x79\x09" + "\x61\x81\x31\xea\x5b\x3d\x8e\xfb" + "\xca\x71\x85\x93\xf7\x85\x55\x8b" + "\x7a\xe4\x94\xca\x8b\xba\x19\x33", + .rlen = 64 + 32, + }, { /* NIST SP800-38A F.2.5 CBC-AES256.Encrypt */ +#ifdef __LITTLE_ENDIAN + .key = "\x08\x00" /* rta length */ + "\x01\x00" /* rta type */ +#else + .key = "\x00\x08" /* rta length */ + "\x00\x01" /* rta type */ +#endif + "\x00\x00\x00\x20" /* enc key length */ + "\x11\x22\x33\x44\x55\x66\x77\x88" + "\x99\xaa\xbb\xcc\xdd\xee\xff\x11" + "\x22\x33\x44\x55\x66\x77\x88\x99" + "\xaa\xbb\xcc\xdd\xee\xff\x11\x22" + "\x60\x3d\xeb\x10\x15\xca\x71\xbe" + "\x2b\x73\xae\xf0\x85\x7d\x77\x81" + "\x1f\x35\x2c\x07\x3b\x61\x08\xd7" + "\x2d\x98\x10\xa3\x09\x14\xdf\xf4", + .klen = 8 + 32 + 32, + .iv = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + .input = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96" + "\xe9\x3d\x7e\x11\x73\x93\x17\x2a" + "\xae\x2d\x8a\x57\x1e\x03\xac\x9c" + "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51" + "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11" + "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef" + "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17" + "\xad\x2b\x41\x7b\xe6\x6c\x37\x10", + .ilen = 64, + .result = "\xf5\x8c\x4c\x04\xd6\xe5\xf1\xba" + "\x77\x9e\xab\xfb\x5f\x7b\xfb\xd6" + "\x9c\xfc\x4e\x96\x7e\xdb\x80\x8d" + "\x67\x9f\x77\x7b\xc6\x70\x2c\x7d" + "\x39\xf2\x33\x69\xa9\xd9\xba\xcf" + "\xa5\x30\xe2\x63\x04\x23\x14\x61" + "\xb2\xeb\x05\xe2\xc3\x9b\xe9\xfc" + "\xda\x6c\x19\x07\x8c\x6a\x9d\x1b" + "\x24\x29\xed\xc2\x31\x49\xdb\xb1" + "\x8f\x74\xbd\x17\x92\x03\xbe\x8f" + "\xf3\x61\xde\x1c\xe9\xdb\xcd\xd0" + "\xcc\xce\xe9\x85\x57\xcf\x6f\x5f", + .rlen = 64 + 32, + }, +}; + +static struct aead_testvec hmac_sha512_aes_cbc_enc_tv_template[] = { + { /* RFC 3602 Case 1 */ +#ifdef __LITTLE_ENDIAN + .key = "\x08\x00" /* rta length */ + "\x01\x00" /* rta type */ +#else + .key = "\x00\x08" /* rta length */ + "\x00\x01" /* rta type */ +#endif + "\x00\x00\x00\x10" /* enc key length */ + 
"\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x06\xa9\x21\x40\x36\xb8\xa1\x5b" + "\x51\x2e\x03\xd5\x34\x12\x00\x06", + .klen = 8 + 64 + 16, + .iv = "\x3d\xaf\xba\x42\x9d\x9e\xb4\x30" + "\xb4\x22\xda\x80\x2c\x9f\xac\x41", + .input = "Single block msg", + .ilen = 16, + .result = "\xe3\x53\x77\x9c\x10\x79\xae\xb8" + "\x27\x08\x94\x2d\xbe\x77\x18\x1a" + "\x3f\xdc\xad\x90\x03\x63\x5e\x68" + "\xc3\x13\xdd\xa4\x5c\x4d\x54\xa7" + "\x19\x6e\x03\x75\x2b\xa1\x62\xce" + "\xe0\xc6\x96\x75\xb2\x14\xca\x96" + "\xec\xbd\x50\x08\x07\x64\x1a\x49" + "\xe8\x9a\x7c\x06\x3d\xcb\xff\xb2" + "\xfa\x20\x89\xdd\x9c\xac\x9e\x16" + "\x18\x8a\xa0\x6d\x01\x6c\xa3\x3a", + .rlen = 16 + 64, + }, { /* RFC 3602 Case 2 */ +#ifdef __LITTLE_ENDIAN + .key = "\x08\x00" /* rta length */ + "\x01\x00" /* rta type */ +#else + .key = "\x00\x08" /* rta length */ + "\x00\x01" /* rta type */ +#endif + "\x00\x00\x00\x10" /* enc key length */ + "\x20\x21\x22\x23\x24\x25\x26\x27" + "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" + "\x30\x31\x32\x33\x34\x35\x36\x37" + "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" + "\x40\x41\x42\x43\x44\x45\x46\x47" + "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" + "\x50\x51\x52\x53\x54\x55\x56\x57" + "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" + "\xc2\x86\x69\x6d\x88\x7c\x9a\xa0" + "\x61\x1b\xbb\x3e\x20\x25\xa4\x5a", + .klen = 8 + 64 + 16, + .iv = "\x56\x2e\x17\x99\x6d\x09\x3d\x28" + "\xdd\xb3\xba\x69\x5a\x2e\x6f\x58", + .input = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + "\x10\x11\x12\x13\x14\x15\x16\x17" + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", + .ilen = 32, + .result = "\xd2\x96\xcd\x94\xc2\xcc\xcf\x8a" + "\x3a\x86\x30\x28\xb5\xe1\xdc\x0a" + "\x75\x86\x60\x2d\x25\x3c\xff\xf9" + "\x1b\x82\x66\xbe\xa6\xd6\x1a\xb1" + "\xda\xb2\x0c\xb2\x26\xc4\xd5\xef" 
+ "\x60\x38\xa4\x5e\x9a\x8c\x1b\x41" + "\x03\x9f\xc4\x64\x7f\x01\x42\x9b" + "\x0e\x1b\xea\xef\xbc\x88\x19\x5e" + "\x31\x7e\xc2\x95\xfc\x09\x32\x0a" + "\x46\x32\x7c\x41\x9c\x59\x3e\xe9" + "\x8f\x9f\xd4\x31\xd6\x22\xbd\xf8" + "\xf7\x0a\x94\xe5\xa9\xc3\xf6\x9d", + .rlen = 32 + 64, + }, { /* RFC 3602 Case 3 */ +#ifdef __LITTLE_ENDIAN + .key = "\x08\x00" /* rta length */ + "\x01\x00" /* rta type */ +#else + .key = "\x00\x08" /* rta length */ + "\x00\x01" /* rta type */ +#endif + "\x00\x00\x00\x10" /* enc key length */ + "\x11\x22\x33\x44\x55\x66\x77\x88" + "\x99\xaa\xbb\xcc\xdd\xee\xff\x11" + "\x22\x33\x44\x55\x66\x77\x88\x99" + "\xaa\xbb\xcc\xdd\xee\xff\x11\x22" + "\x33\x44\x55\x66\x77\x88\x99\xaa" + "\xbb\xcc\xdd\xee\xff\x11\x22\x33" + "\x44\x55\x66\x77\x88\x99\xaa\xbb" + "\xcc\xdd\xee\xff\x11\x22\x33\x44" + "\x6c\x3e\xa0\x47\x76\x30\xce\x21" + "\xa2\xce\x33\x4a\xa7\x46\xc2\xcd", + .klen = 8 + 64 + 16, + .iv = "\xc7\x82\xdc\x4c\x09\x8c\x66\xcb" + "\xd9\xcd\x27\xd8\x25\x68\x2c\x81", + .input = "This is a 48-byte message (exactly 3 AES blocks)", + .ilen = 48, + .result = "\xd0\xa0\x2b\x38\x36\x45\x17\x53" + "\xd4\x93\x66\x5d\x33\xf0\xe8\x86" + "\x2d\xea\x54\xcd\xb2\x93\xab\xc7" + "\x50\x69\x39\x27\x67\x72\xf8\xd5" + "\x02\x1c\x19\x21\x6b\xad\x52\x5c" + "\x85\x79\x69\x5d\x83\xba\x26\x84" + "\x64\x19\x17\x5b\x57\xe0\x21\x0f" + "\xca\xdb\xa1\x26\x38\x14\xa2\x69" + "\xdb\x54\x67\x80\xc0\x54\xe0\xfd" + "\x3e\x91\xe7\x91\x7f\x13\x38\x44" + "\xb7\xb1\xd6\xc8\x7d\x48\x8d\x41" + "\x08\xea\x29\x6c\x74\x67\x3f\xb0" + "\xac\x7f\x5c\x1d\xf5\xee\x22\x66" + "\x27\xa6\xb6\x13\xba\xba\xf0\xc2", + .rlen = 48 + 64, + }, { /* RFC 3602 Case 4 */ +#ifdef __LITTLE_ENDIAN + .key = "\x08\x00" /* rta length */ + "\x01\x00" /* rta type */ +#else + .key = "\x00\x08" /* rta length */ + "\x00\x01" /* rta type */ +#endif + "\x00\x00\x00\x10" /* enc key length */ + "\x11\x22\x33\x44\x55\x66\x77\x88" + "\x99\xaa\xbb\xcc\xdd\xee\xff\x11" + "\x22\x33\x44\x55\x66\x77\x88\x99" + 
"\xaa\xbb\xcc\xdd\xee\xff\x11\x22" + "\x33\x44\x55\x66\x77\x88\x99\xaa" + "\xbb\xcc\xdd\xee\xff\x11\x22\x33" + "\x44\x55\x66\x77\x88\x99\xaa\xbb" + "\xcc\xdd\xee\xff\x11\x22\x33\x44" + "\x56\xe4\x7a\x38\xc5\x59\x89\x74" + "\xbc\x46\x90\x3d\xba\x29\x03\x49", + .klen = 8 + 64 + 16, + .iv = "\x8c\xe8\x2e\xef\xbe\xa0\xda\x3c" + "\x44\x69\x9e\xd7\xdb\x51\xb7\xd9", + .input = "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" + "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" + "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" + "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" + "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" + "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" + "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" + "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf", + .ilen = 64, + .result = "\xc3\x0e\x32\xff\xed\xc0\x77\x4e" + "\x6a\xff\x6a\xf0\x86\x9f\x71\xaa" + "\x0f\x3a\xf0\x7a\x9a\x31\xa9\xc6" + "\x84\xdb\x20\x7e\xb0\xef\x8e\x4e" + "\x35\x90\x7a\xa6\x32\xc3\xff\xdf" + "\x86\x8b\xb7\xb2\x9d\x3d\x46\xad" + "\x83\xce\x9f\x9a\x10\x2e\xe9\x9d" + "\x49\xa5\x3e\x87\xf4\xc3\xda\x55" + "\x82\xcd\x42\x28\x21\x20\x15\xcc" + "\xb7\xb2\x48\x40\xc7\x64\x41\x3a" + "\x61\x32\x82\x85\xcf\x27\xed\xb4" + "\xe4\x68\xa2\xf5\x79\x26\x27\xb2" + "\x51\x67\x6a\xc4\xf0\x66\x55\x50" + "\xbc\x6f\xed\xd5\x8d\xde\x23\x7c" + "\x62\x98\x14\xd7\x2f\x37\x8d\xdf" + "\xf4\x33\x80\xeb\x8e\xb4\xa4\xda", + .rlen = 64 + 64, + }, { /* RFC 3602 Case 5 */ +#ifdef __LITTLE_ENDIAN + .key = "\x08\x00" /* rta length */ + "\x01\x00" /* rta type */ +#else + .key = "\x00\x08" /* rta length */ + "\x00\x01" /* rta type */ +#endif + "\x00\x00\x00\x10" /* enc key length */ + "\x11\x22\x33\x44\x55\x66\x77\x88" + "\x99\xaa\xbb\xcc\xdd\xee\xff\x11" + "\x22\x33\x44\x55\x66\x77\x88\x99" + "\xaa\xbb\xcc\xdd\xee\xff\x11\x22" + "\x33\x44\x55\x66\x77\x88\x99\xaa" + "\xbb\xcc\xdd\xee\xff\x11\x22\x33" + "\x44\x55\x66\x77\x88\x99\xaa\xbb" + "\xcc\xdd\xee\xff\x11\x22\x33\x44" + "\x90\xd3\x82\xb4\x10\xee\xba\x7a" + "\xd9\x38\xc4\x6c\xec\x1a\x82\xbf", + .klen = 8 + 64 + 16, + .iv = "\xe9\x6e\x8c\x08\xab\x46\x57\x63" + 
"\xfd\x09\x8d\x45\xdd\x3f\xf8\x93", + .assoc = "\x00\x00\x43\x21\x00\x00\x00\x01", + .alen = 8, + .input = "\x08\x00\x0e\xbd\xa7\x0a\x00\x00" + "\x8e\x9c\x08\x3d\xb9\x5b\x07\x00" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + "\x10\x11\x12\x13\x14\x15\x16\x17" + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + "\x20\x21\x22\x23\x24\x25\x26\x27" + "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" + "\x30\x31\x32\x33\x34\x35\x36\x37" + "\x01\x02\x03\x04\x05\x06\x07\x08" + "\x09\x0a\x0b\x0c\x0d\x0e\x0e\x01", + .ilen = 80, + .result = "\xf6\x63\xc2\x5d\x32\x5c\x18\xc6" + "\xa9\x45\x3e\x19\x4e\x12\x08\x49" + "\xa4\x87\x0b\x66\xcc\x6b\x99\x65" + "\x33\x00\x13\xb4\x89\x8d\xc8\x56" + "\xa4\x69\x9e\x52\x3a\x55\xdb\x08" + "\x0b\x59\xec\x3a\x8e\x4b\x7e\x52" + "\x77\x5b\x07\xd1\xdb\x34\xed\x9c" + "\x53\x8a\xb5\x0c\x55\x1b\x87\x4a" + "\xa2\x69\xad\xd0\x47\xad\x2d\x59" + "\x13\xac\x19\xb7\xcf\xba\xd4\xa6" + "\x74\x84\x94\xe2\xd7\x7a\xf9\xbf" + "\x00\x8a\xa2\xd5\xb7\xf3\x60\xcf" + "\xa0\x47\xdf\x4e\x09\xf4\xb1\x7f" + "\x14\xd9\x3d\x53\x8e\x12\xb3\x00" + "\x4c\x0a\x4e\x32\x40\x43\x88\xce" + "\x92\x26\xc1\x76\x20\x11\xeb\xba" + "\x62\x4f\x9a\x62\x25\xc3\x75\x80" + "\xb7\x0a\x17\xf5\xd7\x94\xb4\x14", + .rlen = 80 + 64, + }, { /* NIST SP800-38A F.2.3 CBC-AES192.Encrypt */ +#ifdef __LITTLE_ENDIAN + .key = "\x08\x00" /* rta length */ + "\x01\x00" /* rta type */ +#else + .key = "\x00\x08" /* rta length */ + "\x00\x01" /* rta type */ +#endif + "\x00\x00\x00\x18" /* enc key length */ + "\x11\x22\x33\x44\x55\x66\x77\x88" + "\x99\xaa\xbb\xcc\xdd\xee\xff\x11" + "\x22\x33\x44\x55\x66\x77\x88\x99" + "\xaa\xbb\xcc\xdd\xee\xff\x11\x22" + "\x33\x44\x55\x66\x77\x88\x99\xaa" + "\xbb\xcc\xdd\xee\xff\x11\x22\x33" + "\x44\x55\x66\x77\x88\x99\xaa\xbb" + "\xcc\xdd\xee\xff\x11\x22\x33\x44" + "\x8e\x73\xb0\xf7\xda\x0e\x64\x52" + "\xc8\x10\xf3\x2b\x80\x90\x79\xe5" + "\x62\xf8\xea\xd2\x52\x2c\x6b\x7b", + .klen = 8 + 64 + 24, + .iv = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + .input = 
"\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96" + "\xe9\x3d\x7e\x11\x73\x93\x17\x2a" + "\xae\x2d\x8a\x57\x1e\x03\xac\x9c" + "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51" + "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11" + "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef" + "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17" + "\xad\x2b\x41\x7b\xe6\x6c\x37\x10", + .ilen = 64, + .result = "\x4f\x02\x1d\xb2\x43\xbc\x63\x3d" + "\x71\x78\x18\x3a\x9f\xa0\x71\xe8" + "\xb4\xd9\xad\xa9\xad\x7d\xed\xf4" + "\xe5\xe7\x38\x76\x3f\x69\x14\x5a" + "\x57\x1b\x24\x20\x12\xfb\x7a\xe0" + "\x7f\xa9\xba\xac\x3d\xf1\x02\xe0" + "\x08\xb0\xe2\x79\x88\x59\x88\x81" + "\xd9\x20\xa9\xe6\x4f\x56\x15\xcd" + "\x77\x4b\x69\x9d\x3a\x0d\xb4\x99" + "\x8f\xc6\x8e\x0e\x72\x58\xe3\x56" + "\xbb\x21\xd2\x7d\x93\x11\x17\x91" + "\xc4\x83\xfd\x0a\xea\x71\xfe\x77" + "\xae\x6f\x0a\xa5\xf0\xcf\xe1\x35" + "\xba\x03\xd5\x32\xfa\x5f\x41\x58" + "\x8d\x43\x98\xa7\x94\x16\x07\x02" + "\x0f\xb6\x81\x50\x28\x95\x2e\x75", + .rlen = 64 + 64, + }, { /* NIST SP800-38A F.2.5 CBC-AES256.Encrypt */ +#ifdef __LITTLE_ENDIAN + .key = "\x08\x00" /* rta length */ + "\x01\x00" /* rta type */ +#else + .key = "\x00\x08" /* rta length */ + "\x00\x01" /* rta type */ +#endif + "\x00\x00\x00\x20" /* enc key length */ + "\x11\x22\x33\x44\x55\x66\x77\x88" + "\x99\xaa\xbb\xcc\xdd\xee\xff\x11" + "\x22\x33\x44\x55\x66\x77\x88\x99" + "\xaa\xbb\xcc\xdd\xee\xff\x11\x22" + "\x33\x44\x55\x66\x77\x88\x99\xaa" + "\xbb\xcc\xdd\xee\xff\x11\x22\x33" + "\x44\x55\x66\x77\x88\x99\xaa\xbb" + "\xcc\xdd\xee\xff\x11\x22\x33\x44" + "\x60\x3d\xeb\x10\x15\xca\x71\xbe" + "\x2b\x73\xae\xf0\x85\x7d\x77\x81" + "\x1f\x35\x2c\x07\x3b\x61\x08\xd7" + "\x2d\x98\x10\xa3\x09\x14\xdf\xf4", + .klen = 8 + 64 + 32, + .iv = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + .input = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96" + "\xe9\x3d\x7e\x11\x73\x93\x17\x2a" + "\xae\x2d\x8a\x57\x1e\x03\xac\x9c" + "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51" + "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11" + "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef" + 
"\xf6\x9f\x24\x45\xdf\x4f\x9b\x17" + "\xad\x2b\x41\x7b\xe6\x6c\x37\x10", + .ilen = 64, + .result = "\xf5\x8c\x4c\x04\xd6\xe5\xf1\xba" + "\x77\x9e\xab\xfb\x5f\x7b\xfb\xd6" + "\x9c\xfc\x4e\x96\x7e\xdb\x80\x8d" + "\x67\x9f\x77\x7b\xc6\x70\x2c\x7d" + "\x39\xf2\x33\x69\xa9\xd9\xba\xcf" + "\xa5\x30\xe2\x63\x04\x23\x14\x61" + "\xb2\xeb\x05\xe2\xc3\x9b\xe9\xfc" + "\xda\x6c\x19\x07\x8c\x6a\x9d\x1b" + "\xb2\x27\x69\x7f\x45\x64\x79\x2b" + "\xb7\xb8\x4c\xd4\x75\x94\x68\x40" + "\x2a\xea\x91\xc7\x3f\x7c\xed\x7b" + "\x95\x2c\x9b\xa8\xf5\xe5\x52\x8d" + "\x6b\xe1\xae\xf1\x74\xfa\x0d\x0c" + "\xe3\x8d\x64\xc3\x8d\xff\x7c\x8c" + "\xdb\xbf\xa0\xb4\x01\xa2\xa8\xa2" + "\x2c\xb1\x62\x2c\x10\xca\xf1\x21", + .rlen = 64 + 64, + }, +}; + static struct cipher_testvec aes_lrw_enc_tv_template[] = { /* from http://grouper.ieee.org/groups/1619/email/pdf00017.pdf */ { /* LRW-32-AES 1 */ -- cgit v0.10.2 From a43478863b16cb0986fd2ec9d1f1b9ebaaec5922 Mon Sep 17 00:00:00 2001 From: Johannes Goetzfried Date: Thu, 5 Jul 2012 20:43:58 +0200 Subject: crypto: twofish-avx - remove useless instruction The register %rdx is written, but never read till the end of the encryption routine. Therefore let's delete the useless instruction. Signed-off-by: Johannes Goetzfried Signed-off-by: Herbert Xu diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S index fc31b89..35f4557 100644 --- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S @@ -243,7 +243,6 @@ __twofish_enc_blk_8way: popq %rbx; leaq (4*4*4)(%rsi), %rax; - leaq (4*4*4)(%rax), %rdx; testb %cl, %cl; jnz __enc_xor8; -- cgit v0.10.2