From 269ab459da46ae37979a0d16307d1fcaa05600b2 Mon Sep 17 00:00:00 2001 From: Richard Hartmann Date: Wed, 10 Mar 2010 18:24:46 +0800 Subject: crypto: internal - Fix checkpatch errors Signed-off-by: Richard Hartmann Signed-off-by: Herbert Xu diff --git a/crypto/internal.h b/crypto/internal.h index 2d22636..d4384b0 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -6,7 +6,7 @@ * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) + * Software Foundation; either version 2 of the License, or (at your option) * any later version. * */ -- cgit v0.10.2 From 12387a46bb150f5608de4aa9a90dfdddbf991e3f Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 10 Mar 2010 18:28:55 +0800 Subject: crypto: aesni-intel - Add AES-NI accelerated CTR mode To take advantage of the pipelined hardware implementation of the AES-NI instructions, CTR mode encryption/decryption is implemented in ASM so that multiple AES-NI instructions are scheduled one after another. This way, some of the AES-NI instruction latency can be hidden. Performance testing based on dm-crypt showed a 50% reduction in encryption/decryption time. Signed-off-by: Huang Ying Signed-off-by: Herbert Xu diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 20bb0e1..822846a 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -32,6 +32,9 @@ #define IN IN1 #define KEY %xmm2 #define IV %xmm3 +#define BSWAP_MASK %xmm10 +#define CTR %xmm11 +#define INC %xmm12 #define KEYP %rdi #define OUTP %rsi @@ -42,6 +45,7 @@ #define T1 %r10 #define TKEYP T1 #define T2 %r11 +#define TCTR_LOW T2 _key_expansion_128: _key_expansion_256a: @@ -724,3 +728,114 @@ ENTRY(aesni_cbc_dec) movups IV, (IVP) .Lcbc_dec_just_ret: ret + +.align 16 +.Lbswap_mask: + .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +/* + * _aesni_inc_init: internal ABI + * setup registers used by _aesni_inc + * input: + * IV + * output: + * CTR: == IV, in little endian + * TCTR_LOW: == lower qword of CTR + * INC: == 1, in little endian + * BSWAP_MASK == endian swapping mask + */ +_aesni_inc_init: + movaps .Lbswap_mask, BSWAP_MASK + movaps IV, CTR + PSHUFB_XMM BSWAP_MASK CTR + mov $1, TCTR_LOW + movq TCTR_LOW, INC + movq CTR, TCTR_LOW + ret + +/* + * _aesni_inc: internal ABI + * Increase IV by 1, IV is in big endian + * input: + * IV + * CTR: == IV, in little endian + * TCTR_LOW: == lower qword of CTR + * INC: == 1, in little endian + * BSWAP_MASK == endian swapping mask + * output: + * IV: Increase by 1 + * changed: + * CTR: == output IV, in little endian + * TCTR_LOW: == lower qword of CTR + */ +_aesni_inc: + paddq INC, CTR + add $1, TCTR_LOW + jnc .Linc_low + pslldq $8, INC + paddq INC, CTR + psrldq $8, INC +.Linc_low: + movaps CTR, IV + PSHUFB_XMM BSWAP_MASK IV + ret + +/* + * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, + * size_t len, u8 *iv) + */ +ENTRY(aesni_ctr_enc) + cmp $16, LEN + jb .Lctr_enc_just_ret + mov 480(KEYP), KLEN + movups (IVP), IV + call _aesni_inc_init + cmp $64, LEN + jb .Lctr_enc_loop1 +.align 4 +.Lctr_enc_loop4: + movaps IV, STATE1 + call _aesni_inc + movups (INP), IN1 + movaps IV, STATE2 + call _aesni_inc + movups 0x10(INP), IN2 + movaps IV, STATE3 + call _aesni_inc + movups 0x20(INP), IN3 + movaps IV, STATE4 + call _aesni_inc + movups 0x30(INP), IN4 + call _aesni_enc4 + pxor IN1, STATE1 + movups STATE1, (OUTP) + pxor IN2,
STATE2 + movups STATE2, 0x10(OUTP) + pxor IN3, STATE3 + movups STATE3, 0x20(OUTP) + pxor IN4, STATE4 + movups STATE4, 0x30(OUTP) + sub $64, LEN + add $64, INP + add $64, OUTP + cmp $64, LEN + jge .Lctr_enc_loop4 + cmp $16, LEN + jb .Lctr_enc_ret +.align 4 +.Lctr_enc_loop1: + movaps IV, STATE + call _aesni_inc + movups (INP), IN + call _aesni_enc1 + pxor IN, STATE + movups STATE, (OUTP) + sub $16, LEN + add $16, INP + add $16, OUTP + cmp $16, LEN + jge .Lctr_enc_loop1 +.Lctr_enc_ret: + movups IV, (IVP) +.Lctr_enc_just_ret: + ret diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 49c552c..2cb3dcc 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -58,6 +59,8 @@ asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in, unsigned int len, u8 *iv); asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in, unsigned int len, u8 *iv); +asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len, u8 *iv); static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx) { @@ -321,6 +324,72 @@ static struct crypto_alg blk_cbc_alg = { }, }; +static void ctr_crypt_final(struct crypto_aes_ctx *ctx, + struct blkcipher_walk *walk) +{ + u8 *ctrblk = walk->iv; + u8 keystream[AES_BLOCK_SIZE]; + u8 *src = walk->src.virt.addr; + u8 *dst = walk->dst.virt.addr; + unsigned int nbytes = walk->nbytes; + + aesni_enc(ctx, keystream, ctrblk); + crypto_xor(keystream, src, nbytes); + memcpy(dst, keystream, nbytes); + crypto_inc(ctrblk, AES_BLOCK_SIZE); +} + +static int ctr_crypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm)); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + kernel_fpu_begin(); + while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { + aesni_ctr_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, + nbytes & AES_BLOCK_MASK, walk.iv); + nbytes &= AES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + if (walk.nbytes) { + ctr_crypt_final(ctx, &walk); + err = blkcipher_walk_done(desc, &walk, 0); + } + kernel_fpu_end(); + + return err; +} + +static struct crypto_alg blk_ctr_alg = { + .cra_name = "__ctr-aes-aesni", + .cra_driver_name = "__driver-ctr-aes-aesni", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1, + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(blk_ctr_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = aes_set_key, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, + }, +}; + static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, unsigned int key_len) { @@ -467,13 +536,11 @@ static struct crypto_alg ablk_cbc_alg = { }, }; -#ifdef HAS_CTR static int ablk_ctr_init(struct crypto_tfm *tfm) { struct cryptd_ablkcipher *cryptd_tfm; - cryptd_tfm = cryptd_alloc_ablkcipher("fpu(ctr(__driver-aes-aesni))", - 0, 0); + cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ctr-aes-aesni", 0, 0); if 
(IS_ERR(cryptd_tfm)) return PTR_ERR(cryptd_tfm); ablk_init_common(tfm, cryptd_tfm); @@ -500,11 +567,50 @@ static struct crypto_alg ablk_ctr_alg = { .ivsize = AES_BLOCK_SIZE, .setkey = ablk_set_key, .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, + .decrypt = ablk_encrypt, .geniv = "chainiv", }, }, }; + +#ifdef HAS_CTR +static int ablk_rfc3686_ctr_init(struct crypto_tfm *tfm) +{ + struct cryptd_ablkcipher *cryptd_tfm; + + cryptd_tfm = cryptd_alloc_ablkcipher( + "rfc3686(__driver-ctr-aes-aesni)", 0, 0); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + ablk_init_common(tfm, cryptd_tfm); + return 0; +} + +static struct crypto_alg ablk_rfc3686_ctr_alg = { + .cra_name = "rfc3686(ctr(aes))", + .cra_driver_name = "rfc3686-ctr-aes-aesni", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct async_aes_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ablk_rfc3686_ctr_alg.cra_list), + .cra_init = ablk_rfc3686_ctr_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE+CTR_RFC3686_NONCE_SIZE, + .max_keysize = AES_MAX_KEY_SIZE+CTR_RFC3686_NONCE_SIZE, + .ivsize = CTR_RFC3686_IV_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + .geniv = "seqiv", + }, + }, +}; #endif #ifdef HAS_LRW @@ -640,13 +746,17 @@ static int __init aesni_init(void) goto blk_ecb_err; if ((err = crypto_register_alg(&blk_cbc_alg))) goto blk_cbc_err; + if ((err = crypto_register_alg(&blk_ctr_alg))) + goto blk_ctr_err; if ((err = crypto_register_alg(&ablk_ecb_alg))) goto ablk_ecb_err; if ((err = crypto_register_alg(&ablk_cbc_alg))) goto ablk_cbc_err; -#ifdef HAS_CTR if ((err = crypto_register_alg(&ablk_ctr_alg))) goto ablk_ctr_err; +#ifdef HAS_CTR + if ((err = crypto_register_alg(&ablk_rfc3686_ctr_alg))) + goto ablk_rfc3686_ctr_err; #endif #ifdef HAS_LRW if ((err = crypto_register_alg(&ablk_lrw_alg))) @@ -675,13 +785,17 @@ ablk_pcbc_err: ablk_lrw_err: #endif #ifdef HAS_CTR + crypto_unregister_alg(&ablk_rfc3686_ctr_alg); +ablk_rfc3686_ctr_err: +#endif crypto_unregister_alg(&ablk_ctr_alg); ablk_ctr_err: -#endif crypto_unregister_alg(&ablk_cbc_alg); ablk_cbc_err: crypto_unregister_alg(&ablk_ecb_alg); ablk_ecb_err: + crypto_unregister_alg(&blk_ctr_alg); +blk_ctr_err: crypto_unregister_alg(&blk_cbc_alg); blk_cbc_err: crypto_unregister_alg(&blk_ecb_alg); @@ -705,10 +819,12 @@ static void __exit aesni_exit(void) crypto_unregister_alg(&ablk_lrw_alg); #endif #ifdef HAS_CTR - crypto_unregister_alg(&ablk_ctr_alg); + crypto_unregister_alg(&ablk_rfc3686_ctr_alg); #endif + crypto_unregister_alg(&ablk_ctr_alg); crypto_unregister_alg(&ablk_cbc_alg); crypto_unregister_alg(&ablk_ecb_alg); + crypto_unregister_alg(&blk_ctr_alg); crypto_unregister_alg(&blk_cbc_alg); crypto_unregister_alg(&blk_ecb_alg); crypto_unregister_alg(&__aesni_alg); -- cgit v0.10.2 From 18bcc9194da3c97e8f458fb1b06ac5b9b35fb23f Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 10 Mar 2010 18:30:32 +0800 Subject: crypto: tcrypt - Speed testing support for ghash Because ghash needs setkey, the setkey and keysize template support for test_hash_speed is added. 
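For illustration only (editor's sketch, not part of the patch): the new .klen field is what makes a keyed hash such as ghash testable here at all. test_hash_speed() only calls crypto_hash_setkey() when an entry's .klen is non-zero, using the first test buffer (tvmem[0]) as key material, so a keyed template entry needs nothing more than a key length. A minimal hypothetical template mirroring the hash_speed_template_16 entries added below:

/*
 * Editor's sketch (hypothetical template, not part of the patch): a minimal
 * keyed speed template.  test_hash_speed() sets a .klen-byte key taken from
 * tvmem[0] before the timing loop whenever .klen is non-zero.
 */
static struct hash_speed example_keyed_speed_template[] = {
	{ .blen = 1024, .plen = 256, .klen = 16, },	/* e.g. a 16-byte ghash key */

	/* End marker */
	{ .blen = 0, .plen = 0, .klen = 0, }
};

With the patch applied, the new ghash case is reachable through tcrypt's existing module parameters (mode=318, optionally with sec set for a timed run).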
Signed-off-by: Huang Ying Signed-off-by: Herbert Xu diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index aa3f84c..0b7a843 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -437,6 +437,9 @@ static void test_hash_speed(const char *algo, unsigned int sec, goto out; } + if (speed[i].klen) + crypto_hash_setkey(tfm, tvmem[0], speed[i].klen); + printk(KERN_INFO "test%3u " "(%5u byte blocks,%5u bytes per update,%4u updates): ", i, speed[i].blen, speed[i].plen, speed[i].blen / speed[i].plen); @@ -881,6 +884,10 @@ static int do_test(int m) test_hash_speed("rmd320", sec, generic_hash_speed_template); if (mode > 300 && mode < 400) break; + case 318: + test_hash_speed("ghash-generic", sec, hash_speed_template_16); + if (mode > 300 && mode < 400) break; + case 399: break; diff --git a/crypto/tcrypt.h b/crypto/tcrypt.h index 966bbfa..10cb925 100644 --- a/crypto/tcrypt.h +++ b/crypto/tcrypt.h @@ -25,6 +25,7 @@ struct cipher_speed_template { struct hash_speed { unsigned int blen; /* buffer length */ unsigned int plen; /* per-update length */ + unsigned int klen; /* key length */ }; /* @@ -83,4 +84,32 @@ static struct hash_speed generic_hash_speed_template[] = { { .blen = 0, .plen = 0, } }; +static struct hash_speed hash_speed_template_16[] = { + { .blen = 16, .plen = 16, .klen = 16, }, + { .blen = 64, .plen = 16, .klen = 16, }, + { .blen = 64, .plen = 64, .klen = 16, }, + { .blen = 256, .plen = 16, .klen = 16, }, + { .blen = 256, .plen = 64, .klen = 16, }, + { .blen = 256, .plen = 256, .klen = 16, }, + { .blen = 1024, .plen = 16, .klen = 16, }, + { .blen = 1024, .plen = 256, .klen = 16, }, + { .blen = 1024, .plen = 1024, .klen = 16, }, + { .blen = 2048, .plen = 16, .klen = 16, }, + { .blen = 2048, .plen = 256, .klen = 16, }, + { .blen = 2048, .plen = 1024, .klen = 16, }, + { .blen = 2048, .plen = 2048, .klen = 16, }, + { .blen = 4096, .plen = 16, .klen = 16, }, + { .blen = 4096, .plen = 256, .klen = 16, }, + { .blen = 4096, .plen = 1024, .klen = 16, }, + { .blen = 4096, .plen = 4096, .klen = 16, }, + { .blen = 8192, .plen = 16, .klen = 16, }, + { .blen = 8192, .plen = 256, .klen = 16, }, + { .blen = 8192, .plen = 1024, .klen = 16, }, + { .blen = 8192, .plen = 4096, .klen = 16, }, + { .blen = 8192, .plen = 8192, .klen = 16, }, + + /* End marker */ + { .blen = 0, .plen = 0, .klen = 0, } +}; + #endif /* _CRYPTO_TCRYPT_H */ -- cgit v0.10.2 From 32cbd7dfce93382a70f155bf539871b4c55bed29 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Sat, 13 Mar 2010 16:28:42 +0800 Subject: crypto: aesni-intel - Fix CTR optimization build failure with gas 2.16.1 Andrew Morton reported that AES-NI CTR optimization failed to compile with gas 2.16.1, the error message is as follow: arch/x86/crypto/aesni-intel_asm.S: Assembler messages: arch/x86/crypto/aesni-intel_asm.S:752: Error: suffix or operands invalid for `movq' arch/x86/crypto/aesni-intel_asm.S:753: Error: suffix or operands invalid for `movq' To fix this, a gas macro is defined to assemble movq with 64bit general purpose registers and XMM registers. The macro will generate the raw .byte sequence for needed instructions. 
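For illustration only (editor's note, not part of the patch): with register numbers below 8 the emitted bytes are easy to check against the instruction reference. Using %rax and %xmm0, MOVQ_R64_XMM is expected to produce exactly the bytes a newer gas assembles for the corresponding movq, with the 0x6e opcode for the GPR-to-XMM direction and 0x7e for the reverse:

/*
 * Editor's sketch, not part of the patch: expected raw encodings for the
 * simplest register pair (%rax / %xmm0).
 *
 *   MOVQ_R64_XMM %rax %xmm0   ->  0x66 0x48 0x0f 0x6e 0xc0  (movq %rax, %xmm0)
 *   MOVQ_R64_XMM %xmm0 %rax   ->  0x66 0x48 0x0f 0x7e 0xc0  (movq %xmm0, %rax)
 */
static const unsigned char movq_rax_to_xmm0[] = { 0x66, 0x48, 0x0f, 0x6e, 0xc0 };
static const unsigned char movq_xmm0_to_rax[] = { 0x66, 0x48, 0x0f, 0x7e, 0xc0 };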
Reported-by: Andrew Morton Signed-off-by: Huang Ying Signed-off-by: Herbert Xu diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 822846a..ff16756 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -749,8 +749,8 @@ _aesni_inc_init: movaps IV, CTR PSHUFB_XMM BSWAP_MASK CTR mov $1, TCTR_LOW - movq TCTR_LOW, INC - movq CTR, TCTR_LOW + MOVQ_R64_XMM TCTR_LOW INC + MOVQ_R64_XMM CTR TCTR_LOW ret /* diff --git a/arch/x86/include/asm/inst.h b/arch/x86/include/asm/inst.h index 14cf526..840a399 100644 --- a/arch/x86/include/asm/inst.h +++ b/arch/x86/include/asm/inst.h @@ -7,7 +7,66 @@ #ifdef __ASSEMBLY__ +#define REG_NUM_INVALID 100 + +#define REG_TYPE_R64 0 +#define REG_TYPE_XMM 1 +#define REG_TYPE_INVALID 100 + + .macro R64_NUM opd r64 + \opd = REG_NUM_INVALID + .ifc \r64,%rax + \opd = 0 + .endif + .ifc \r64,%rcx + \opd = 1 + .endif + .ifc \r64,%rdx + \opd = 2 + .endif + .ifc \r64,%rbx + \opd = 3 + .endif + .ifc \r64,%rsp + \opd = 4 + .endif + .ifc \r64,%rbp + \opd = 5 + .endif + .ifc \r64,%rsi + \opd = 6 + .endif + .ifc \r64,%rdi + \opd = 7 + .endif + .ifc \r64,%r8 + \opd = 8 + .endif + .ifc \r64,%r9 + \opd = 9 + .endif + .ifc \r64,%r10 + \opd = 10 + .endif + .ifc \r64,%r11 + \opd = 11 + .endif + .ifc \r64,%r12 + \opd = 12 + .endif + .ifc \r64,%r13 + \opd = 13 + .endif + .ifc \r64,%r14 + \opd = 14 + .endif + .ifc \r64,%r15 + \opd = 15 + .endif + .endm + .macro XMM_NUM opd xmm + \opd = REG_NUM_INVALID .ifc \xmm,%xmm0 \opd = 0 .endif @@ -58,13 +117,25 @@ .endif .endm + .macro REG_TYPE type reg + R64_NUM reg_type_r64 \reg + XMM_NUM reg_type_xmm \reg + .if reg_type_r64 != REG_NUM_INVALID + \type = REG_TYPE_R64 + .elseif reg_type_xmm != REG_NUM_INVALID + \type = REG_TYPE_XMM + .else + \type = REG_TYPE_INVALID + .endif + .endm + .macro PFX_OPD_SIZE .byte 0x66 .endm - .macro PFX_REX opd1 opd2 - .if (\opd1 | \opd2) & 8 - .byte 0x40 | ((\opd1 & 8) >> 3) | ((\opd2 & 8) >> 1) + .macro PFX_REX opd1 opd2 W=0 + .if ((\opd1 | \opd2) & 8) || \W + .byte 0x40 | ((\opd1 & 8) >> 3) | ((\opd2 & 8) >> 1) | (\W << 3) .endif .endm @@ -145,6 +216,25 @@ .byte 0x0f, 0x38, 0xdf MODRM 0xc0 aesdeclast_opd1 aesdeclast_opd2 .endm + + .macro MOVQ_R64_XMM opd1 opd2 + REG_TYPE movq_r64_xmm_opd1_type \opd1 + .if movq_r64_xmm_opd1_type == REG_TYPE_XMM + XMM_NUM movq_r64_xmm_opd1 \opd1 + R64_NUM movq_r64_xmm_opd2 \opd2 + .else + R64_NUM movq_r64_xmm_opd1 \opd1 + XMM_NUM movq_r64_xmm_opd2 \opd2 + .endif + PFX_OPD_SIZE + PFX_REX movq_r64_xmm_opd1 movq_r64_xmm_opd2 1 + .if movq_r64_xmm_opd1_type == REG_TYPE_XMM + .byte 0x0f, 0x7e + .else + .byte 0x0f, 0x6e + .endif + MODRM 0xc0 movq_r64_xmm_opd1 movq_r64_xmm_opd2 + .endm #endif #endif -- cgit v0.10.2 From 304a204ec9d4b9b31f3491e736bfbba44feaa1b0 Mon Sep 17 00:00:00 2001 From: Shane Wang Date: Thu, 18 Mar 2010 20:22:55 +0800 Subject: crypto: vmac - Fix big-endian support This patch is to fix the vmac algorithm, add more test cases for vmac, and fix the test failure on some big endian system like s390. 
Signed-off-by: Shane Wang Signed-off-by: Herbert Xu diff --git a/crypto/testmgr.h b/crypto/testmgr.h index fb76517..74e3537 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -1669,17 +1669,73 @@ static struct hash_testvec aes_xcbc128_tv_template[] = { } }; -#define VMAC_AES_TEST_VECTORS 1 -static char vmac_string[128] = {'\x01', '\x01', '\x01', '\x01', +#define VMAC_AES_TEST_VECTORS 8 +static char vmac_string1[128] = {'\x01', '\x01', '\x01', '\x01', '\x02', '\x03', '\x02', '\x02', '\x02', '\x04', '\x01', '\x07', '\x04', '\x01', '\x04', '\x03',}; +static char vmac_string2[128] = {'a', 'b', 'c',}; +static char vmac_string3[128] = {'a', 'b', 'c', 'a', 'b', 'c', + 'a', 'b', 'c', 'a', 'b', 'c', + 'a', 'b', 'c', 'a', 'b', 'c', + 'a', 'b', 'c', 'a', 'b', 'c', + 'a', 'b', 'c', 'a', 'b', 'c', + 'a', 'b', 'c', 'a', 'b', 'c', + 'a', 'b', 'c', 'a', 'b', 'c', + 'a', 'b', 'c', 'a', 'b', 'c', + }; + static struct hash_testvec aes_vmac128_tv_template[] = { { + .key = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + .plaintext = NULL, + .digest = "\x07\x58\x80\x35\x77\xa4\x7b\x54", + .psize = 0, + .ksize = 16, + }, { + .key = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + .plaintext = vmac_string1, + .digest = "\xce\xf5\x3c\xd3\xae\x68\x8c\xa1", + .psize = 128, + .ksize = 16, + }, { + .key = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + .plaintext = vmac_string2, + .digest = "\xc9\x27\xb0\x73\x81\xbd\x14\x2d", + .psize = 128, + .ksize = 16, + }, { .key = "\x00\x01\x02\x03\x04\x05\x06\x07" "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", - .plaintext = vmac_string, - .digest = "\xcb\xd7\x8a\xfd\xb7\x33\x79\xe7", + .plaintext = vmac_string3, + .digest = "\x8d\x1a\x95\x8c\x98\x47\x0b\x19", + .psize = 128, + .ksize = 16, + }, { + .key = "abcdefghijklmnop", + .plaintext = NULL, + .digest = "\x3b\x89\xa1\x26\x9e\x55\x8f\x84", + .psize = 0, + .ksize = 16, + }, { + .key = "abcdefghijklmnop", + .plaintext = vmac_string1, + .digest = "\xab\x5e\xab\xb0\xf6\x8d\x74\xc2", + .psize = 128, + .ksize = 16, + }, { + .key = "abcdefghijklmnop", + .plaintext = vmac_string2, + .digest = "\x11\x15\x68\x42\x3d\x7b\x09\xdf", + .psize = 128, + .ksize = 16, + }, { + .key = "abcdefghijklmnop", + .plaintext = vmac_string3, + .digest = "\x8b\x32\x8f\xe1\xed\x8f\xfa\xd4", .psize = 128, .ksize = 16, }, diff --git a/crypto/vmac.c b/crypto/vmac.c index 0a9468e..0999274 100644 --- a/crypto/vmac.c +++ b/crypto/vmac.c @@ -43,6 +43,8 @@ const u64 m63 = UINT64_C(0x7fffffffffffffff); /* 63-bit mask */ const u64 m64 = UINT64_C(0xffffffffffffffff); /* 64-bit mask */ const u64 mpoly = UINT64_C(0x1fffffff1fffffff); /* Poly key mask */ +#define pe64_to_cpup le64_to_cpup /* Prefer little endian */ + #ifdef __LITTLE_ENDIAN #define INDEX_HIGH 1 #define INDEX_LOW 0 @@ -110,8 +112,8 @@ const u64 mpoly = UINT64_C(0x1fffffff1fffffff); /* Poly key mask */ int i; u64 th, tl; \ rh = rl = 0; \ for (i = 0; i < nw; i += 2) { \ - MUL64(th, tl, le64_to_cpup((mp)+i)+(kp)[i], \ - le64_to_cpup((mp)+i+1)+(kp)[i+1]); \ + MUL64(th, tl, pe64_to_cpup((mp)+i)+(kp)[i], \ + pe64_to_cpup((mp)+i+1)+(kp)[i+1]); \ ADD128(rh, rl, th, tl); \ } \ } while (0) @@ -121,11 +123,11 @@ const u64 mpoly = UINT64_C(0x1fffffff1fffffff); /* Poly key mask */ int i; u64 th, tl; \ rh1 = rl1 = rh = rl = 0; \ for (i = 0; i < nw; i += 2) { \ - MUL64(th, tl, le64_to_cpup((mp)+i)+(kp)[i], \ - le64_to_cpup((mp)+i+1)+(kp)[i+1]); \ + MUL64(th, tl, pe64_to_cpup((mp)+i)+(kp)[i], \ + pe64_to_cpup((mp)+i+1)+(kp)[i+1]); \ ADD128(rh, 
rl, th, tl); \ - MUL64(th, tl, le64_to_cpup((mp)+i)+(kp)[i+2], \ - le64_to_cpup((mp)+i+1)+(kp)[i+3]); \ + MUL64(th, tl, pe64_to_cpup((mp)+i)+(kp)[i+2], \ + pe64_to_cpup((mp)+i+1)+(kp)[i+3]); \ ADD128(rh1, rl1, th, tl); \ } \ } while (0) @@ -136,17 +138,17 @@ const u64 mpoly = UINT64_C(0x1fffffff1fffffff); /* Poly key mask */ int i; u64 th, tl; \ rh = rl = 0; \ for (i = 0; i < nw; i += 8) { \ - MUL64(th, tl, le64_to_cpup((mp)+i)+(kp)[i], \ - le64_to_cpup((mp)+i+1)+(kp)[i+1]); \ + MUL64(th, tl, pe64_to_cpup((mp)+i)+(kp)[i], \ + pe64_to_cpup((mp)+i+1)+(kp)[i+1]); \ ADD128(rh, rl, th, tl); \ - MUL64(th, tl, le64_to_cpup((mp)+i+2)+(kp)[i+2], \ - le64_to_cpup((mp)+i+3)+(kp)[i+3]); \ + MUL64(th, tl, pe64_to_cpup((mp)+i+2)+(kp)[i+2], \ + pe64_to_cpup((mp)+i+3)+(kp)[i+3]); \ ADD128(rh, rl, th, tl); \ - MUL64(th, tl, le64_to_cpup((mp)+i+4)+(kp)[i+4], \ - le64_to_cpup((mp)+i+5)+(kp)[i+5]); \ + MUL64(th, tl, pe64_to_cpup((mp)+i+4)+(kp)[i+4], \ + pe64_to_cpup((mp)+i+5)+(kp)[i+5]); \ ADD128(rh, rl, th, tl); \ - MUL64(th, tl, le64_to_cpup((mp)+i+6)+(kp)[i+6], \ - le64_to_cpup((mp)+i+7)+(kp)[i+7]); \ + MUL64(th, tl, pe64_to_cpup((mp)+i+6)+(kp)[i+6], \ + pe64_to_cpup((mp)+i+7)+(kp)[i+7]); \ ADD128(rh, rl, th, tl); \ } \ } while (0) @@ -156,29 +158,29 @@ const u64 mpoly = UINT64_C(0x1fffffff1fffffff); /* Poly key mask */ int i; u64 th, tl; \ rh1 = rl1 = rh = rl = 0; \ for (i = 0; i < nw; i += 8) { \ - MUL64(th, tl, le64_to_cpup((mp)+i)+(kp)[i], \ - le64_to_cpup((mp)+i+1)+(kp)[i+1]); \ + MUL64(th, tl, pe64_to_cpup((mp)+i)+(kp)[i], \ + pe64_to_cpup((mp)+i+1)+(kp)[i+1]); \ ADD128(rh, rl, th, tl); \ - MUL64(th, tl, le64_to_cpup((mp)+i)+(kp)[i+2], \ - le64_to_cpup((mp)+i+1)+(kp)[i+3]); \ + MUL64(th, tl, pe64_to_cpup((mp)+i)+(kp)[i+2], \ + pe64_to_cpup((mp)+i+1)+(kp)[i+3]); \ ADD128(rh1, rl1, th, tl); \ - MUL64(th, tl, le64_to_cpup((mp)+i+2)+(kp)[i+2], \ - le64_to_cpup((mp)+i+3)+(kp)[i+3]); \ + MUL64(th, tl, pe64_to_cpup((mp)+i+2)+(kp)[i+2], \ + pe64_to_cpup((mp)+i+3)+(kp)[i+3]); \ ADD128(rh, rl, th, tl); \ - MUL64(th, tl, le64_to_cpup((mp)+i+2)+(kp)[i+4], \ - le64_to_cpup((mp)+i+3)+(kp)[i+5]); \ + MUL64(th, tl, pe64_to_cpup((mp)+i+2)+(kp)[i+4], \ + pe64_to_cpup((mp)+i+3)+(kp)[i+5]); \ ADD128(rh1, rl1, th, tl); \ - MUL64(th, tl, le64_to_cpup((mp)+i+4)+(kp)[i+4], \ - le64_to_cpup((mp)+i+5)+(kp)[i+5]); \ + MUL64(th, tl, pe64_to_cpup((mp)+i+4)+(kp)[i+4], \ + pe64_to_cpup((mp)+i+5)+(kp)[i+5]); \ ADD128(rh, rl, th, tl); \ - MUL64(th, tl, le64_to_cpup((mp)+i+4)+(kp)[i+6], \ - le64_to_cpup((mp)+i+5)+(kp)[i+7]); \ + MUL64(th, tl, pe64_to_cpup((mp)+i+4)+(kp)[i+6], \ + pe64_to_cpup((mp)+i+5)+(kp)[i+7]); \ ADD128(rh1, rl1, th, tl); \ - MUL64(th, tl, le64_to_cpup((mp)+i+6)+(kp)[i+6], \ - le64_to_cpup((mp)+i+7)+(kp)[i+7]); \ + MUL64(th, tl, pe64_to_cpup((mp)+i+6)+(kp)[i+6], \ + pe64_to_cpup((mp)+i+7)+(kp)[i+7]); \ ADD128(rh, rl, th, tl); \ - MUL64(th, tl, le64_to_cpup((mp)+i+6)+(kp)[i+8], \ - le64_to_cpup((mp)+i+7)+(kp)[i+9]); \ + MUL64(th, tl, pe64_to_cpup((mp)+i+6)+(kp)[i+8], \ + pe64_to_cpup((mp)+i+7)+(kp)[i+9]); \ ADD128(rh1, rl1, th, tl); \ } \ } while (0) @@ -216,8 +218,8 @@ const u64 mpoly = UINT64_C(0x1fffffff1fffffff); /* Poly key mask */ int i; \ rh = rl = t = 0; \ for (i = 0; i < nw; i += 2) { \ - t1 = le64_to_cpup(mp+i) + kp[i]; \ - t2 = le64_to_cpup(mp+i+1) + kp[i+1]; \ + t1 = pe64_to_cpup(mp+i) + kp[i]; \ + t2 = pe64_to_cpup(mp+i+1) + kp[i+1]; \ m2 = MUL32(t1 >> 32, t2); \ m1 = MUL32(t1, t2 >> 32); \ ADD128(rh, rl, MUL32(t1 >> 32, t2 >> 32), \ @@ -322,8 +324,7 @@ static void vhash_abort(struct vmac_ctx *ctx) 
ctx->first_block_processed = 0; } -static u64 l3hash(u64 p1, u64 p2, - u64 k1, u64 k2, u64 len) +static u64 l3hash(u64 p1, u64 p2, u64 k1, u64 k2, u64 len) { u64 rh, rl, t, z = 0; @@ -474,7 +475,7 @@ static u64 vmac(unsigned char m[], unsigned int mbytes, } p = be64_to_cpup(out_p + i); h = vhash(m, mbytes, (u64 *)0, &ctx->__vmac_ctx); - return p + h; + return le64_to_cpu(p + h); } static int vmac_set_key(unsigned char user_key[], struct vmac_ctx_t *ctx) @@ -549,10 +550,6 @@ static int vmac_setkey(struct crypto_shash *parent, static int vmac_init(struct shash_desc *pdesc) { - struct crypto_shash *parent = pdesc->tfm; - struct vmac_ctx_t *ctx = crypto_shash_ctx(parent); - - memset(&ctx->__vmac_ctx, 0, sizeof(struct vmac_ctx)); return 0; } -- cgit v0.10.2 From 80a6d7db9fa06c29c89ccce0374870ac64a81d3f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 24 Mar 2010 21:35:23 +0800 Subject: crypto: pcrypt - Handle crypto_get_attr_type errors I was concerned about the error handling for crypto_get_attr_type() in pcrypt_alloc_aead(). Steffen Klassert pointed out that we could simply avoid calling crypto_get_attr_type() if we passed the type and mask as parameters. Signed-off-by: Dan Carpenter Acked-by: Steffen Klassert Signed-off-by: Herbert Xu diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c index 8020124..247178c 100644 --- a/crypto/pcrypt.c +++ b/crypto/pcrypt.c @@ -315,16 +315,13 @@ out_free_inst: goto out; } -static struct crypto_instance *pcrypt_alloc_aead(struct rtattr **tb) +static struct crypto_instance *pcrypt_alloc_aead(struct rtattr **tb, + u32 type, u32 mask) { struct crypto_instance *inst; struct crypto_alg *alg; - struct crypto_attr_type *algt; - - algt = crypto_get_attr_type(tb); - alg = crypto_get_attr_alg(tb, algt->type, - (algt->mask & CRYPTO_ALG_TYPE_MASK)); + alg = crypto_get_attr_alg(tb, type, (mask & CRYPTO_ALG_TYPE_MASK)); if (IS_ERR(alg)) return ERR_CAST(alg); @@ -365,7 +362,7 @@ static struct crypto_instance *pcrypt_alloc(struct rtattr **tb) switch (algt->type & algt->mask & CRYPTO_ALG_TYPE_MASK) { case CRYPTO_ALG_TYPE_AEAD: - return pcrypt_alloc_aead(tb); + return pcrypt_alloc_aead(tb, algt->type, algt->mask); } return ERR_PTR(-EINVAL); -- cgit v0.10.2 From 62e7bec49479e0c61e8cfd914f722a9ca6fd52e5 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 24 Mar 2010 21:37:57 +0800 Subject: crypto: aesni-intel - Fix another CTR build failure with gas 2.16.1 The previous fix for the AES-NI CTR optimization build failure with gas 2.16.1 introduced another build failure of its own. This patch fixes that. Reported-by: Andrew Morton Signed-off-by: Huang Ying Signed-off-by: Herbert Xu diff --git a/arch/x86/include/asm/inst.h b/arch/x86/include/asm/inst.h index 840a399..280bf7f 100644 --- a/arch/x86/include/asm/inst.h +++ b/arch/x86/include/asm/inst.h @@ -120,9 +120,9 @@ .macro REG_TYPE type reg R64_NUM reg_type_r64 \reg XMM_NUM reg_type_xmm \reg - .if reg_type_r64 != REG_NUM_INVALID + .if reg_type_r64 <> REG_NUM_INVALID \type = REG_TYPE_R64 - .elseif reg_type_xmm != REG_NUM_INVALID + .elseif reg_type_xmm <> REG_NUM_INVALID \type = REG_TYPE_XMM .else \type = REG_TYPE_INVALID -- cgit v0.10.2 From 975d260355fca4734b545a0e3366672af0356905 Mon Sep 17 00:00:00 2001 From: Henrik Kretzschmar Date: Mon, 29 Mar 2010 16:15:31 +0800 Subject: padata: Section cleanup This patch removes the __cpuinit annotation from padata_cpu_callback(), which is referenced by the exported function padata_alloc(). This could lead to problems if CONFIG_HOTPLUG_CPU is disabled, which is a common configuration.
WARNING: kernel/built-in.o(.text+0x7ffcb): Section mismatch in reference from the function padata_alloc() to the function .cpuinit.text:padata_cpu_callback() The function padata_alloc() references the function __cpuinit padata_cpu_callback(). This is often because padata_alloc lacks a __cpuinit annotation or the annotation of padata_cpu_callback is wrong. Signed-off-by: Henrik Kretzschmar Signed-off-by: Herbert Xu diff --git a/kernel/padata.c b/kernel/padata.c index 6f9bcb8..0282478 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -569,8 +569,8 @@ void padata_stop(struct padata_instance *pinst) } EXPORT_SYMBOL(padata_stop); -static int __cpuinit padata_cpu_callback(struct notifier_block *nfb, - unsigned long action, void *hcpu) +static int padata_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) { int err; struct padata_instance *pinst; -- cgit v0.10.2 From 0328ac267564089d9cedfb568f936d30a6debd21 Mon Sep 17 00:00:00 2001 From: Uri Simchoni Date: Thu, 8 Apr 2010 19:25:37 +0300 Subject: crypto: mv_cesa - Invoke the user callback from a softirq context Invoke the user callback from a softirq context Signed-off-by: Uri Simchoni Signed-off-by: Herbert Xu diff --git a/drivers/crypto/mv_cesa.c b/drivers/crypto/mv_cesa.c index b21ef63..3e60ba9 100644 --- a/drivers/crypto/mv_cesa.c +++ b/drivers/crypto/mv_cesa.c @@ -275,7 +275,9 @@ static void dequeue_complete_req(void) sg_miter_stop(&cpg->p.dst_sg_it); mv_crypto_algo_completion(); cpg->eng_st = ENGINE_IDLE; + local_bh_disable(); req->base.complete(&req->base, 0); + local_bh_enable(); } } -- cgit v0.10.2 From 6bc6fcd609080461682c5cc0a1e3bf4345d6419d Mon Sep 17 00:00:00 2001 From: Uri Simchoni Date: Thu, 8 Apr 2010 19:25:56 +0300 Subject: crypto: mv_cesa - Remove compiler warning in mv_cesa driver Remove compiler warning Signed-off-by: Uri Simchoni Signed-off-by: Herbert Xu diff --git a/drivers/crypto/mv_cesa.c b/drivers/crypto/mv_cesa.c index 3e60ba9..37d9f06 100644 --- a/drivers/crypto/mv_cesa.c +++ b/drivers/crypto/mv_cesa.c @@ -178,6 +178,7 @@ static void mv_process_current_q(int first_block) op.config = CFG_OP_CRYPT_ONLY | CFG_ENCM_AES | CFG_ENC_MODE_ECB; break; case COP_AES_CBC: + default: op.config = CFG_OP_CRYPT_ONLY | CFG_ENCM_AES | CFG_ENC_MODE_CBC; op.enc_iv = ENC_IV_POINT(SRAM_DATA_IV) | ENC_IV_BUF_POINT(SRAM_DATA_IV_BUF); -- cgit v0.10.2 From f565e67ec1b8f4a95d21550f9b879fe86b4132e0 Mon Sep 17 00:00:00 2001 From: Uri Simchoni Date: Thu, 8 Apr 2010 19:26:34 +0300 Subject: crypto: mv_cesa - Fix situation where the dest sglist is organized differently than the source sglist Bugfix for situations where the destination scatterlist has a different buffer structure than the source scatterlist (e.g. 
source has one 2K buffer and dest has 2 1K buffers) Signed-off-by: Uri Simchoni Signed-off-by: Herbert Xu diff --git a/drivers/crypto/mv_cesa.c b/drivers/crypto/mv_cesa.c index 37d9f06..018a95c 100644 --- a/drivers/crypto/mv_cesa.c +++ b/drivers/crypto/mv_cesa.c @@ -242,6 +242,8 @@ static void dequeue_complete_req(void) struct ablkcipher_request *req = cpg->cur_req; void *buf; int ret; + int need_copy_len = cpg->p.crypt_len; + int sram_offset = 0; cpg->p.total_req_bytes += cpg->p.crypt_len; do { @@ -257,14 +259,16 @@ static void dequeue_complete_req(void) buf = cpg->p.dst_sg_it.addr; buf += cpg->p.dst_start; - dst_copy = min(cpg->p.crypt_len, cpg->p.sg_dst_left); - - memcpy(buf, cpg->sram + SRAM_DATA_OUT_START, dst_copy); + dst_copy = min(need_copy_len, cpg->p.sg_dst_left); + memcpy(buf, + cpg->sram + SRAM_DATA_OUT_START + sram_offset, + dst_copy); + sram_offset += dst_copy; cpg->p.sg_dst_left -= dst_copy; - cpg->p.crypt_len -= dst_copy; + need_copy_len -= dst_copy; cpg->p.dst_start += dst_copy; - } while (cpg->p.crypt_len > 0); + } while (need_copy_len > 0); BUG_ON(cpg->eng_st != ENGINE_W_DEQUEUE); if (cpg->p.total_req_bytes < req->nbytes) { -- cgit v0.10.2 From 15d4dd3594221f11a7730fcf2d5f9942b96cdd7e Mon Sep 17 00:00:00 2001 From: Uri Simchoni Date: Thu, 8 Apr 2010 19:27:02 +0300 Subject: crypto: mv_cesa - Fix situations where the src sglist spans more data than the request asks for Fix for situations where the source scatterlist spans more data than the request nbytes Signed-off-by: Uri Simchoni Signed-off-by: Herbert Xu diff --git a/drivers/crypto/mv_cesa.c b/drivers/crypto/mv_cesa.c index 018a95c..096f9ff 100644 --- a/drivers/crypto/mv_cesa.c +++ b/drivers/crypto/mv_cesa.c @@ -143,27 +143,45 @@ static int mv_setkey_aes(struct crypto_ablkcipher *cipher, const u8 *key, return 0; } -static void setup_data_in(struct ablkcipher_request *req) +static void copy_src_to_buf(struct req_progress *p, char *dbuf, int len) { int ret; - void *buf; - - if (!cpg->p.sg_src_left) { - ret = sg_miter_next(&cpg->p.src_sg_it); - BUG_ON(!ret); - cpg->p.sg_src_left = cpg->p.src_sg_it.length; - cpg->p.src_start = 0; - } + void *sbuf; + int copied = 0; - cpg->p.crypt_len = min(cpg->p.sg_src_left, cpg->max_req_size); - - buf = cpg->p.src_sg_it.addr; - buf += cpg->p.src_start; + while (1) { + if (!p->sg_src_left) { + ret = sg_miter_next(&p->src_sg_it); + BUG_ON(!ret); + p->sg_src_left = p->src_sg_it.length; + p->src_start = 0; + } - memcpy(cpg->sram + SRAM_DATA_IN_START, buf, cpg->p.crypt_len); + sbuf = p->src_sg_it.addr + p->src_start; + + if (p->sg_src_left <= len - copied) { + memcpy(dbuf + copied, sbuf, p->sg_src_left); + copied += p->sg_src_left; + p->sg_src_left = 0; + if (copied >= len) + break; + } else { + int copy_len = len - copied; + memcpy(dbuf + copied, sbuf, copy_len); + p->src_start += copy_len; + p->sg_src_left -= copy_len; + break; + } + } +} - cpg->p.sg_src_left -= cpg->p.crypt_len; - cpg->p.src_start += cpg->p.crypt_len; +static void setup_data_in(struct ablkcipher_request *req) +{ + struct req_progress *p = &cpg->p; + p->crypt_len = + min((int)req->nbytes - p->total_req_bytes, cpg->max_req_size); + copy_src_to_buf(p, cpg->sram + SRAM_DATA_IN_START, + p->crypt_len); } static void mv_process_current_q(int first_block) @@ -289,12 +307,16 @@ static void dequeue_complete_req(void) static int count_sgs(struct scatterlist *sl, unsigned int total_bytes) { int i = 0; - - do { - total_bytes -= sl[i].length; - i++; - - } while (total_bytes > 0); + size_t cur_len; + + while (1) { + cur_len = 
sl[i].length; + ++i; + if (total_bytes > cur_len) + total_bytes -= cur_len; + else + break; + } return i; } -- cgit v0.10.2 From 3b61a90502481045f56c1c41a2af35ee48ca8b80 Mon Sep 17 00:00:00 2001 From: Uri Simchoni Date: Thu, 8 Apr 2010 19:27:33 +0300 Subject: crypto: mv_cesa - Enqueue generic async requests Enqueue generic async requests rather than ablkcipher requests in the driver's queue Signed-off-by: Uri Simchoni Signed-off-by: Herbert Xu diff --git a/drivers/crypto/mv_cesa.c b/drivers/crypto/mv_cesa.c index 096f9ff..8891e2e 100644 --- a/drivers/crypto/mv_cesa.c +++ b/drivers/crypto/mv_cesa.c @@ -39,6 +39,7 @@ enum engine_status { * @sg_src_left: bytes left in src to process (scatter list) * @src_start: offset to add to src start position (scatter list) * @crypt_len: length of current crypt process + * @hw_nbytes: total bytes to process in hw for this request * @sg_dst_left: bytes left dst to process in this scatter list * @dst_start: offset to add to dst start position (scatter list) * @total_req_bytes: total number of bytes processed (request). @@ -55,6 +56,7 @@ struct req_progress { int sg_src_left; int src_start; int crypt_len; + int hw_nbytes; /* dst mostly */ int sg_dst_left; int dst_start; @@ -71,7 +73,7 @@ struct crypto_priv { spinlock_t lock; struct crypto_queue queue; enum engine_status eng_st; - struct ablkcipher_request *cur_req; + struct crypto_async_request *cur_req; struct req_progress p; int max_req_size; int sram_size; @@ -175,18 +177,18 @@ static void copy_src_to_buf(struct req_progress *p, char *dbuf, int len) } } -static void setup_data_in(struct ablkcipher_request *req) +static void setup_data_in(void) { struct req_progress *p = &cpg->p; p->crypt_len = - min((int)req->nbytes - p->total_req_bytes, cpg->max_req_size); + min(p->hw_nbytes - p->total_req_bytes, cpg->max_req_size); copy_src_to_buf(p, cpg->sram + SRAM_DATA_IN_START, p->crypt_len); } static void mv_process_current_q(int first_block) { - struct ablkcipher_request *req = cpg->cur_req; + struct ablkcipher_request *req = ablkcipher_request_cast(cpg->cur_req); struct mv_ctx *ctx = crypto_tfm_ctx(req->base.tfm); struct mv_req_ctx *req_ctx = ablkcipher_request_ctx(req); struct sec_accel_config op; @@ -229,7 +231,7 @@ static void mv_process_current_q(int first_block) ENC_P_DST(SRAM_DATA_OUT_START); op.enc_key_p = SRAM_DATA_KEY_P; - setup_data_in(req); + setup_data_in(); op.enc_len = cpg->p.crypt_len; memcpy(cpg->sram + SRAM_CONFIG, &op, sizeof(struct sec_accel_config)); @@ -246,7 +248,7 @@ static void mv_process_current_q(int first_block) static void mv_crypto_algo_completion(void) { - struct ablkcipher_request *req = cpg->cur_req; + struct ablkcipher_request *req = ablkcipher_request_cast(cpg->cur_req); struct mv_req_ctx *req_ctx = ablkcipher_request_ctx(req); if (req_ctx->op != COP_AES_CBC) @@ -257,7 +259,7 @@ static void mv_crypto_algo_completion(void) static void dequeue_complete_req(void) { - struct ablkcipher_request *req = cpg->cur_req; + struct crypto_async_request *req = cpg->cur_req; void *buf; int ret; int need_copy_len = cpg->p.crypt_len; @@ -289,7 +291,7 @@ static void dequeue_complete_req(void) } while (need_copy_len > 0); BUG_ON(cpg->eng_st != ENGINE_W_DEQUEUE); - if (cpg->p.total_req_bytes < req->nbytes) { + if (cpg->p.total_req_bytes < cpg->p.hw_nbytes) { /* process next scatter list entry */ cpg->eng_st = ENGINE_BUSY; mv_process_current_q(0); @@ -299,7 +301,7 @@ static void dequeue_complete_req(void) mv_crypto_algo_completion(); cpg->eng_st = ENGINE_IDLE; local_bh_disable(); - 
req->base.complete(&req->base, 0); + req->complete(req, 0); local_bh_enable(); } } @@ -323,16 +325,19 @@ static int count_sgs(struct scatterlist *sl, unsigned int total_bytes) static void mv_enqueue_new_req(struct ablkcipher_request *req) { + struct req_progress *p = &cpg->p; int num_sgs; - cpg->cur_req = req; - memset(&cpg->p, 0, sizeof(struct req_progress)); + cpg->cur_req = &req->base; + memset(p, 0, sizeof(struct req_progress)); + p->hw_nbytes = req->nbytes; num_sgs = count_sgs(req->src, req->nbytes); - sg_miter_start(&cpg->p.src_sg_it, req->src, num_sgs, SG_MITER_FROM_SG); + sg_miter_start(&p->src_sg_it, req->src, num_sgs, SG_MITER_FROM_SG); num_sgs = count_sgs(req->dst, req->nbytes); - sg_miter_start(&cpg->p.dst_sg_it, req->dst, num_sgs, SG_MITER_TO_SG); + sg_miter_start(&p->dst_sg_it, req->dst, num_sgs, SG_MITER_TO_SG); + mv_process_current_q(1); } @@ -378,13 +383,13 @@ static int queue_manag(void *data) return 0; } -static int mv_handle_req(struct ablkcipher_request *req) +static int mv_handle_req(struct crypto_async_request *req) { unsigned long flags; int ret; spin_lock_irqsave(&cpg->lock, flags); - ret = ablkcipher_enqueue_request(&cpg->queue, req); + ret = crypto_enqueue_request(&cpg->queue, req); spin_unlock_irqrestore(&cpg->lock, flags); wake_up_process(cpg->queue_th); return ret; @@ -397,7 +402,7 @@ static int mv_enc_aes_ecb(struct ablkcipher_request *req) req_ctx->op = COP_AES_ECB; req_ctx->decrypt = 0; - return mv_handle_req(req); + return mv_handle_req(&req->base); } static int mv_dec_aes_ecb(struct ablkcipher_request *req) @@ -409,7 +414,7 @@ static int mv_dec_aes_ecb(struct ablkcipher_request *req) req_ctx->decrypt = 1; compute_aes_dec_key(ctx); - return mv_handle_req(req); + return mv_handle_req(&req->base); } static int mv_enc_aes_cbc(struct ablkcipher_request *req) @@ -419,7 +424,7 @@ static int mv_enc_aes_cbc(struct ablkcipher_request *req) req_ctx->op = COP_AES_CBC; req_ctx->decrypt = 0; - return mv_handle_req(req); + return mv_handle_req(&req->base); } static int mv_dec_aes_cbc(struct ablkcipher_request *req) @@ -431,7 +436,7 @@ static int mv_dec_aes_cbc(struct ablkcipher_request *req) req_ctx->decrypt = 1; compute_aes_dec_key(ctx); - return mv_handle_req(req); + return mv_handle_req(&req->base); } static int mv_cra_init(struct crypto_tfm *tfm) -- cgit v0.10.2 From 7a5f691ef03f4c01d2703b5ec4ddd4c17e645dec Mon Sep 17 00:00:00 2001 From: Uri Simchoni Date: Thu, 8 Apr 2010 19:29:16 +0300 Subject: crypto: mv_cesa - Rename a variable to a more suitable name Rename a variable to a more suitable name Signed-off-by: Uri Simchoni Signed-off-by: Herbert Xu diff --git a/drivers/crypto/mv_cesa.c b/drivers/crypto/mv_cesa.c index 8891e2e..4262932 100644 --- a/drivers/crypto/mv_cesa.c +++ b/drivers/crypto/mv_cesa.c @@ -42,7 +42,7 @@ enum engine_status { * @hw_nbytes: total bytes to process in hw for this request * @sg_dst_left: bytes left dst to process in this scatter list * @dst_start: offset to add to dst start position (scatter list) - * @total_req_bytes: total number of bytes processed (request). + * @hw_processed_bytes: number of bytes processed by hw (request). * * sg helper are used to iterate over the scatterlist. 
Since the size of the * SRAM may be less than the scatter size, this struct struct is used to keep @@ -60,7 +60,7 @@ struct req_progress { /* dst mostly */ int sg_dst_left; int dst_start; - int total_req_bytes; + int hw_processed_bytes; }; struct crypto_priv { @@ -181,7 +181,7 @@ static void setup_data_in(void) { struct req_progress *p = &cpg->p; p->crypt_len = - min(p->hw_nbytes - p->total_req_bytes, cpg->max_req_size); + min(p->hw_nbytes - p->hw_processed_bytes, cpg->max_req_size); copy_src_to_buf(p, cpg->sram + SRAM_DATA_IN_START, p->crypt_len); } @@ -265,7 +265,7 @@ static void dequeue_complete_req(void) int need_copy_len = cpg->p.crypt_len; int sram_offset = 0; - cpg->p.total_req_bytes += cpg->p.crypt_len; + cpg->p.hw_processed_bytes += cpg->p.crypt_len; do { int dst_copy; @@ -291,7 +291,7 @@ static void dequeue_complete_req(void) } while (need_copy_len > 0); BUG_ON(cpg->eng_st != ENGINE_W_DEQUEUE); - if (cpg->p.total_req_bytes < cpg->p.hw_nbytes) { + if (cpg->p.hw_processed_bytes < cpg->p.hw_nbytes) { /* process next scatter list entry */ cpg->eng_st = ENGINE_BUSY; mv_process_current_q(0); -- cgit v0.10.2 From a58094ac5f95d6969e5c52ff096d2fd2864542af Mon Sep 17 00:00:00 2001 From: Uri Simchoni Date: Thu, 8 Apr 2010 19:30:19 +0300 Subject: crypto: mv_cesa - Execute some code via function pointers rather than direct calls Execute some code via function pointers rather than direct calls (to allow customization in the hashing request) Signed-off-by: Uri Simchoni Signed-off-by: Herbert Xu diff --git a/drivers/crypto/mv_cesa.c b/drivers/crypto/mv_cesa.c index 4262932..2b4f07a 100644 --- a/drivers/crypto/mv_cesa.c +++ b/drivers/crypto/mv_cesa.c @@ -51,6 +51,8 @@ enum engine_status { struct req_progress { struct sg_mapping_iter src_sg_it; struct sg_mapping_iter dst_sg_it; + void (*complete) (void); + void (*process) (int is_first); /* src mostly */ int sg_src_left; @@ -251,6 +253,9 @@ static void mv_crypto_algo_completion(void) struct ablkcipher_request *req = ablkcipher_request_cast(cpg->cur_req); struct mv_req_ctx *req_ctx = ablkcipher_request_ctx(req); + sg_miter_stop(&cpg->p.src_sg_it); + sg_miter_stop(&cpg->p.dst_sg_it); + if (req_ctx->op != COP_AES_CBC) return ; @@ -294,11 +299,9 @@ static void dequeue_complete_req(void) if (cpg->p.hw_processed_bytes < cpg->p.hw_nbytes) { /* process next scatter list entry */ cpg->eng_st = ENGINE_BUSY; - mv_process_current_q(0); + cpg->p.process(0); } else { - sg_miter_stop(&cpg->p.src_sg_it); - sg_miter_stop(&cpg->p.dst_sg_it); - mv_crypto_algo_completion(); + cpg->p.complete(); cpg->eng_st = ENGINE_IDLE; local_bh_disable(); req->complete(req, 0); @@ -331,6 +334,8 @@ static void mv_enqueue_new_req(struct ablkcipher_request *req) cpg->cur_req = &req->base; memset(p, 0, sizeof(struct req_progress)); p->hw_nbytes = req->nbytes; + p->complete = mv_crypto_algo_completion; + p->process = mv_process_current_q; num_sgs = count_sgs(req->src, req->nbytes); sg_miter_start(&p->src_sg_it, req->src, num_sgs, SG_MITER_FROM_SG); -- cgit v0.10.2 From f0d03deaad05d9cc99cd2ee0475c9ecd726c19ae Mon Sep 17 00:00:00 2001 From: Uri Simchoni Date: Thu, 8 Apr 2010 19:31:48 +0300 Subject: crypto: mv_cesa - Make the copy-back of data optional Make the copy-back of data optional (not done in hashing requests) Signed-off-by: Uri Simchoni Signed-off-by: Herbert Xu diff --git a/drivers/crypto/mv_cesa.c b/drivers/crypto/mv_cesa.c index 2b4f07a..49a2206 100644 --- a/drivers/crypto/mv_cesa.c +++ b/drivers/crypto/mv_cesa.c @@ -40,6 +40,7 @@ enum engine_status { * @src_start: offset to add
to src start position (scatter list) * @crypt_len: length of current crypt process * @hw_nbytes: total bytes to process in hw for this request + * @copy_back: whether to copy data back (crypt) or not (hash) * @sg_dst_left: bytes left dst to process in this scatter list * @dst_start: offset to add to dst start position (scatter list) * @hw_processed_bytes: number of bytes processed by hw (request). @@ -60,6 +61,7 @@ struct req_progress { int crypt_len; int hw_nbytes; /* dst mostly */ + int copy_back; int sg_dst_left; int dst_start; int hw_processed_bytes; @@ -267,33 +269,35 @@ static void dequeue_complete_req(void) struct crypto_async_request *req = cpg->cur_req; void *buf; int ret; - int need_copy_len = cpg->p.crypt_len; - int sram_offset = 0; - cpg->p.hw_processed_bytes += cpg->p.crypt_len; - do { - int dst_copy; + if (cpg->p.copy_back) { + int need_copy_len = cpg->p.crypt_len; + int sram_offset = 0; + do { + int dst_copy; + + if (!cpg->p.sg_dst_left) { + ret = sg_miter_next(&cpg->p.dst_sg_it); + BUG_ON(!ret); + cpg->p.sg_dst_left = cpg->p.dst_sg_it.length; + cpg->p.dst_start = 0; + } - if (!cpg->p.sg_dst_left) { - ret = sg_miter_next(&cpg->p.dst_sg_it); - BUG_ON(!ret); - cpg->p.sg_dst_left = cpg->p.dst_sg_it.length; - cpg->p.dst_start = 0; - } + buf = cpg->p.dst_sg_it.addr; + buf += cpg->p.dst_start; - buf = cpg->p.dst_sg_it.addr; - buf += cpg->p.dst_start; + dst_copy = min(need_copy_len, cpg->p.sg_dst_left); - dst_copy = min(need_copy_len, cpg->p.sg_dst_left); + memcpy(buf, + cpg->sram + SRAM_DATA_OUT_START + sram_offset, + dst_copy); + sram_offset += dst_copy; + cpg->p.sg_dst_left -= dst_copy; + need_copy_len -= dst_copy; + cpg->p.dst_start += dst_copy; + } while (need_copy_len > 0); + } - memcpy(buf, - cpg->sram + SRAM_DATA_OUT_START + sram_offset, - dst_copy); - sram_offset += dst_copy; - cpg->p.sg_dst_left -= dst_copy; - need_copy_len -= dst_copy; - cpg->p.dst_start += dst_copy; - } while (need_copy_len > 0); BUG_ON(cpg->eng_st != ENGINE_W_DEQUEUE); if (cpg->p.hw_processed_bytes < cpg->p.hw_nbytes) { @@ -336,6 +340,7 @@ static void mv_enqueue_new_req(struct ablkcipher_request *req) p->hw_nbytes = req->nbytes; p->complete = mv_crypto_algo_completion; p->process = mv_process_current_q; + p->copy_back = 1; num_sgs = count_sgs(req->src, req->nbytes); sg_miter_start(&p->src_sg_it, req->src, num_sgs, SG_MITER_FROM_SG); -- cgit v0.10.2 From 0c5c6c4bae8fe9ae3d86b44c332eb1267df1ec99 Mon Sep 17 00:00:00 2001 From: Uri Simchoni Date: Thu, 8 Apr 2010 19:33:26 +0300 Subject: crypto: mv_cesa - Support processing of data from previous requests Support processing of data from previous requests (as in hashing update/final requests). 
Signed-off-by: Uri Simchoni Signed-off-by: Herbert Xu diff --git a/drivers/crypto/mv_cesa.c b/drivers/crypto/mv_cesa.c index 49a2206..d0fb10e 100644 --- a/drivers/crypto/mv_cesa.c +++ b/drivers/crypto/mv_cesa.c @@ -184,10 +184,11 @@ static void copy_src_to_buf(struct req_progress *p, char *dbuf, int len) static void setup_data_in(void) { struct req_progress *p = &cpg->p; - p->crypt_len = + int data_in_sram = min(p->hw_nbytes - p->hw_processed_bytes, cpg->max_req_size); - copy_src_to_buf(p, cpg->sram + SRAM_DATA_IN_START, - p->crypt_len); + copy_src_to_buf(p, cpg->sram + SRAM_DATA_IN_START + p->crypt_len, + data_in_sram - p->crypt_len); + p->crypt_len = data_in_sram; } static void mv_process_current_q(int first_block) @@ -298,6 +299,7 @@ static void dequeue_complete_req(void) } while (need_copy_len > 0); } + cpg->p.crypt_len = 0; BUG_ON(cpg->eng_st != ENGINE_W_DEQUEUE); if (cpg->p.hw_processed_bytes < cpg->p.hw_nbytes) { -- cgit v0.10.2 From 750052dd2400cd09e0864d75b63c2c0bf605056f Mon Sep 17 00:00:00 2001 From: Uri Simchoni Date: Thu, 8 Apr 2010 19:34:55 +0300 Subject: crypto: mv_cesa - Add sha1 and hmac(sha1) async hash drivers Add sha1 and hmac(sha1) async hash drivers Signed-off-by: Uri Simchoni Signed-off-by: Herbert Xu diff --git a/drivers/crypto/mv_cesa.c b/drivers/crypto/mv_cesa.c index d0fb10e..1cee5a9 100644 --- a/drivers/crypto/mv_cesa.c +++ b/drivers/crypto/mv_cesa.c @@ -14,8 +14,14 @@ #include #include #include +#include +#include #include "mv_cesa.h" + +#define MV_CESA "MV-CESA:" +#define MAX_HW_HASH_SIZE 0xFFFF + /* * STM: * /---------------------------------------\ @@ -38,7 +44,7 @@ enum engine_status { * @dst_sg_it: sg iterator for dst * @sg_src_left: bytes left in src to process (scatter list) * @src_start: offset to add to src start position (scatter list) - * @crypt_len: length of current crypt process + * @crypt_len: length of current hw crypt/hash process * @hw_nbytes: total bytes to process in hw for this request * @copy_back: whether to copy data back (crypt) or not (hash) * @sg_dst_left: bytes left dst to process in this scatter list @@ -81,6 +87,8 @@ struct crypto_priv { struct req_progress p; int max_req_size; int sram_size; + int has_sha1; + int has_hmac_sha1; }; static struct crypto_priv *cpg; @@ -102,6 +110,31 @@ struct mv_req_ctx { int decrypt; }; +enum hash_op { + COP_SHA1, + COP_HMAC_SHA1 +}; + +struct mv_tfm_hash_ctx { + struct crypto_shash *fallback; + struct crypto_shash *base_hash; + u32 ivs[2 * SHA1_DIGEST_SIZE / 4]; + int count_add; + enum hash_op op; +}; + +struct mv_req_hash_ctx { + u64 count; + u32 state[SHA1_DIGEST_SIZE / 4]; + u8 buffer[SHA1_BLOCK_SIZE]; + int first_hash; /* marks that we don't have previous state */ + int last_chunk; /* marks that this is the 'final' request */ + int extra_bytes; /* unprocessed bytes in buffer */ + enum hash_op op; + int count_add; + struct scatterlist dummysg; +}; + static void compute_aes_dec_key(struct mv_ctx *ctx) { struct crypto_aes_ctx gen_aes_key; @@ -265,6 +298,132 @@ static void mv_crypto_algo_completion(void) memcpy(req->info, cpg->sram + SRAM_DATA_IV_BUF, 16); } +static void mv_process_hash_current(int first_block) +{ + struct ahash_request *req = ahash_request_cast(cpg->cur_req); + struct mv_req_hash_ctx *req_ctx = ahash_request_ctx(req); + struct req_progress *p = &cpg->p; + struct sec_accel_config op = { 0 }; + int is_last; + + switch (req_ctx->op) { + case COP_SHA1: + default: + op.config = CFG_OP_MAC_ONLY | CFG_MACM_SHA1; + break; + case COP_HMAC_SHA1: + op.config = CFG_OP_MAC_ONLY | 
CFG_MACM_HMAC_SHA1; + break; + } + + op.mac_src_p = + MAC_SRC_DATA_P(SRAM_DATA_IN_START) | MAC_SRC_TOTAL_LEN((u32) + req_ctx-> + count); + + setup_data_in(); + + op.mac_digest = + MAC_DIGEST_P(SRAM_DIGEST_BUF) | MAC_FRAG_LEN(p->crypt_len); + op.mac_iv = + MAC_INNER_IV_P(SRAM_HMAC_IV_IN) | + MAC_OUTER_IV_P(SRAM_HMAC_IV_OUT); + + is_last = req_ctx->last_chunk + && (p->hw_processed_bytes + p->crypt_len >= p->hw_nbytes) + && (req_ctx->count <= MAX_HW_HASH_SIZE); + if (req_ctx->first_hash) { + if (is_last) + op.config |= CFG_NOT_FRAG; + else + op.config |= CFG_FIRST_FRAG; + + req_ctx->first_hash = 0; + } else { + if (is_last) + op.config |= CFG_LAST_FRAG; + else + op.config |= CFG_MID_FRAG; + } + + memcpy(cpg->sram + SRAM_CONFIG, &op, sizeof(struct sec_accel_config)); + + writel(SRAM_CONFIG, cpg->reg + SEC_ACCEL_DESC_P0); + /* GO */ + writel(SEC_CMD_EN_SEC_ACCL0, cpg->reg + SEC_ACCEL_CMD); + + /* + * XXX: add timer if the interrupt does not occur for some mystery + * reason + */ +} + +static inline int mv_hash_import_sha1_ctx(const struct mv_req_hash_ctx *ctx, + struct shash_desc *desc) +{ + int i; + struct sha1_state shash_state; + + shash_state.count = ctx->count + ctx->count_add; + for (i = 0; i < 5; i++) + shash_state.state[i] = ctx->state[i]; + memcpy(shash_state.buffer, ctx->buffer, sizeof(shash_state.buffer)); + return crypto_shash_import(desc, &shash_state); +} + +static int mv_hash_final_fallback(struct ahash_request *req) +{ + const struct mv_tfm_hash_ctx *tfm_ctx = crypto_tfm_ctx(req->base.tfm); + struct mv_req_hash_ctx *req_ctx = ahash_request_ctx(req); + struct { + struct shash_desc shash; + char ctx[crypto_shash_descsize(tfm_ctx->fallback)]; + } desc; + int rc; + + desc.shash.tfm = tfm_ctx->fallback; + desc.shash.flags = CRYPTO_TFM_REQ_MAY_SLEEP; + if (unlikely(req_ctx->first_hash)) { + crypto_shash_init(&desc.shash); + crypto_shash_update(&desc.shash, req_ctx->buffer, + req_ctx->extra_bytes); + } else { + /* only SHA1 for now.... 
+ */ + rc = mv_hash_import_sha1_ctx(req_ctx, &desc.shash); + if (rc) + goto out; + } + rc = crypto_shash_final(&desc.shash, req->result); +out: + return rc; +} + +static void mv_hash_algo_completion(void) +{ + struct ahash_request *req = ahash_request_cast(cpg->cur_req); + struct mv_req_hash_ctx *ctx = ahash_request_ctx(req); + + if (ctx->extra_bytes) + copy_src_to_buf(&cpg->p, ctx->buffer, ctx->extra_bytes); + sg_miter_stop(&cpg->p.src_sg_it); + + ctx->state[0] = readl(cpg->reg + DIGEST_INITIAL_VAL_A); + ctx->state[1] = readl(cpg->reg + DIGEST_INITIAL_VAL_B); + ctx->state[2] = readl(cpg->reg + DIGEST_INITIAL_VAL_C); + ctx->state[3] = readl(cpg->reg + DIGEST_INITIAL_VAL_D); + ctx->state[4] = readl(cpg->reg + DIGEST_INITIAL_VAL_E); + + if (likely(ctx->last_chunk)) { + if (likely(ctx->count <= MAX_HW_HASH_SIZE)) { + memcpy(req->result, cpg->sram + SRAM_DIGEST_BUF, + crypto_ahash_digestsize(crypto_ahash_reqtfm + (req))); + } else + mv_hash_final_fallback(req); + } +} + static void dequeue_complete_req(void) { struct crypto_async_request *req = cpg->cur_req; @@ -332,7 +491,7 @@ static int count_sgs(struct scatterlist *sl, unsigned int total_bytes) return i; } -static void mv_enqueue_new_req(struct ablkcipher_request *req) +static void mv_start_new_crypt_req(struct ablkcipher_request *req) { struct req_progress *p = &cpg->p; int num_sgs; @@ -353,11 +512,68 @@ static void mv_enqueue_new_req(struct ablkcipher_request *req) mv_process_current_q(1); } +static void mv_start_new_hash_req(struct ahash_request *req) +{ + struct req_progress *p = &cpg->p; + struct mv_req_hash_ctx *ctx = ahash_request_ctx(req); + const struct mv_tfm_hash_ctx *tfm_ctx = crypto_tfm_ctx(req->base.tfm); + int num_sgs, hw_bytes, old_extra_bytes, rc; + cpg->cur_req = &req->base; + memset(p, 0, sizeof(struct req_progress)); + hw_bytes = req->nbytes + ctx->extra_bytes; + old_extra_bytes = ctx->extra_bytes; + + if (unlikely(ctx->extra_bytes)) { + memcpy(cpg->sram + SRAM_DATA_IN_START, ctx->buffer, + ctx->extra_bytes); + p->crypt_len = ctx->extra_bytes; + } + + memcpy(cpg->sram + SRAM_HMAC_IV_IN, tfm_ctx->ivs, sizeof(tfm_ctx->ivs)); + + if (unlikely(!ctx->first_hash)) { + writel(ctx->state[0], cpg->reg + DIGEST_INITIAL_VAL_A); + writel(ctx->state[1], cpg->reg + DIGEST_INITIAL_VAL_B); + writel(ctx->state[2], cpg->reg + DIGEST_INITIAL_VAL_C); + writel(ctx->state[3], cpg->reg + DIGEST_INITIAL_VAL_D); + writel(ctx->state[4], cpg->reg + DIGEST_INITIAL_VAL_E); + } + + ctx->extra_bytes = hw_bytes % SHA1_BLOCK_SIZE; + if (ctx->extra_bytes != 0 + && (!ctx->last_chunk || ctx->count > MAX_HW_HASH_SIZE)) + hw_bytes -= ctx->extra_bytes; + else + ctx->extra_bytes = 0; + + num_sgs = count_sgs(req->src, req->nbytes); + sg_miter_start(&p->src_sg_it, req->src, num_sgs, SG_MITER_FROM_SG); + + if (hw_bytes) { + p->hw_nbytes = hw_bytes; + p->complete = mv_hash_algo_completion; + p->process = mv_process_hash_current; + + mv_process_hash_current(1); + } else { + copy_src_to_buf(p, ctx->buffer + old_extra_bytes, + ctx->extra_bytes - old_extra_bytes); + sg_miter_stop(&p->src_sg_it); + if (ctx->last_chunk) + rc = mv_hash_final_fallback(req); + else + rc = 0; + cpg->eng_st = ENGINE_IDLE; + local_bh_disable(); + req->base.complete(&req->base, rc); + local_bh_enable(); + } +} + static int queue_manag(void *data) { cpg->eng_st = ENGINE_IDLE; do { - struct ablkcipher_request *req; struct crypto_async_request *async_req = NULL; struct crypto_async_request *backlog; @@ -383,9 +599,18 @@ static int queue_manag(void *data) } if (async_req) { - req = 
container_of(async_req, - struct ablkcipher_request, base); - mv_enqueue_new_req(req); + if (async_req->tfm->__crt_alg->cra_type != + &crypto_ahash_type) { + struct ablkcipher_request *req = + container_of(async_req, + struct ablkcipher_request, + base); + mv_start_new_crypt_req(req); + } else { + struct ahash_request *req = + ahash_request_cast(async_req); + mv_start_new_hash_req(req); + } async_req = NULL; } @@ -457,6 +682,215 @@ static int mv_cra_init(struct crypto_tfm *tfm) return 0; } +static void mv_init_hash_req_ctx(struct mv_req_hash_ctx *ctx, int op, + int is_last, unsigned int req_len, + int count_add) +{ + memset(ctx, 0, sizeof(*ctx)); + ctx->op = op; + ctx->count = req_len; + ctx->first_hash = 1; + ctx->last_chunk = is_last; + ctx->count_add = count_add; +} + +static void mv_update_hash_req_ctx(struct mv_req_hash_ctx *ctx, int is_last, + unsigned req_len) +{ + ctx->last_chunk = is_last; + ctx->count += req_len; +} + +static int mv_hash_init(struct ahash_request *req) +{ + const struct mv_tfm_hash_ctx *tfm_ctx = crypto_tfm_ctx(req->base.tfm); + mv_init_hash_req_ctx(ahash_request_ctx(req), tfm_ctx->op, 0, 0, + tfm_ctx->count_add); + return 0; +} + +static int mv_hash_update(struct ahash_request *req) +{ + if (!req->nbytes) + return 0; + + mv_update_hash_req_ctx(ahash_request_ctx(req), 0, req->nbytes); + return mv_handle_req(&req->base); +} + +static int mv_hash_final(struct ahash_request *req) +{ + struct mv_req_hash_ctx *ctx = ahash_request_ctx(req); + /* dummy buffer of 4 bytes */ + sg_init_one(&ctx->dummysg, ctx->buffer, 4); + /* I think I'm allowed to do that... */ + ahash_request_set_crypt(req, &ctx->dummysg, req->result, 0); + mv_update_hash_req_ctx(ctx, 1, 0); + return mv_handle_req(&req->base); +} + +static int mv_hash_finup(struct ahash_request *req) +{ + if (!req->nbytes) + return mv_hash_final(req); + + mv_update_hash_req_ctx(ahash_request_ctx(req), 1, req->nbytes); + return mv_handle_req(&req->base); +} + +static int mv_hash_digest(struct ahash_request *req) +{ + const struct mv_tfm_hash_ctx *tfm_ctx = crypto_tfm_ctx(req->base.tfm); + mv_init_hash_req_ctx(ahash_request_ctx(req), tfm_ctx->op, 1, + req->nbytes, tfm_ctx->count_add); + return mv_handle_req(&req->base); +} + +static void mv_hash_init_ivs(struct mv_tfm_hash_ctx *ctx, const void *istate, + const void *ostate) +{ + const struct sha1_state *isha1_state = istate, *osha1_state = ostate; + int i; + for (i = 0; i < 5; i++) { + ctx->ivs[i] = cpu_to_be32(isha1_state->state[i]); + ctx->ivs[i + 5] = cpu_to_be32(osha1_state->state[i]); + } +} + +static int mv_hash_setkey(struct crypto_ahash *tfm, const u8 * key, + unsigned int keylen) +{ + int rc; + struct mv_tfm_hash_ctx *ctx = crypto_tfm_ctx(&tfm->base); + int bs, ds, ss; + + if (!ctx->base_hash) + return 0; + + rc = crypto_shash_setkey(ctx->fallback, key, keylen); + if (rc) + return rc; + + /* Can't see a way to extract the ipad/opad from the fallback tfm + so I'm basically copying code from the hmac module */ + bs = crypto_shash_blocksize(ctx->base_hash); + ds = crypto_shash_digestsize(ctx->base_hash); + ss = crypto_shash_statesize(ctx->base_hash); + + { + struct { + struct shash_desc shash; + char ctx[crypto_shash_descsize(ctx->base_hash)]; + } desc; + unsigned int i; + char ipad[ss]; + char opad[ss]; + + desc.shash.tfm = ctx->base_hash; + desc.shash.flags = crypto_shash_get_flags(ctx->base_hash) & + CRYPTO_TFM_REQ_MAY_SLEEP; + + if (keylen > bs) { + int err; + + err = + crypto_shash_digest(&desc.shash, key, keylen, ipad); + if (err) + return err; + + keylen = ds; 
+ } else + memcpy(ipad, key, keylen); + + memset(ipad + keylen, 0, bs - keylen); + memcpy(opad, ipad, bs); + + for (i = 0; i < bs; i++) { + ipad[i] ^= 0x36; + opad[i] ^= 0x5c; + } + + rc = crypto_shash_init(&desc.shash) ? : + crypto_shash_update(&desc.shash, ipad, bs) ? : + crypto_shash_export(&desc.shash, ipad) ? : + crypto_shash_init(&desc.shash) ? : + crypto_shash_update(&desc.shash, opad, bs) ? : + crypto_shash_export(&desc.shash, opad); + + if (rc == 0) + mv_hash_init_ivs(ctx, ipad, opad); + + return rc; + } +} + +static int mv_cra_hash_init(struct crypto_tfm *tfm, const char *base_hash_name, + enum hash_op op, int count_add) +{ + const char *fallback_driver_name = tfm->__crt_alg->cra_name; + struct mv_tfm_hash_ctx *ctx = crypto_tfm_ctx(tfm); + struct crypto_shash *fallback_tfm = NULL; + struct crypto_shash *base_hash = NULL; + int err = -ENOMEM; + + ctx->op = op; + ctx->count_add = count_add; + + /* Allocate a fallback and abort if it failed. */ + fallback_tfm = crypto_alloc_shash(fallback_driver_name, 0, + CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(fallback_tfm)) { + printk(KERN_WARNING MV_CESA + "Fallback driver '%s' could not be loaded!\n", + fallback_driver_name); + err = PTR_ERR(fallback_tfm); + goto out; + } + ctx->fallback = fallback_tfm; + + if (base_hash_name) { + /* Allocate a hash to compute the ipad/opad of hmac. */ + base_hash = crypto_alloc_shash(base_hash_name, 0, + CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(base_hash)) { + printk(KERN_WARNING MV_CESA + "Base driver '%s' could not be loaded!\n", + base_hash_name); + err = PTR_ERR(fallback_tfm); + goto err_bad_base; + } + } + ctx->base_hash = base_hash; + + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct mv_req_hash_ctx) + + crypto_shash_descsize(ctx->fallback)); + return 0; +err_bad_base: + crypto_free_shash(fallback_tfm); +out: + return err; +} + +static void mv_cra_hash_exit(struct crypto_tfm *tfm) +{ + struct mv_tfm_hash_ctx *ctx = crypto_tfm_ctx(tfm); + + crypto_free_shash(ctx->fallback); + if (ctx->base_hash) + crypto_free_shash(ctx->base_hash); +} + +static int mv_cra_hash_sha1_init(struct crypto_tfm *tfm) +{ + return mv_cra_hash_init(tfm, NULL, COP_SHA1, 0); +} + +static int mv_cra_hash_hmac_sha1_init(struct crypto_tfm *tfm) +{ + return mv_cra_hash_init(tfm, "sha1", COP_HMAC_SHA1, SHA1_BLOCK_SIZE); +} + irqreturn_t crypto_int(int irq, void *priv) { u32 val; @@ -519,6 +953,53 @@ struct crypto_alg mv_aes_alg_cbc = { }, }; +struct ahash_alg mv_sha1_alg = { + .init = mv_hash_init, + .update = mv_hash_update, + .final = mv_hash_final, + .finup = mv_hash_finup, + .digest = mv_hash_digest, + .halg = { + .digestsize = SHA1_DIGEST_SIZE, + .base = { + .cra_name = "sha1", + .cra_driver_name = "mv-sha1", + .cra_priority = 300, + .cra_flags = + CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mv_tfm_hash_ctx), + .cra_init = mv_cra_hash_sha1_init, + .cra_exit = mv_cra_hash_exit, + .cra_module = THIS_MODULE, + } + } +}; + +struct ahash_alg mv_hmac_sha1_alg = { + .init = mv_hash_init, + .update = mv_hash_update, + .final = mv_hash_final, + .finup = mv_hash_finup, + .digest = mv_hash_digest, + .setkey = mv_hash_setkey, + .halg = { + .digestsize = SHA1_DIGEST_SIZE, + .base = { + .cra_name = "hmac(sha1)", + .cra_driver_name = "mv-hmac-sha1", + .cra_priority = 300, + .cra_flags = + CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mv_tfm_hash_ctx), + .cra_init = mv_cra_hash_hmac_sha1_init, + 
.cra_exit = mv_cra_hash_exit, + .cra_module = THIS_MODULE, + } + } +}; + static int mv_probe(struct platform_device *pdev) { struct crypto_priv *cp; @@ -527,7 +1008,7 @@ static int mv_probe(struct platform_device *pdev) int ret; if (cpg) { - printk(KERN_ERR "Second crypto dev?\n"); + printk(KERN_ERR MV_CESA "Second crypto dev?\n"); return -EEXIST; } @@ -591,6 +1072,21 @@ static int mv_probe(struct platform_device *pdev) ret = crypto_register_alg(&mv_aes_alg_cbc); if (ret) goto err_unreg_ecb; + + ret = crypto_register_ahash(&mv_sha1_alg); + if (ret == 0) + cpg->has_sha1 = 1; + else + printk(KERN_WARNING MV_CESA "Could not register sha1 driver\n"); + + ret = crypto_register_ahash(&mv_hmac_sha1_alg); + if (ret == 0) { + cpg->has_hmac_sha1 = 1; + } else { + printk(KERN_WARNING MV_CESA + "Could not register hmac-sha1 driver\n"); + } + return 0; err_unreg_ecb: crypto_unregister_alg(&mv_aes_alg_ecb); @@ -615,6 +1111,10 @@ static int mv_remove(struct platform_device *pdev) crypto_unregister_alg(&mv_aes_alg_ecb); crypto_unregister_alg(&mv_aes_alg_cbc); + if (cp->has_sha1) + crypto_unregister_ahash(&mv_sha1_alg); + if (cp->has_hmac_sha1) + crypto_unregister_ahash(&mv_hmac_sha1_alg); kthread_stop(cp->queue_th); free_irq(cp->irq, cp); memset(cp->sram, 0, cp->sram_size); diff --git a/drivers/crypto/mv_cesa.h b/drivers/crypto/mv_cesa.h index c3e25d3..08fcb11 100644 --- a/drivers/crypto/mv_cesa.h +++ b/drivers/crypto/mv_cesa.h @@ -1,6 +1,10 @@ #ifndef __MV_CRYPTO_H__ #define DIGEST_INITIAL_VAL_A 0xdd00 +#define DIGEST_INITIAL_VAL_B 0xdd04 +#define DIGEST_INITIAL_VAL_C 0xdd08 +#define DIGEST_INITIAL_VAL_D 0xdd0c +#define DIGEST_INITIAL_VAL_E 0xdd10 #define DES_CMD_REG 0xdd58 #define SEC_ACCEL_CMD 0xde00 @@ -70,6 +74,10 @@ struct sec_accel_config { #define CFG_AES_LEN_128 (0 << 24) #define CFG_AES_LEN_192 (1 << 24) #define CFG_AES_LEN_256 (2 << 24) +#define CFG_NOT_FRAG (0 << 30) +#define CFG_FIRST_FRAG (1 << 30) +#define CFG_LAST_FRAG (2 << 30) +#define CFG_MID_FRAG (3 << 30) u32 enc_p; #define ENC_P_SRC(x) (x) @@ -90,7 +98,11 @@ struct sec_accel_config { #define MAC_SRC_TOTAL_LEN(x) ((x) << 16) u32 mac_digest; +#define MAC_DIGEST_P(x) (x) +#define MAC_FRAG_LEN(x) ((x) << 16) u32 mac_iv; +#define MAC_INNER_IV_P(x) (x) +#define MAC_OUTER_IV_P(x) ((x) << 16) }__attribute__ ((packed)); /* * /-----------\ 0 @@ -101,19 +113,37 @@ struct sec_accel_config { * | IV IN | 4 * 4 * |-----------| 0x40 (inplace) * | IV BUF | 4 * 4 - * |-----------| 0x50 + * |-----------| 0x80 * | DATA IN | 16 * x (max ->max_req_size) - * |-----------| 0x50 (inplace operation) + * |-----------| 0x80 (inplace operation) * | DATA OUT | 16 * x (max ->max_req_size) * \-----------/ SRAM size */ + + /* Hashing memory map: + * /-----------\ 0 + * | ACCEL CFG | 4 * 8 + * |-----------| 0x20 + * | Inner IV | 5 * 4 + * |-----------| 0x34 + * | Outer IV | 5 * 4 + * |-----------| 0x48 + * | Output BUF| 5 * 4 + * |-----------| 0x80 + * | DATA IN | 64 * x (max ->max_req_size) + * \-----------/ SRAM size + */ #define SRAM_CONFIG 0x00 #define SRAM_DATA_KEY_P 0x20 #define SRAM_DATA_IV 0x40 #define SRAM_DATA_IV_BUF 0x40 -#define SRAM_DATA_IN_START 0x50 -#define SRAM_DATA_OUT_START 0x50 +#define SRAM_DATA_IN_START 0x80 +#define SRAM_DATA_OUT_START 0x80 + +#define SRAM_HMAC_IV_IN 0x20 +#define SRAM_HMAC_IV_OUT 0x34 +#define SRAM_DIGEST_BUF 0x48 -#define SRAM_CFG_SPACE 0x50 +#define SRAM_CFG_SPACE 0x80 #endif -- cgit v0.10.2 From 99700716a9b2e117fd50c6d3f1fd5edeef6dc6d2 Mon Sep 17 00:00:00 2001 From: Chihau Chau Date: Mon, 19 Apr 2010 21:02:41 +0800 Subject: 
crypto: geode-aes - Fix some code style issues This fixes some code style issues like: - Use #include instead of and #include instead of - Use "foo *bar" instead of "foo * bar" - Add a space after the for or while sentence and before the open parenthesis '(' - Don't use assignments in a if condition Signed-off-by: Chihau Chau Signed-off-by: Herbert Xu diff --git a/drivers/crypto/geode-aes.c b/drivers/crypto/geode-aes.c index c7a5a43..09389dd 100644 --- a/drivers/crypto/geode-aes.c +++ b/drivers/crypto/geode-aes.c @@ -15,14 +15,14 @@ #include #include -#include -#include +#include +#include #include "geode-aes.h" /* Static structures */ -static void __iomem * _iobase; +static void __iomem *_iobase; static spinlock_t lock; /* Write a 128 bit field (either a writable key or IV) */ @@ -30,7 +30,7 @@ static inline void _writefield(u32 offset, void *value) { int i; - for(i = 0; i < 4; i++) + for (i = 0; i < 4; i++) iowrite32(((u32 *) value)[i], _iobase + offset + (i * 4)); } @@ -39,7 +39,7 @@ static inline void _readfield(u32 offset, void *value) { int i; - for(i = 0; i < 4; i++) + for (i = 0; i < 4; i++) ((u32 *) value)[i] = ioread32(_iobase + offset + (i * 4)); } @@ -59,7 +59,7 @@ do_crypt(void *src, void *dst, int len, u32 flags) do { status = ioread32(_iobase + AES_INTR_REG); cpu_relax(); - } while(!(status & AES_INTRA_PENDING) && --counter); + } while (!(status & AES_INTRA_PENDING) && --counter); /* Clear the event */ iowrite32((status & 0xFF) | AES_INTRA_PENDING, _iobase + AES_INTR_REG); @@ -317,7 +317,7 @@ geode_cbc_decrypt(struct blkcipher_desc *desc, err = blkcipher_walk_virt(desc, &walk); op->iv = walk.iv; - while((nbytes = walk.nbytes)) { + while ((nbytes = walk.nbytes)) { op->src = walk.src.virt.addr, op->dst = walk.dst.virt.addr; op->mode = AES_MODE_CBC; @@ -349,7 +349,7 @@ geode_cbc_encrypt(struct blkcipher_desc *desc, err = blkcipher_walk_virt(desc, &walk); op->iv = walk.iv; - while((nbytes = walk.nbytes)) { + while ((nbytes = walk.nbytes)) { op->src = walk.src.virt.addr, op->dst = walk.dst.virt.addr; op->mode = AES_MODE_CBC; @@ -429,7 +429,7 @@ geode_ecb_decrypt(struct blkcipher_desc *desc, blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); - while((nbytes = walk.nbytes)) { + while ((nbytes = walk.nbytes)) { op->src = walk.src.virt.addr, op->dst = walk.dst.virt.addr; op->mode = AES_MODE_ECB; @@ -459,7 +459,7 @@ geode_ecb_encrypt(struct blkcipher_desc *desc, blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); - while((nbytes = walk.nbytes)) { + while ((nbytes = walk.nbytes)) { op->src = walk.src.virt.addr, op->dst = walk.dst.virt.addr; op->mode = AES_MODE_ECB; @@ -518,11 +518,12 @@ static int __devinit geode_aes_probe(struct pci_dev *dev, const struct pci_device_id *id) { int ret; - - if ((ret = pci_enable_device(dev))) + ret = pci_enable_device(dev); + if (ret) return ret; - if ((ret = pci_request_regions(dev, "geode-aes"))) + ret = pci_request_regions(dev, "geode-aes"); + if (ret) goto eenable; _iobase = pci_iomap(dev, 0, 0); @@ -537,13 +538,16 @@ geode_aes_probe(struct pci_dev *dev, const struct pci_device_id *id) /* Clear any pending activity */ iowrite32(AES_INTR_PENDING | AES_INTR_MASK, _iobase + AES_INTR_REG); - if ((ret = crypto_register_alg(&geode_alg))) + ret = crypto_register_alg(&geode_alg); + if (ret) goto eiomap; - if ((ret = crypto_register_alg(&geode_ecb_alg))) + ret = crypto_register_alg(&geode_ecb_alg); + if (ret) goto ealg; - if ((ret = crypto_register_alg(&geode_cbc_alg))) + ret = 
crypto_register_alg(&geode_cbc_alg); + if (ret) goto eecb; printk(KERN_NOTICE "geode-aes: GEODE AES engine enabled.\n"); -- cgit v0.10.2 From c614e109c184edd7900d9ff4d6de9ef94bc4d85b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 3 May 2010 11:08:15 +0800 Subject: crypto: algapi - Remove unneeded null check We don't check "frontend" consistently in crypto_init_spawn2(). We check it at the start of the function but then we dereference it unconditionally in the parameter list when we call crypto_init_spawn(). I looked at the places that call crypto_init_spawn2() and "frontend" is always a valid pointer so I removed the check for null. Signed-off-by: Dan Carpenter Signed-off-by: Herbert Xu diff --git a/crypto/algapi.c b/crypto/algapi.c index 3e4524e..d49d709 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -543,7 +543,7 @@ int crypto_init_spawn2(struct crypto_spawn *spawn, struct crypto_alg *alg, { int err = -EINVAL; - if (frontend && (alg->cra_flags ^ frontend->type) & frontend->maskset) + if ((alg->cra_flags ^ frontend->type) & frontend->maskset) goto out; spawn->frontend = frontend; -- cgit v0.10.2 From ee5500c45c4860a84bba502c6d9ef5af6395dad6 Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Mon, 3 May 2010 11:10:03 +0800 Subject: crypto: omap - Updates omap sham device related platform code - registration with multi OMAP kernels support - clocks Signed-off-by: Dmitry Kasatkin Signed-off-by: Herbert Xu diff --git a/arch/arm/mach-omap2/clock2420_data.c b/arch/arm/mach-omap2/clock2420_data.c index f12af95..fc55ab4 100644 --- a/arch/arm/mach-omap2/clock2420_data.c +++ b/arch/arm/mach-omap2/clock2420_data.c @@ -1836,7 +1836,7 @@ static struct omap_clk omap2420_clks[] = { CLK(NULL, "vlynq_ick", &vlynq_ick, CK_242X), CLK(NULL, "vlynq_fck", &vlynq_fck, CK_242X), CLK(NULL, "des_ick", &des_ick, CK_242X), - CLK(NULL, "sha_ick", &sha_ick, CK_242X), + CLK("omap-sham", "ick", &sha_ick, CK_242X), CLK("omap_rng", "ick", &rng_ick, CK_242X), CLK(NULL, "aes_ick", &aes_ick, CK_242X), CLK(NULL, "pka_ick", &pka_ick, CK_242X), diff --git a/arch/arm/mach-omap2/clock2430_data.c b/arch/arm/mach-omap2/clock2430_data.c index 0438b6e..5884ac6 100644 --- a/arch/arm/mach-omap2/clock2430_data.c +++ b/arch/arm/mach-omap2/clock2430_data.c @@ -1924,7 +1924,7 @@ static struct omap_clk omap2430_clks[] = { CLK(NULL, "sdma_ick", &sdma_ick, CK_243X), CLK(NULL, "sdrc_ick", &sdrc_ick, CK_243X), CLK(NULL, "des_ick", &des_ick, CK_243X), - CLK(NULL, "sha_ick", &sha_ick, CK_243X), + CLK("omap-sham", "ick", &sha_ick, CK_243X), CLK("omap_rng", "ick", &rng_ick, CK_243X), CLK(NULL, "aes_ick", &aes_ick, CK_243X), CLK(NULL, "pka_ick", &pka_ick, CK_243X), diff --git a/arch/arm/mach-omap2/clock3xxx_data.c b/arch/arm/mach-omap2/clock3xxx_data.c index d5153b6..5a974dc 100644 --- a/arch/arm/mach-omap2/clock3xxx_data.c +++ b/arch/arm/mach-omap2/clock3xxx_data.c @@ -3360,7 +3360,7 @@ static struct omap_clk omap3xxx_clks[] = { CLK("mmci-omap-hs.2", "ick", &mmchs3_ick, CK_3430ES2 | CK_AM35XX), CLK(NULL, "icr_ick", &icr_ick, CK_343X), CLK(NULL, "aes2_ick", &aes2_ick, CK_343X), - CLK(NULL, "sha12_ick", &sha12_ick, CK_343X), + CLK("omap-sham", "ick", &sha12_ick, CK_343X), CLK(NULL, "des2_ick", &des2_ick, CK_343X), CLK("mmci-omap-hs.1", "ick", &mmchs2_ick, CK_3XXX), CLK("mmci-omap-hs.0", "ick", &mmchs1_ick, CK_3XXX), diff --git a/arch/arm/mach-omap2/devices.c b/arch/arm/mach-omap2/devices.c index 23e4d77..7e7acc1 100644 --- a/arch/arm/mach-omap2/devices.c +++ b/arch/arm/mach-omap2/devices.c @@ -26,6 +26,7 @@ #include #include #include 
+#include #include "mux.h" @@ -453,8 +454,10 @@ static void omap_init_mcspi(void) static inline void omap_init_mcspi(void) {} #endif -#ifdef CONFIG_OMAP_SHA1_MD5 -static struct resource sha1_md5_resources[] = { +#if defined(CONFIG_CRYPTO_DEV_OMAP_SHAM) || defined(CONFIG_CRYPTO_DEV_OMAP_SHAM_MODULE) + +#ifdef CONFIG_ARCH_OMAP24XX +static struct resource omap2_sham_resources[] = { { .start = OMAP24XX_SEC_SHA1MD5_BASE, .end = OMAP24XX_SEC_SHA1MD5_BASE + 0x64, @@ -465,20 +468,55 @@ static struct resource sha1_md5_resources[] = { .flags = IORESOURCE_IRQ, } }; +static int omap2_sham_resources_sz = ARRAY_SIZE(omap2_sham_resources); +#else +#define omap2_sham_resources NULL +#define omap2_sham_resources_sz 0 +#endif -static struct platform_device sha1_md5_device = { - .name = "OMAP SHA1/MD5", +#ifdef CONFIG_ARCH_OMAP34XX +static struct resource omap3_sham_resources[] = { + { + .start = OMAP34XX_SEC_SHA1MD5_BASE, + .end = OMAP34XX_SEC_SHA1MD5_BASE + 0x64, + .flags = IORESOURCE_MEM, + }, + { + .start = INT_34XX_SHA1MD52_IRQ, + .flags = IORESOURCE_IRQ, + }, + { + .start = OMAP34XX_DMA_SHA1MD5_RX, + .flags = IORESOURCE_DMA, + } +}; +static int omap3_sham_resources_sz = ARRAY_SIZE(omap3_sham_resources); +#else +#define omap3_sham_resources NULL +#define omap3_sham_resources_sz 0 +#endif + +static struct platform_device sham_device = { + .name = "omap-sham", .id = -1, - .num_resources = ARRAY_SIZE(sha1_md5_resources), - .resource = sha1_md5_resources, }; -static void omap_init_sha1_md5(void) +static void omap_init_sham(void) { - platform_device_register(&sha1_md5_device); + if (cpu_is_omap24xx()) { + sham_device.resource = omap2_sham_resources; + sham_device.num_resources = omap2_sham_resources_sz; + } else if (cpu_is_omap34xx()) { + sham_device.resource = omap3_sham_resources; + sham_device.num_resources = omap3_sham_resources_sz; + } else { + pr_err("%s: platform not supported\n", __func__); + return; + } + platform_device_register(&sham_device); } #else -static inline void omap_init_sha1_md5(void) { } +static inline void omap_init_sham(void) { } #endif /*-------------------------------------------------------------------------*/ @@ -799,7 +837,7 @@ static int __init omap2_init_devices(void) omap_init_mcspi(); omap_hdq_init(); omap_init_sti(); - omap_init_sha1_md5(); + omap_init_sham(); return 0; } diff --git a/arch/arm/plat-omap/include/plat/omap34xx.h b/arch/arm/plat-omap/include/plat/omap34xx.h index 2845fdc..98fc8b4 100644 --- a/arch/arm/plat-omap/include/plat/omap34xx.h +++ b/arch/arm/plat-omap/include/plat/omap34xx.h @@ -82,5 +82,10 @@ #define OMAP34XX_MAILBOX_BASE (L4_34XX_BASE + 0x94000) +/* Security */ +#define OMAP34XX_SEC_BASE (L4_34XX_BASE + 0xA0000) +#define OMAP34XX_SEC_SHA1MD5_BASE (OMAP34XX_SEC_BASE + 0x23000) +#define OMAP34XX_SEC_AES_BASE (OMAP34XX_SEC_BASE + 0x25000) + #endif /* __ASM_ARCH_OMAP3_H */ -- cgit v0.10.2 From 8628e7c89075834fc7b44629d09ff4f9043af114 Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Mon, 3 May 2010 11:10:59 +0800 Subject: crypto: omap - sha1 & md5 driver Earlier kernel contained omap sha1 and md5 driver, which was not maintained, was not ported to new crypto APIs and removed from the source tree. - implements async crypto API using dma and cpu. 
- supports multiple sham instances if available - hmac - concurrent requests Signed-off-by: Dmitry Kasatkin Signed-off-by: Herbert Xu diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index b08403d..9073aa0 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -222,4 +222,13 @@ config CRYPTO_DEV_PPC4XX help This option allows you to have support for AMCC crypto acceleration. +config CRYPTO_DEV_OMAP_SHAM + tristate "Support for OMAP SHA1/MD5 hw accelerator" + depends on ARCH_OMAP2 || ARCH_OMAP3 + select CRYPTO_SHA1 + select CRYPTO_MD5 + help + OMAP processors have SHA1/MD5 hw accelerator. Select this if you + want to use the OMAP module for SHA1/MD5 algorithms. + endif # CRYPTO_HW diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index 6ffcb3f..c9494e1 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -6,3 +6,5 @@ obj-$(CONFIG_CRYPTO_DEV_MV_CESA) += mv_cesa.o obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o obj-$(CONFIG_CRYPTO_DEV_PPC4XX) += amcc/ +obj-$(CONFIG_CRYPTO_DEV_OMAP_SHAM) += omap-sham.o + diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c new file mode 100644 index 0000000..8b03433 --- /dev/null +++ b/drivers/crypto/omap-sham.c @@ -0,0 +1,1259 @@ +/* + * Cryptographic API. + * + * Support for OMAP SHA1/MD5 HW acceleration. + * + * Copyright (c) 2010 Nokia Corporation + * Author: Dmitry Kasatkin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Some ideas are from old omap-sha1-md5.c driver. + */ + +#define pr_fmt(fmt) "%s: " fmt, __func__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define SHA_REG_DIGEST(x) (0x00 + ((x) * 0x04)) +#define SHA_REG_DIN(x) (0x1C + ((x) * 0x04)) + +#define SHA1_MD5_BLOCK_SIZE SHA1_BLOCK_SIZE +#define MD5_DIGEST_SIZE 16 + +#define SHA_REG_DIGCNT 0x14 + +#define SHA_REG_CTRL 0x18 +#define SHA_REG_CTRL_LENGTH (0xFFFFFFFF << 5) +#define SHA_REG_CTRL_CLOSE_HASH (1 << 4) +#define SHA_REG_CTRL_ALGO_CONST (1 << 3) +#define SHA_REG_CTRL_ALGO (1 << 2) +#define SHA_REG_CTRL_INPUT_READY (1 << 1) +#define SHA_REG_CTRL_OUTPUT_READY (1 << 0) + +#define SHA_REG_REV 0x5C +#define SHA_REG_REV_MAJOR 0xF0 +#define SHA_REG_REV_MINOR 0x0F + +#define SHA_REG_MASK 0x60 +#define SHA_REG_MASK_DMA_EN (1 << 3) +#define SHA_REG_MASK_IT_EN (1 << 2) +#define SHA_REG_MASK_SOFTRESET (1 << 1) +#define SHA_REG_AUTOIDLE (1 << 0) + +#define SHA_REG_SYSSTATUS 0x64 +#define SHA_REG_SYSSTATUS_RESETDONE (1 << 0) + +#define DEFAULT_TIMEOUT_INTERVAL HZ + +#define FLAGS_FIRST 0x0001 +#define FLAGS_FINUP 0x0002 +#define FLAGS_FINAL 0x0004 +#define FLAGS_FAST 0x0008 +#define FLAGS_SHA1 0x0010 +#define FLAGS_DMA_ACTIVE 0x0020 +#define FLAGS_OUTPUT_READY 0x0040 +#define FLAGS_CLEAN 0x0080 +#define FLAGS_INIT 0x0100 +#define FLAGS_CPU 0x0200 +#define FLAGS_HMAC 0x0400 + +/* 3rd byte */ +#define FLAGS_BUSY 16 + +#define OP_UPDATE 1 +#define OP_FINAL 2 + +struct omap_sham_dev; + +struct omap_sham_reqctx { + struct omap_sham_dev *dd; + unsigned long flags; + unsigned long op; + + size_t digcnt; + u8 *buffer; + size_t bufcnt; + size_t buflen; + dma_addr_t dma_addr; + + /* walk state */ + struct scatterlist *sg; + unsigned 
int offset; /* offset in current sg */ + unsigned int total; /* total request */ +}; + +struct omap_sham_hmac_ctx { + struct crypto_shash *shash; + u8 ipad[SHA1_MD5_BLOCK_SIZE]; + u8 opad[SHA1_MD5_BLOCK_SIZE]; +}; + +struct omap_sham_ctx { + struct omap_sham_dev *dd; + + unsigned long flags; + + /* fallback stuff */ + struct crypto_shash *fallback; + + struct omap_sham_hmac_ctx base[0]; +}; + +#define OMAP_SHAM_QUEUE_LENGTH 1 + +struct omap_sham_dev { + struct list_head list; + unsigned long phys_base; + struct device *dev; + void __iomem *io_base; + int irq; + struct clk *iclk; + spinlock_t lock; + int dma; + int dma_lch; + struct tasklet_struct done_task; + struct tasklet_struct queue_task; + + unsigned long flags; + struct crypto_queue queue; + struct ahash_request *req; +}; + +struct omap_sham_drv { + struct list_head dev_list; + spinlock_t lock; + unsigned long flags; +}; + +static struct omap_sham_drv sham = { + .dev_list = LIST_HEAD_INIT(sham.dev_list), + .lock = __SPIN_LOCK_UNLOCKED(sham.lock), +}; + +static inline u32 omap_sham_read(struct omap_sham_dev *dd, u32 offset) +{ + return __raw_readl(dd->io_base + offset); +} + +static inline void omap_sham_write(struct omap_sham_dev *dd, + u32 offset, u32 value) +{ + __raw_writel(value, dd->io_base + offset); +} + +static inline void omap_sham_write_mask(struct omap_sham_dev *dd, u32 address, + u32 value, u32 mask) +{ + u32 val; + + val = omap_sham_read(dd, address); + val &= ~mask; + val |= value; + omap_sham_write(dd, address, val); +} + +static inline int omap_sham_wait(struct omap_sham_dev *dd, u32 offset, u32 bit) +{ + unsigned long timeout = jiffies + DEFAULT_TIMEOUT_INTERVAL; + + while (!(omap_sham_read(dd, offset) & bit)) { + if (time_is_before_jiffies(timeout)) + return -ETIMEDOUT; + } + + return 0; +} + +static void omap_sham_copy_hash(struct ahash_request *req, int out) +{ + struct omap_sham_reqctx *ctx = ahash_request_ctx(req); + u32 *hash = (u32 *)req->result; + int i; + + if (likely(ctx->flags & FLAGS_SHA1)) { + /* SHA1 results are in big endian */ + for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(u32); i++) + if (out) + hash[i] = be32_to_cpu(omap_sham_read(ctx->dd, + SHA_REG_DIGEST(i))); + else + omap_sham_write(ctx->dd, SHA_REG_DIGEST(i), + cpu_to_be32(hash[i])); + } else { + /* MD5 results are in little endian */ + for (i = 0; i < MD5_DIGEST_SIZE / sizeof(u32); i++) + if (out) + hash[i] = le32_to_cpu(omap_sham_read(ctx->dd, + SHA_REG_DIGEST(i))); + else + omap_sham_write(ctx->dd, SHA_REG_DIGEST(i), + cpu_to_le32(hash[i])); + } +} + +static int omap_sham_write_ctrl(struct omap_sham_dev *dd, size_t length, + int final, int dma) +{ + struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req); + u32 val = length << 5, mask; + + if (unlikely(!ctx->digcnt)) { + + clk_enable(dd->iclk); + + if (!(dd->flags & FLAGS_INIT)) { + omap_sham_write_mask(dd, SHA_REG_MASK, + SHA_REG_MASK_SOFTRESET, SHA_REG_MASK_SOFTRESET); + + if (omap_sham_wait(dd, SHA_REG_SYSSTATUS, + SHA_REG_SYSSTATUS_RESETDONE)) + return -ETIMEDOUT; + + dd->flags |= FLAGS_INIT; + } + } else { + omap_sham_write(dd, SHA_REG_DIGCNT, ctx->digcnt); + } + + omap_sham_write_mask(dd, SHA_REG_MASK, + SHA_REG_MASK_IT_EN | (dma ? SHA_REG_MASK_DMA_EN : 0), + SHA_REG_MASK_IT_EN | SHA_REG_MASK_DMA_EN); + /* + * Setting ALGO_CONST only for the first iteration + * and CLOSE_HASH only for the last one. 
+ */ + if (ctx->flags & FLAGS_SHA1) + val |= SHA_REG_CTRL_ALGO; + if (!ctx->digcnt) + val |= SHA_REG_CTRL_ALGO_CONST; + if (final) + val |= SHA_REG_CTRL_CLOSE_HASH; + + mask = SHA_REG_CTRL_ALGO_CONST | SHA_REG_CTRL_CLOSE_HASH | + SHA_REG_CTRL_ALGO | SHA_REG_CTRL_LENGTH; + + omap_sham_write_mask(dd, SHA_REG_CTRL, val, mask); + + return 0; +} + +static int omap_sham_xmit_cpu(struct omap_sham_dev *dd, const u8 *buf, + size_t length, int final) +{ + struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req); + int err, count, len32; + const u32 *buffer = (const u32 *)buf; + + dev_dbg(dd->dev, "xmit_cpu: digcnt: %d, length: %d, final: %d\n", + ctx->digcnt, length, final); + + err = omap_sham_write_ctrl(dd, length, final, 0); + if (err) + return err; + + if (omap_sham_wait(dd, SHA_REG_CTRL, SHA_REG_CTRL_INPUT_READY)) + return -ETIMEDOUT; + + ctx->digcnt += length; + + if (final) + ctx->flags |= FLAGS_FINAL; /* catch last interrupt */ + + len32 = DIV_ROUND_UP(length, sizeof(u32)); + + for (count = 0; count < len32; count++) + omap_sham_write(dd, SHA_REG_DIN(count), buffer[count]); + + return -EINPROGRESS; +} + +static int omap_sham_xmit_dma(struct omap_sham_dev *dd, dma_addr_t dma_addr, + size_t length, int final) +{ + struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req); + int err, len32; + + dev_dbg(dd->dev, "xmit_dma: digcnt: %d, length: %d, final: %d\n", + ctx->digcnt, length, final); + + /* flush cache entries related to our page */ + if (dma_addr == ctx->dma_addr) + dma_sync_single_for_device(dd->dev, dma_addr, length, + DMA_TO_DEVICE); + + len32 = DIV_ROUND_UP(length, sizeof(u32)); + + omap_set_dma_transfer_params(dd->dma_lch, OMAP_DMA_DATA_TYPE_S32, len32, + 1, OMAP_DMA_SYNC_PACKET, dd->dma, OMAP_DMA_DST_SYNC); + + omap_set_dma_src_params(dd->dma_lch, 0, OMAP_DMA_AMODE_POST_INC, + dma_addr, 0, 0); + + err = omap_sham_write_ctrl(dd, length, final, 1); + if (err) + return err; + + ctx->digcnt += length; + + if (final) + ctx->flags |= FLAGS_FINAL; /* catch last interrupt */ + + dd->flags |= FLAGS_DMA_ACTIVE; + + omap_start_dma(dd->dma_lch); + + return -EINPROGRESS; +} + +static size_t omap_sham_append_buffer(struct omap_sham_reqctx *ctx, + const u8 *data, size_t length) +{ + size_t count = min(length, ctx->buflen - ctx->bufcnt); + + count = min(count, ctx->total); + if (count <= 0) + return 0; + memcpy(ctx->buffer + ctx->bufcnt, data, count); + ctx->bufcnt += count; + + return count; +} + +static size_t omap_sham_append_sg(struct omap_sham_reqctx *ctx) +{ + size_t count; + + while (ctx->sg) { + count = omap_sham_append_buffer(ctx, + sg_virt(ctx->sg) + ctx->offset, + ctx->sg->length - ctx->offset); + if (!count) + break; + ctx->offset += count; + ctx->total -= count; + if (ctx->offset == ctx->sg->length) { + ctx->sg = sg_next(ctx->sg); + if (ctx->sg) + ctx->offset = 0; + else + ctx->total = 0; + } + } + + return 0; +} + +static int omap_sham_update_dma_slow(struct omap_sham_dev *dd) +{ + struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req); + unsigned int final; + size_t count; + + if (!ctx->total) + return 0; + + omap_sham_append_sg(ctx); + + final = (ctx->flags & FLAGS_FINUP) && !ctx->total; + + dev_dbg(dd->dev, "slow: bufcnt: %u, digcnt: %d, final: %d\n", + ctx->bufcnt, ctx->digcnt, final); + + if (final || (ctx->bufcnt == ctx->buflen && ctx->total)) { + count = ctx->bufcnt; + ctx->bufcnt = 0; + return omap_sham_xmit_dma(dd, ctx->dma_addr, count, final); + } + + return 0; +} + +static int omap_sham_update_dma_fast(struct omap_sham_dev *dd) +{ + struct omap_sham_reqctx *ctx = 
ahash_request_ctx(dd->req); + unsigned int length; + + ctx->flags |= FLAGS_FAST; + + length = min(ctx->total, sg_dma_len(ctx->sg)); + ctx->total = length; + + if (!dma_map_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE)) { + dev_err(dd->dev, "dma_map_sg error\n"); + return -EINVAL; + } + + ctx->total -= length; + + return omap_sham_xmit_dma(dd, sg_dma_address(ctx->sg), length, 1); +} + +static int omap_sham_update_cpu(struct omap_sham_dev *dd) +{ + struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req); + int bufcnt; + + omap_sham_append_sg(ctx); + bufcnt = ctx->bufcnt; + ctx->bufcnt = 0; + + return omap_sham_xmit_cpu(dd, ctx->buffer, bufcnt, 1); +} + +static int omap_sham_update_dma_stop(struct omap_sham_dev *dd) +{ + struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req); + + omap_stop_dma(dd->dma_lch); + if (ctx->flags & FLAGS_FAST) + dma_unmap_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE); + + return 0; +} + +static void omap_sham_cleanup(struct ahash_request *req) +{ + struct omap_sham_reqctx *ctx = ahash_request_ctx(req); + struct omap_sham_dev *dd = ctx->dd; + unsigned long flags; + + spin_lock_irqsave(&dd->lock, flags); + if (ctx->flags & FLAGS_CLEAN) { + spin_unlock_irqrestore(&dd->lock, flags); + return; + } + ctx->flags |= FLAGS_CLEAN; + spin_unlock_irqrestore(&dd->lock, flags); + + if (ctx->digcnt) + clk_disable(dd->iclk); + + if (ctx->dma_addr) + dma_unmap_single(dd->dev, ctx->dma_addr, ctx->buflen, + DMA_TO_DEVICE); + + if (ctx->buffer) + free_page((unsigned long)ctx->buffer); + + dev_dbg(dd->dev, "digcnt: %d, bufcnt: %d\n", ctx->digcnt, ctx->bufcnt); +} + +static int omap_sham_init(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct omap_sham_ctx *tctx = crypto_ahash_ctx(tfm); + struct omap_sham_reqctx *ctx = ahash_request_ctx(req); + struct omap_sham_dev *dd = NULL, *tmp; + + spin_lock_bh(&sham.lock); + if (!tctx->dd) { + list_for_each_entry(tmp, &sham.dev_list, list) { + dd = tmp; + break; + } + tctx->dd = dd; + } else { + dd = tctx->dd; + } + spin_unlock_bh(&sham.lock); + + ctx->dd = dd; + + ctx->flags = 0; + + ctx->flags |= FLAGS_FIRST; + + dev_dbg(dd->dev, "init: digest size: %d\n", + crypto_ahash_digestsize(tfm)); + + if (crypto_ahash_digestsize(tfm) == SHA1_DIGEST_SIZE) + ctx->flags |= FLAGS_SHA1; + + ctx->bufcnt = 0; + ctx->digcnt = 0; + + ctx->buflen = PAGE_SIZE; + ctx->buffer = (void *)__get_free_page( + (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? 
+ GFP_KERNEL : GFP_ATOMIC); + if (!ctx->buffer) + return -ENOMEM; + + ctx->dma_addr = dma_map_single(dd->dev, ctx->buffer, ctx->buflen, + DMA_TO_DEVICE); + if (dma_mapping_error(dd->dev, ctx->dma_addr)) { + dev_err(dd->dev, "dma %u bytes error\n", ctx->buflen); + free_page((unsigned long)ctx->buffer); + return -EINVAL; + } + + if (tctx->flags & FLAGS_HMAC) { + struct omap_sham_hmac_ctx *bctx = tctx->base; + + memcpy(ctx->buffer, bctx->ipad, SHA1_MD5_BLOCK_SIZE); + ctx->bufcnt = SHA1_MD5_BLOCK_SIZE; + ctx->flags |= FLAGS_HMAC; + } + + return 0; + +} + +static int omap_sham_update_req(struct omap_sham_dev *dd) +{ + struct ahash_request *req = dd->req; + struct omap_sham_reqctx *ctx = ahash_request_ctx(req); + int err; + + dev_dbg(dd->dev, "update_req: total: %u, digcnt: %d, finup: %d\n", + ctx->total, ctx->digcnt, (ctx->flags & FLAGS_FINUP) != 0); + + if (ctx->flags & FLAGS_CPU) + err = omap_sham_update_cpu(dd); + else if (ctx->flags & FLAGS_FAST) + err = omap_sham_update_dma_fast(dd); + else + err = omap_sham_update_dma_slow(dd); + + /* wait for dma completion before can take more data */ + dev_dbg(dd->dev, "update: err: %d, digcnt: %d\n", err, ctx->digcnt); + + return err; +} + +static int omap_sham_final_req(struct omap_sham_dev *dd) +{ + struct ahash_request *req = dd->req; + struct omap_sham_reqctx *ctx = ahash_request_ctx(req); + int err = 0, use_dma = 1; + + if (ctx->bufcnt <= 64) + /* faster to handle last block with cpu */ + use_dma = 0; + + if (use_dma) + err = omap_sham_xmit_dma(dd, ctx->dma_addr, ctx->bufcnt, 1); + else + err = omap_sham_xmit_cpu(dd, ctx->buffer, ctx->bufcnt, 1); + + ctx->bufcnt = 0; + + if (err != -EINPROGRESS) + omap_sham_cleanup(req); + + dev_dbg(dd->dev, "final_req: err: %d\n", err); + + return err; +} + +static int omap_sham_finish_req_hmac(struct ahash_request *req) +{ + struct omap_sham_ctx *tctx = crypto_tfm_ctx(req->base.tfm); + struct omap_sham_hmac_ctx *bctx = tctx->base; + int bs = crypto_shash_blocksize(bctx->shash); + int ds = crypto_shash_digestsize(bctx->shash); + struct { + struct shash_desc shash; + char ctx[crypto_shash_descsize(bctx->shash)]; + } desc; + + desc.shash.tfm = bctx->shash; + desc.shash.flags = 0; /* not CRYPTO_TFM_REQ_MAY_SLEEP */ + + return crypto_shash_init(&desc.shash) ?: + crypto_shash_update(&desc.shash, bctx->opad, bs) ?: + crypto_shash_finup(&desc.shash, req->result, ds, req->result); +} + +static void omap_sham_finish_req(struct ahash_request *req, int err) +{ + struct omap_sham_reqctx *ctx = ahash_request_ctx(req); + + if (!err) { + omap_sham_copy_hash(ctx->dd->req, 1); + if (ctx->flags & FLAGS_HMAC) + err = omap_sham_finish_req_hmac(req); + } + + if (ctx->flags & FLAGS_FINAL) + omap_sham_cleanup(req); + + clear_bit(FLAGS_BUSY, &ctx->dd->flags); + + if (req->base.complete) + req->base.complete(&req->base, err); +} + +static int omap_sham_handle_queue(struct omap_sham_dev *dd) +{ + struct crypto_async_request *async_req, *backlog; + struct omap_sham_reqctx *ctx; + struct ahash_request *req, *prev_req; + unsigned long flags; + int err = 0; + + if (test_and_set_bit(FLAGS_BUSY, &dd->flags)) + return 0; + + spin_lock_irqsave(&dd->lock, flags); + backlog = crypto_get_backlog(&dd->queue); + async_req = crypto_dequeue_request(&dd->queue); + if (!async_req) + clear_bit(FLAGS_BUSY, &dd->flags); + spin_unlock_irqrestore(&dd->lock, flags); + + if (!async_req) + return 0; + + if (backlog) + backlog->complete(backlog, -EINPROGRESS); + + req = ahash_request_cast(async_req); + + prev_req = dd->req; + dd->req = req; + + ctx = 
ahash_request_ctx(req); + + dev_dbg(dd->dev, "handling new req, op: %lu, nbytes: %d\n", + ctx->op, req->nbytes); + + if (req != prev_req && ctx->digcnt) + /* request has changed - restore hash */ + omap_sham_copy_hash(req, 0); + + if (ctx->op == OP_UPDATE) { + err = omap_sham_update_req(dd); + if (err != -EINPROGRESS && (ctx->flags & FLAGS_FINUP)) + /* no final() after finup() */ + err = omap_sham_final_req(dd); + } else if (ctx->op == OP_FINAL) { + err = omap_sham_final_req(dd); + } + + if (err != -EINPROGRESS) { + /* done_task will not finish it, so do it here */ + omap_sham_finish_req(req, err); + tasklet_schedule(&dd->queue_task); + } + + dev_dbg(dd->dev, "exit, err: %d\n", err); + + return err; +} + +static int omap_sham_enqueue(struct ahash_request *req, unsigned int op) +{ + struct omap_sham_reqctx *ctx = ahash_request_ctx(req); + struct omap_sham_ctx *tctx = crypto_tfm_ctx(req->base.tfm); + struct omap_sham_dev *dd = tctx->dd; + unsigned long flags; + int err; + + ctx->op = op; + + spin_lock_irqsave(&dd->lock, flags); + err = ahash_enqueue_request(&dd->queue, req); + spin_unlock_irqrestore(&dd->lock, flags); + + omap_sham_handle_queue(dd); + + return err; +} + +static int omap_sham_update(struct ahash_request *req) +{ + struct omap_sham_reqctx *ctx = ahash_request_ctx(req); + + if (!req->nbytes) + return 0; + + ctx->total = req->nbytes; + ctx->sg = req->src; + ctx->offset = 0; + + if (ctx->flags & FLAGS_FINUP) { + if ((ctx->digcnt + ctx->bufcnt + ctx->total) < 9) { + /* + * OMAP HW accel works only with buffers >= 9 + * will switch to bypass in final() + * final has the same request and data + */ + omap_sham_append_sg(ctx); + return 0; + } else if (ctx->bufcnt + ctx->total <= 64) { + ctx->flags |= FLAGS_CPU; + } else if (!ctx->bufcnt && sg_is_last(ctx->sg)) { + /* may be can use faster functions */ + int aligned = IS_ALIGNED((u32)ctx->sg->offset, + sizeof(u32)); + + if (aligned && (ctx->flags & FLAGS_FIRST)) + /* digest: first and final */ + ctx->flags |= FLAGS_FAST; + + ctx->flags &= ~FLAGS_FIRST; + } + } else if (ctx->bufcnt + ctx->total <= ctx->buflen) { + /* if not finaup -> not fast */ + omap_sham_append_sg(ctx); + return 0; + } + + return omap_sham_enqueue(req, OP_UPDATE); +} + +static int omap_sham_shash_digest(struct crypto_shash *shash, u32 flags, + const u8 *data, unsigned int len, u8 *out) +{ + struct { + struct shash_desc shash; + char ctx[crypto_shash_descsize(shash)]; + } desc; + + desc.shash.tfm = shash; + desc.shash.flags = flags & CRYPTO_TFM_REQ_MAY_SLEEP; + + return crypto_shash_digest(&desc.shash, data, len, out); +} + +static int omap_sham_final_shash(struct ahash_request *req) +{ + struct omap_sham_ctx *tctx = crypto_tfm_ctx(req->base.tfm); + struct omap_sham_reqctx *ctx = ahash_request_ctx(req); + + return omap_sham_shash_digest(tctx->fallback, req->base.flags, + ctx->buffer, ctx->bufcnt, req->result); +} + +static int omap_sham_final(struct ahash_request *req) +{ + struct omap_sham_reqctx *ctx = ahash_request_ctx(req); + int err = 0; + + ctx->flags |= FLAGS_FINUP; + + /* OMAP HW accel works only with buffers >= 9 */ + /* HMAC is always >= 9 because of ipad */ + if ((ctx->digcnt + ctx->bufcnt) < 9) + err = omap_sham_final_shash(req); + else if (ctx->bufcnt) + return omap_sham_enqueue(req, OP_FINAL); + + omap_sham_cleanup(req); + + return err; +} + +static int omap_sham_finup(struct ahash_request *req) +{ + struct omap_sham_reqctx *ctx = ahash_request_ctx(req); + int err1, err2; + + ctx->flags |= FLAGS_FINUP; + + err1 = omap_sham_update(req); + if (err1 == 
-EINPROGRESS) + return err1; + /* + * final() has to be always called to cleanup resources + * even if udpate() failed, except EINPROGRESS + */ + err2 = omap_sham_final(req); + + return err1 ?: err2; +} + +static int omap_sham_digest(struct ahash_request *req) +{ + return omap_sham_init(req) ?: omap_sham_finup(req); +} + +static int omap_sham_setkey(struct crypto_ahash *tfm, const u8 *key, + unsigned int keylen) +{ + struct omap_sham_ctx *tctx = crypto_ahash_ctx(tfm); + struct omap_sham_hmac_ctx *bctx = tctx->base; + int bs = crypto_shash_blocksize(bctx->shash); + int ds = crypto_shash_digestsize(bctx->shash); + int err, i; + err = crypto_shash_setkey(tctx->fallback, key, keylen); + if (err) + return err; + + if (keylen > bs) { + err = omap_sham_shash_digest(bctx->shash, + crypto_shash_get_flags(bctx->shash), + key, keylen, bctx->ipad); + if (err) + return err; + keylen = ds; + } else { + memcpy(bctx->ipad, key, keylen); + } + + memset(bctx->ipad + keylen, 0, bs - keylen); + memcpy(bctx->opad, bctx->ipad, bs); + + for (i = 0; i < bs; i++) { + bctx->ipad[i] ^= 0x36; + bctx->opad[i] ^= 0x5c; + } + + return err; +} + +static int omap_sham_cra_init_alg(struct crypto_tfm *tfm, const char *alg_base) +{ + struct omap_sham_ctx *tctx = crypto_tfm_ctx(tfm); + const char *alg_name = crypto_tfm_alg_name(tfm); + + /* Allocate a fallback and abort if it failed. */ + tctx->fallback = crypto_alloc_shash(alg_name, 0, + CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(tctx->fallback)) { + pr_err("omap-sham: fallback driver '%s' " + "could not be loaded.\n", alg_name); + return PTR_ERR(tctx->fallback); + } + + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct omap_sham_reqctx)); + + if (alg_base) { + struct omap_sham_hmac_ctx *bctx = tctx->base; + tctx->flags |= FLAGS_HMAC; + bctx->shash = crypto_alloc_shash(alg_base, 0, + CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(bctx->shash)) { + pr_err("omap-sham: base driver '%s' " + "could not be loaded.\n", alg_base); + crypto_free_shash(tctx->fallback); + return PTR_ERR(bctx->shash); + } + + } + + return 0; +} + +static int omap_sham_cra_init(struct crypto_tfm *tfm) +{ + return omap_sham_cra_init_alg(tfm, NULL); +} + +static int omap_sham_cra_sha1_init(struct crypto_tfm *tfm) +{ + return omap_sham_cra_init_alg(tfm, "sha1"); +} + +static int omap_sham_cra_md5_init(struct crypto_tfm *tfm) +{ + return omap_sham_cra_init_alg(tfm, "md5"); +} + +static void omap_sham_cra_exit(struct crypto_tfm *tfm) +{ + struct omap_sham_ctx *tctx = crypto_tfm_ctx(tfm); + + crypto_free_shash(tctx->fallback); + tctx->fallback = NULL; + + if (tctx->flags & FLAGS_HMAC) { + struct omap_sham_hmac_ctx *bctx = tctx->base; + crypto_free_shash(bctx->shash); + } +} + +static struct ahash_alg algs[] = { +{ + .init = omap_sham_init, + .update = omap_sham_update, + .final = omap_sham_final, + .finup = omap_sham_finup, + .digest = omap_sham_digest, + .halg.digestsize = SHA1_DIGEST_SIZE, + .halg.base = { + .cra_name = "sha1", + .cra_driver_name = "omap-sha1", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct omap_sham_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = omap_sham_cra_init, + .cra_exit = omap_sham_cra_exit, + } +}, +{ + .init = omap_sham_init, + .update = omap_sham_update, + .final = omap_sham_final, + .finup = omap_sham_finup, + .digest = omap_sham_digest, + .halg.digestsize = MD5_DIGEST_SIZE, + .halg.base = { + .cra_name = "md5", + 
.cra_driver_name = "omap-md5", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct omap_sham_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = omap_sham_cra_init, + .cra_exit = omap_sham_cra_exit, + } +}, +{ + .init = omap_sham_init, + .update = omap_sham_update, + .final = omap_sham_final, + .finup = omap_sham_finup, + .digest = omap_sham_digest, + .setkey = omap_sham_setkey, + .halg.digestsize = SHA1_DIGEST_SIZE, + .halg.base = { + .cra_name = "hmac(sha1)", + .cra_driver_name = "omap-hmac-sha1", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct omap_sham_ctx) + + sizeof(struct omap_sham_hmac_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = omap_sham_cra_sha1_init, + .cra_exit = omap_sham_cra_exit, + } +}, +{ + .init = omap_sham_init, + .update = omap_sham_update, + .final = omap_sham_final, + .finup = omap_sham_finup, + .digest = omap_sham_digest, + .setkey = omap_sham_setkey, + .halg.digestsize = MD5_DIGEST_SIZE, + .halg.base = { + .cra_name = "hmac(md5)", + .cra_driver_name = "omap-hmac-md5", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct omap_sham_ctx) + + sizeof(struct omap_sham_hmac_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = omap_sham_cra_md5_init, + .cra_exit = omap_sham_cra_exit, + } +} +}; + +static void omap_sham_done_task(unsigned long data) +{ + struct omap_sham_dev *dd = (struct omap_sham_dev *)data; + struct ahash_request *req = dd->req; + struct omap_sham_reqctx *ctx = ahash_request_ctx(req); + int ready = 1; + + if (ctx->flags & FLAGS_OUTPUT_READY) { + ctx->flags &= ~FLAGS_OUTPUT_READY; + ready = 1; + } + + if (dd->flags & FLAGS_DMA_ACTIVE) { + dd->flags &= ~FLAGS_DMA_ACTIVE; + omap_sham_update_dma_stop(dd); + omap_sham_update_dma_slow(dd); + } + + if (ready && !(dd->flags & FLAGS_DMA_ACTIVE)) { + dev_dbg(dd->dev, "update done\n"); + /* finish curent request */ + omap_sham_finish_req(req, 0); + /* start new request */ + omap_sham_handle_queue(dd); + } +} + +static void omap_sham_queue_task(unsigned long data) +{ + struct omap_sham_dev *dd = (struct omap_sham_dev *)data; + + omap_sham_handle_queue(dd); +} + +static irqreturn_t omap_sham_irq(int irq, void *dev_id) +{ + struct omap_sham_dev *dd = dev_id; + struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req); + + if (!ctx) { + dev_err(dd->dev, "unknown interrupt.\n"); + return IRQ_HANDLED; + } + + if (unlikely(ctx->flags & FLAGS_FINAL)) + /* final -> allow device to go to power-saving mode */ + omap_sham_write_mask(dd, SHA_REG_CTRL, 0, SHA_REG_CTRL_LENGTH); + + omap_sham_write_mask(dd, SHA_REG_CTRL, SHA_REG_CTRL_OUTPUT_READY, + SHA_REG_CTRL_OUTPUT_READY); + omap_sham_read(dd, SHA_REG_CTRL); + + ctx->flags |= FLAGS_OUTPUT_READY; + tasklet_schedule(&dd->done_task); + + return IRQ_HANDLED; +} + +static void omap_sham_dma_callback(int lch, u16 ch_status, void *data) +{ + struct omap_sham_dev *dd = data; + + if (likely(lch == dd->dma_lch)) + tasklet_schedule(&dd->done_task); +} + +static int omap_sham_dma_init(struct omap_sham_dev *dd) +{ + int err; + + dd->dma_lch = -1; + + err = omap_request_dma(dd->dma, dev_name(dd->dev), + omap_sham_dma_callback, dd, &dd->dma_lch); + if 
(err) { + dev_err(dd->dev, "Unable to request DMA channel\n"); + return err; + } + omap_set_dma_dest_params(dd->dma_lch, 0, + OMAP_DMA_AMODE_CONSTANT, + dd->phys_base + SHA_REG_DIN(0), 0, 16); + + omap_set_dma_dest_burst_mode(dd->dma_lch, + OMAP_DMA_DATA_BURST_16); + + return 0; +} + +static void omap_sham_dma_cleanup(struct omap_sham_dev *dd) +{ + if (dd->dma_lch >= 0) { + omap_free_dma(dd->dma_lch); + dd->dma_lch = -1; + } +} + +static int __devinit omap_sham_probe(struct platform_device *pdev) +{ + struct omap_sham_dev *dd; + struct device *dev = &pdev->dev; + struct resource *res; + int err, i, j; + + dd = kzalloc(sizeof(struct omap_sham_dev), GFP_KERNEL); + if (dd == NULL) { + dev_err(dev, "unable to alloc data struct.\n"); + err = -ENOMEM; + goto data_err; + } + dd->dev = dev; + platform_set_drvdata(pdev, dd); + + INIT_LIST_HEAD(&dd->list); + spin_lock_init(&dd->lock); + tasklet_init(&dd->done_task, omap_sham_done_task, (unsigned long)dd); + tasklet_init(&dd->queue_task, omap_sham_queue_task, (unsigned long)dd); + crypto_init_queue(&dd->queue, OMAP_SHAM_QUEUE_LENGTH); + + dd->irq = -1; + + /* Get the base address */ + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(dev, "no MEM resource info\n"); + err = -ENODEV; + goto res_err; + } + dd->phys_base = res->start; + + /* Get the DMA */ + res = platform_get_resource(pdev, IORESOURCE_DMA, 0); + if (!res) { + dev_err(dev, "no DMA resource info\n"); + err = -ENODEV; + goto res_err; + } + dd->dma = res->start; + + /* Get the IRQ */ + dd->irq = platform_get_irq(pdev, 0); + if (dd->irq < 0) { + dev_err(dev, "no IRQ resource info\n"); + err = dd->irq; + goto res_err; + } + + err = request_irq(dd->irq, omap_sham_irq, + IRQF_TRIGGER_LOW, dev_name(dev), dd); + if (err) { + dev_err(dev, "unable to request irq.\n"); + goto res_err; + } + + err = omap_sham_dma_init(dd); + if (err) + goto dma_err; + + /* Initializing the clock */ + dd->iclk = clk_get(dev, "ick"); + if (!dd->iclk) { + dev_err(dev, "clock intialization failed.\n"); + err = -ENODEV; + goto clk_err; + } + + dd->io_base = ioremap(dd->phys_base, SZ_4K); + if (!dd->io_base) { + dev_err(dev, "can't ioremap\n"); + err = -ENOMEM; + goto io_err; + } + + clk_enable(dd->iclk); + dev_info(dev, "hw accel on OMAP rev %u.%u\n", + (omap_sham_read(dd, SHA_REG_REV) & SHA_REG_REV_MAJOR) >> 4, + omap_sham_read(dd, SHA_REG_REV) & SHA_REG_REV_MINOR); + clk_disable(dd->iclk); + + spin_lock(&sham.lock); + list_add_tail(&dd->list, &sham.dev_list); + spin_unlock(&sham.lock); + + for (i = 0; i < ARRAY_SIZE(algs); i++) { + err = crypto_register_ahash(&algs[i]); + if (err) + goto err_algs; + } + + return 0; + +err_algs: + for (j = 0; j < i; j++) + crypto_unregister_ahash(&algs[j]); + iounmap(dd->io_base); +io_err: + clk_put(dd->iclk); +clk_err: + omap_sham_dma_cleanup(dd); +dma_err: + if (dd->irq >= 0) + free_irq(dd->irq, dd); +res_err: + kfree(dd); + dd = NULL; +data_err: + dev_err(dev, "initialization failed.\n"); + + return err; +} + +static int __devexit omap_sham_remove(struct platform_device *pdev) +{ + static struct omap_sham_dev *dd; + int i; + + dd = platform_get_drvdata(pdev); + if (!dd) + return -ENODEV; + spin_lock(&sham.lock); + list_del(&dd->list); + spin_unlock(&sham.lock); + for (i = 0; i < ARRAY_SIZE(algs); i++) + crypto_unregister_ahash(&algs[i]); + tasklet_kill(&dd->done_task); + tasklet_kill(&dd->queue_task); + iounmap(dd->io_base); + clk_put(dd->iclk); + omap_sham_dma_cleanup(dd); + if (dd->irq >= 0) + free_irq(dd->irq, dd); + kfree(dd); + dd = NULL; + + return 0; 
+} + +static struct platform_driver omap_sham_driver = { + .probe = omap_sham_probe, + .remove = omap_sham_remove, + .driver = { + .name = "omap-sham", + .owner = THIS_MODULE, + }, +}; + +static int __init omap_sham_mod_init(void) +{ + pr_info("loading %s driver\n", "omap-sham"); + + if (!cpu_class_is_omap2() || + omap_type() != OMAP2_DEVICE_TYPE_SEC) { + pr_err("Unsupported cpu\n"); + return -ENODEV; + } + + return platform_driver_register(&omap_sham_driver); +} + +static void __exit omap_sham_mod_exit(void) +{ + platform_driver_unregister(&omap_sham_driver); +} + +module_init(omap_sham_mod_init); +module_exit(omap_sham_mod_exit); + +MODULE_DESCRIPTION("OMAP SHA1/MD5 hw acceleration support."); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Dmitry Kasatkin"); -- cgit v0.10.2 From 97e3d94aac1c3e95bd04d1b186479a4df3663ab8 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Thu, 29 Apr 2010 14:37:32 +0200 Subject: padata: Dont scale the parallel objects with the cpus Scaling the maximum number of objects in the parallel codepath can lead to out of memory problems on bigsmp machines. Signed-off-by: Steffen Klassert Signed-off-by: Herbert Xu diff --git a/kernel/padata.c b/kernel/padata.c index 0282478..5085046 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -28,7 +28,7 @@ #include #define MAX_SEQ_NR INT_MAX - NR_CPUS -#define MAX_OBJ_NUM 10000 * NR_CPUS +#define MAX_OBJ_NUM 1000 static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index) { -- cgit v0.10.2 From e2cb2f1c2ccf19914e941859c07558ba5f8a4610 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Thu, 29 Apr 2010 14:40:10 +0200 Subject: padata: cpu hotplug code should depend on CONFIG_HOTPLUG_CPU This patch makes the padata cpu hotplug code dependend on CONFIG_HOTPLUG_CPU. Signed-off-by: Steffen Klassert Signed-off-by: Herbert Xu diff --git a/kernel/padata.c b/kernel/padata.c index 5b44d0f..1209a17 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -570,6 +570,7 @@ void padata_stop(struct padata_instance *pinst) } EXPORT_SYMBOL(padata_stop); +#ifdef CONFIG_HOTPLUG_CPU static int padata_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { @@ -621,6 +622,7 @@ static int padata_cpu_callback(struct notifier_block *nfb, return NOTIFY_OK; } +#endif /* * padata_alloc - allocate and initialize a padata instance @@ -631,7 +633,6 @@ static int padata_cpu_callback(struct notifier_block *nfb, struct padata_instance *padata_alloc(const struct cpumask *cpumask, struct workqueue_struct *wq) { - int err; struct padata_instance *pinst; struct parallel_data *pd; @@ -654,18 +655,16 @@ struct padata_instance *padata_alloc(const struct cpumask *cpumask, pinst->flags = 0; +#ifdef CONFIG_HOTPLUG_CPU pinst->cpu_notifier.notifier_call = padata_cpu_callback; pinst->cpu_notifier.priority = 0; - err = register_hotcpu_notifier(&pinst->cpu_notifier); - if (err) - goto err_free_cpumask; + register_hotcpu_notifier(&pinst->cpu_notifier); +#endif mutex_init(&pinst->lock); return pinst; -err_free_cpumask: - free_cpumask_var(pinst->cpumask); err_free_pd: padata_free_pd(pd); err_free_inst: @@ -689,7 +688,9 @@ void padata_free(struct padata_instance *pinst) while (atomic_read(&pinst->pd->refcnt) != 0) yield(); +#ifdef CONFIG_HOTPLUG_CPU unregister_hotcpu_notifier(&pinst->cpu_notifier); +#endif padata_free_pd(pinst->pd); free_cpumask_var(pinst->cpumask); kfree(pinst); -- cgit v0.10.2 From 7d0d2d385ca7cc511f7d1c64735a1b4aaefd9a1b Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Thu, 29 Apr 2010 14:40:53 +0200 Subject: padata: 
Remove superfluous might_sleep might_sleep() was placed before mutex_lock() in some places. We remove them because mutex_lock() does might_sleep() too. Signed-off-by: Steffen Klassert Signed-off-by: Herbert Xu diff --git a/kernel/padata.c b/kernel/padata.c index 1209a17..5fa6ba6 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -443,8 +443,6 @@ int padata_set_cpumask(struct padata_instance *pinst, struct parallel_data *pd; int err = 0; - might_sleep(); - mutex_lock(&pinst->lock); pd = padata_alloc_pd(pinst, cpumask); @@ -489,8 +487,6 @@ int padata_add_cpu(struct padata_instance *pinst, int cpu) { int err; - might_sleep(); - mutex_lock(&pinst->lock); cpumask_set_cpu(cpu, pinst->cpumask); @@ -527,8 +523,6 @@ int padata_remove_cpu(struct padata_instance *pinst, int cpu) { int err; - might_sleep(); - mutex_lock(&pinst->lock); cpumask_clear_cpu(cpu, pinst->cpumask); @@ -547,8 +541,6 @@ EXPORT_SYMBOL(padata_remove_cpu); */ void padata_start(struct padata_instance *pinst) { - might_sleep(); - mutex_lock(&pinst->lock); pinst->flags |= PADATA_INIT; mutex_unlock(&pinst->lock); @@ -562,8 +554,6 @@ EXPORT_SYMBOL(padata_start); */ void padata_stop(struct padata_instance *pinst) { - might_sleep(); - mutex_lock(&pinst->lock); pinst->flags &= ~PADATA_INIT; mutex_unlock(&pinst->lock); -- cgit v0.10.2 From 7b389b2cc539dc2dc60b049240942be54958c93a Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Thu, 29 Apr 2010 14:41:36 +0200 Subject: padata: Initialize the padata queues only for the used cpus padata_alloc_pd set up queues for all possible cpus. This patch changes this to set up the queues just for the used cpus. Signed-off-by: Steffen Klassert Signed-off-by: Herbert Xu diff --git a/kernel/padata.c b/kernel/padata.c index 5fa6ba6..fc9f19a 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -358,17 +358,15 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst, if (!alloc_cpumask_var(&pd->cpumask, GFP_KERNEL)) goto err_free_queue; - for_each_possible_cpu(cpu) { + cpumask_and(pd->cpumask, cpumask, cpu_active_mask); + + for_each_cpu(cpu, pd->cpumask) { queue = per_cpu_ptr(pd->queue, cpu); queue->pd = pd; - if (cpumask_test_cpu(cpu, cpumask) - && cpumask_test_cpu(cpu, cpu_active_mask)) { - queue->cpu_index = cpu_index; - cpu_index++; - } else - queue->cpu_index = -1; + queue->cpu_index = cpu_index; + cpu_index++; INIT_LIST_HEAD(&queue->reorder.list); INIT_LIST_HEAD(&queue->parallel.list); @@ -382,8 +380,6 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst, atomic_set(&queue->num_obj, 0); } - cpumask_and(pd->cpumask, cpumask, cpu_active_mask); - num_cpus = cpumask_weight(pd->cpumask); pd->max_seq_nr = (MAX_SEQ_NR / num_cpus) * num_cpus - 1; -- cgit v0.10.2 From 6751fb3c0e0cfcc40a1a0acabca97370c9ec6c6b Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Thu, 29 Apr 2010 14:42:30 +0200 Subject: padata: Use get_online_cpus/put_online_cpus This patch puts get_online_cpus/put_online_cpus around the places we modify the padata cpumask to ensure that no cpu goes offline during this operation. 
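
For illustration only, the hotplug-protection pattern this change introduces looks roughly like the sketch below. The names (example_set_cpumask, example_lock, example_mask) are made up for the example and are not taken from the patch; the point is simply that any section which rewrites a cpumask that must stay consistent with the online CPUs is bracketed by get_online_cpus()/put_online_cpus().

#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/mutex.h>

static DEFINE_MUTEX(example_lock);
static cpumask_var_t example_mask;	/* assumed allocated elsewhere with alloc_cpumask_var() */

static int example_set_cpumask(const struct cpumask *new_mask)
{
	mutex_lock(&example_lock);
	get_online_cpus();		/* no CPU can go offline from here on */

	/* recompute the working mask against the CPUs that are actually usable */
	cpumask_and(example_mask, new_mask, cpu_active_mask);

	put_online_cpus();		/* hotplug allowed again */
	mutex_unlock(&example_lock);
	return 0;
}
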
Signed-off-by: Steffen Klassert Signed-off-by: Herbert Xu diff --git a/kernel/padata.c b/kernel/padata.c index fc9f19a..82958e0 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -441,6 +441,8 @@ int padata_set_cpumask(struct padata_instance *pinst, mutex_lock(&pinst->lock); + get_online_cpus(); + pd = padata_alloc_pd(pinst, cpumask); if (!pd) { err = -ENOMEM; @@ -452,6 +454,8 @@ int padata_set_cpumask(struct padata_instance *pinst, padata_replace(pinst, pd); out: + put_online_cpus(); + mutex_unlock(&pinst->lock); return err; @@ -485,8 +489,10 @@ int padata_add_cpu(struct padata_instance *pinst, int cpu) mutex_lock(&pinst->lock); + get_online_cpus(); cpumask_set_cpu(cpu, pinst->cpumask); err = __padata_add_cpu(pinst, cpu); + put_online_cpus(); mutex_unlock(&pinst->lock); @@ -521,8 +527,10 @@ int padata_remove_cpu(struct padata_instance *pinst, int cpu) mutex_lock(&pinst->lock); + get_online_cpus(); cpumask_clear_cpu(cpu, pinst->cpumask); err = __padata_remove_cpu(pinst, cpu); + put_online_cpus(); mutex_unlock(&pinst->lock); @@ -626,6 +634,8 @@ struct padata_instance *padata_alloc(const struct cpumask *cpumask, if (!pinst) goto err; + get_online_cpus(); + pd = padata_alloc_pd(pinst, cpumask); if (!pd) goto err_free_inst; @@ -647,6 +657,8 @@ struct padata_instance *padata_alloc(const struct cpumask *cpumask, register_hotcpu_notifier(&pinst->cpu_notifier); #endif + put_online_cpus(); + mutex_init(&pinst->lock); return pinst; @@ -655,6 +667,7 @@ err_free_pd: padata_free_pd(pd); err_free_inst: kfree(pinst); + put_online_cpus(); err: return NULL; } -- cgit v0.10.2 From edc774ed0c7d75e92b53105b386a5b0ce94d4525 Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Fri, 14 May 2010 14:45:08 +1000 Subject: crypto: omap - OMAP macros corrected Signed-off-by: Dmitry Kasatkin Acked-by: Tony Lindgren Signed-off-by: Herbert Xu diff --git a/arch/arm/mach-omap2/devices.c b/arch/arm/mach-omap2/devices.c index beac46c..79dbf04 100644 --- a/arch/arm/mach-omap2/devices.c +++ b/arch/arm/mach-omap2/devices.c @@ -456,7 +456,7 @@ static inline void omap_init_mcspi(void) {} #if defined(CONFIG_CRYPTO_DEV_OMAP_SHAM) || defined(CONFIG_CRYPTO_DEV_OMAP_SHAM_MODULE) -#ifdef CONFIG_ARCH_OMAP24XX +#ifdef CONFIG_ARCH_OMAP2 static struct resource omap2_sham_resources[] = { { .start = OMAP24XX_SEC_SHA1MD5_BASE, @@ -474,7 +474,7 @@ static int omap2_sham_resources_sz = ARRAY_SIZE(omap2_sham_resources); #define omap2_sham_resources_sz 0 #endif -#ifdef CONFIG_ARCH_OMAP34XX +#ifdef CONFIG_ARCH_OMAP3 static struct resource omap3_sham_resources[] = { { .start = OMAP34XX_SEC_SHA1MD5_BASE, -- cgit v0.10.2 From 5bdd5ded95b3188d58ba43ac801b8849cbea1b16 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Fri, 14 May 2010 14:58:05 +1000 Subject: crypto: mv_cesa - Use resource_size Use the resource_size function instead of manually calculating the resource size. This reduces the chance of introducing off-by-one errors. 
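
As a minimal sketch (not part of the patch itself, and with an invented helper name), the conversion replaces the open-coded "res->end - res->start + 1" with resource_size(), which performs that same computation in one well-tested place:

#include <linux/ioport.h>
#include <linux/io.h>

static void __iomem *example_map(struct resource *res)
{
	/* resource_size(res) == res->end - res->start + 1, so the +1 cannot be forgotten */
	return ioremap(res->start, resource_size(res));
}
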
Signed-off-by: Tobias Klauser Signed-off-by: Herbert Xu diff --git a/drivers/crypto/mv_cesa.c b/drivers/crypto/mv_cesa.c index 18a436c..e095422 100644 --- a/drivers/crypto/mv_cesa.c +++ b/drivers/crypto/mv_cesa.c @@ -1023,7 +1023,7 @@ static int mv_probe(struct platform_device *pdev) spin_lock_init(&cp->lock); crypto_init_queue(&cp->queue, 50); - cp->reg = ioremap(res->start, res->end - res->start + 1); + cp->reg = ioremap(res->start, resource_size(res)); if (!cp->reg) { ret = -ENOMEM; goto err; @@ -1034,7 +1034,7 @@ static int mv_probe(struct platform_device *pdev) ret = -ENXIO; goto err_unmap_reg; } - cp->sram_size = res->end - res->start + 1; + cp->sram_size = resource_size(res); cp->max_req_size = cp->sram_size - SRAM_CFG_SPACE; cp->sram = ioremap(res->start, cp->sram_size); if (!cp->sram) { -- cgit v0.10.2 From 18eb8ea6ee4cc9ed39b45f95b734f523bcfb586b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 19 May 2010 11:50:58 +1000 Subject: crypto: shash - Remove usage of CRYPTO_MINALIGN The macro CRYPTO_MINALIGN is not meant to be used directly. This patch replaces it with crypto_tfm_ctx_alignment. Signed-off-by: Herbert Xu diff --git a/crypto/shash.c b/crypto/shash.c index 91f7b9d..22fd943 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -37,7 +37,7 @@ static int shash_setkey_unaligned(struct crypto_shash *tfm, const u8 *key, u8 *buffer, *alignbuffer; int err; - absize = keylen + (alignmask & ~(CRYPTO_MINALIGN - 1)); + absize = keylen + (alignmask & ~(crypto_tfm_ctx_alignment() - 1)); buffer = kmalloc(absize, GFP_KERNEL); if (!buffer) return -ENOMEM; -- cgit v0.10.2 From d46a5ac7a7e2045e33c6ad6ffb8cf18a7e86a15a Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 19 May 2010 13:43:14 +1000 Subject: padata: Use a timer to handle remaining objects in the reorder queues padata_get_next needs to check whether the next object that needs serialization must be parallel processed by the local cpu. This check was implemented incorrectly and always returned true, so the try_again loop in padata_reorder was never taken. This can lead to object leaks in some rare cases due to a race that appears with the trylock in padata_reorder. The try_again loop was not a good idea after all, because a cpu could take that loop frequently, so we handle this with a timer instead. This patch adds a timer to handle the race that appears with the trylock. If cpu1 queues an object to the reorder queue while cpu2 holds the pd->lock but has already left the while loop in padata_reorder, cpu2 can't take care of this object and cpu1 exits because it can't get the lock. Usually the next cpu that takes the lock takes care of this object too. We need the timer only if this object was the last one to arrive at the reorder queues; the timer function sends it out in this case.
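In outline, the mechanism added by the patch below is (simplified sketch):

	/* Timer callback: retry the reorder pass for objects that no cpu
	 * picked up when they arrived.
	 */
	static void padata_reorder_timer(unsigned long arg)
	{
		struct parallel_data *pd = (struct parallel_data *)arg;

		padata_reorder(pd);
	}

	/* Tail of padata_reorder(), after pd->lock has been dropped: */
	if (atomic_read(&pd->reorder_objects) && !(pinst->flags & PADATA_RESET))
		mod_timer(&pd->timer, jiffies + HZ);	/* re-check in one second */
	else
		del_timer(&pd->timer);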
Signed-off-by: Steffen Klassert Signed-off-by: Herbert Xu diff --git a/include/linux/padata.h b/include/linux/padata.h index 51611da..64836a6 100644 --- a/include/linux/padata.h +++ b/include/linux/padata.h @@ -24,6 +24,7 @@ #include #include #include +#include struct padata_priv { struct list_head list; @@ -60,6 +61,7 @@ struct parallel_data { unsigned int max_seq_nr; cpumask_var_t cpumask; spinlock_t lock; + struct timer_list timer; }; struct padata_instance { diff --git a/kernel/padata.c b/kernel/padata.c index 82958e0..6d7ea48 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -231,7 +231,8 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd) goto out; } - if (next_nr % num_cpus == next_queue->cpu_index) { + queue = per_cpu_ptr(pd->queue, smp_processor_id()); + if (queue->cpu_index == next_queue->cpu_index) { padata = ERR_PTR(-ENODATA); goto out; } @@ -247,9 +248,8 @@ static void padata_reorder(struct parallel_data *pd) struct padata_queue *queue; struct padata_instance *pinst = pd->pinst; -try_again: if (!spin_trylock_bh(&pd->lock)) - goto out; + return; while (1) { padata = padata_get_next(pd); @@ -258,8 +258,9 @@ try_again: break; if (PTR_ERR(padata) == -ENODATA) { + del_timer(&pd->timer); spin_unlock_bh(&pd->lock); - goto out; + return; } queue = per_cpu_ptr(pd->queue, padata->cb_cpu); @@ -273,13 +274,22 @@ try_again: spin_unlock_bh(&pd->lock); - if (atomic_read(&pd->reorder_objects)) - goto try_again; + if (atomic_read(&pd->reorder_objects) + && !(pinst->flags & PADATA_RESET)) + mod_timer(&pd->timer, jiffies + HZ); + else + del_timer(&pd->timer); -out: return; } +static void padata_reorder_timer(unsigned long arg) +{ + struct parallel_data *pd = (struct parallel_data *)arg; + + padata_reorder(pd); +} + static void padata_serial_worker(struct work_struct *work) { struct padata_queue *queue; @@ -383,6 +393,7 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst, num_cpus = cpumask_weight(pd->cpumask); pd->max_seq_nr = (MAX_SEQ_NR / num_cpus) * num_cpus - 1; + setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd); atomic_set(&pd->seq_nr, -1); atomic_set(&pd->reorder_objects, 0); atomic_set(&pd->refcnt, 0); -- cgit v0.10.2 From 2b73b07ab8a44ce171e07a328439f311481a7ea7 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 19 May 2010 13:43:46 +1000 Subject: padata: Flush the padata queues actively yield() was used to wait until all references to the internal control structure in use were dropped before it was freed. This patch implements padata_flush_queues, which actively flushes the padata percpu queues instead.
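In padata_replace(), the change below boils down to (sketch):

	/* Before: busy-wait until the last reference is dropped. */
	while (atomic_read(&pd_old->refcnt) != 0)
		yield();
	flush_workqueue(pinst->wq);

	/* After: actively drain the parallel and serial percpu queues. */
	padata_flush_queues(pd_old);
	padata_free_pd(pd_old);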
Signed-off-by: Steffen Klassert Signed-off-by: Herbert Xu diff --git a/kernel/padata.c b/kernel/padata.c index 6d7ea48..ec6b8b7 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -417,6 +417,29 @@ static void padata_free_pd(struct parallel_data *pd) kfree(pd); } +static void padata_flush_queues(struct parallel_data *pd) +{ + int cpu; + struct padata_queue *queue; + + for_each_cpu(cpu, pd->cpumask) { + queue = per_cpu_ptr(pd->queue, cpu); + flush_work(&queue->pwork); + } + + del_timer_sync(&pd->timer); + + if (atomic_read(&pd->reorder_objects)) + padata_reorder(pd); + + for_each_cpu(cpu, pd->cpumask) { + queue = per_cpu_ptr(pd->queue, cpu); + flush_work(&queue->swork); + } + + BUG_ON(atomic_read(&pd->refcnt) != 0); +} + static void padata_replace(struct padata_instance *pinst, struct parallel_data *pd_new) { @@ -428,11 +451,7 @@ static void padata_replace(struct padata_instance *pinst, synchronize_rcu(); - while (atomic_read(&pd_old->refcnt) != 0) - yield(); - - flush_workqueue(pinst->wq); - + padata_flush_queues(pd_old); padata_free_pd(pd_old); pinst->flags &= ~PADATA_RESET; @@ -695,12 +714,10 @@ void padata_free(struct padata_instance *pinst) synchronize_rcu(); - while (atomic_read(&pinst->pd->refcnt) != 0) - yield(); - #ifdef CONFIG_HOTPLUG_CPU unregister_hotcpu_notifier(&pinst->cpu_notifier); #endif + padata_flush_queues(pinst->pd); padata_free_pd(pinst->pd); free_cpumask_var(pinst->cpumask); kfree(pinst); -- cgit v0.10.2 From 0198ffd135f51d4fbb0c50036395716c06632ed9 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 19 May 2010 13:44:27 +1000 Subject: padata: Add some code comments Signed-off-by: Steffen Klassert Signed-off-by: Herbert Xu diff --git a/include/linux/padata.h b/include/linux/padata.h index 64836a6..8d84062 100644 --- a/include/linux/padata.h +++ b/include/linux/padata.h @@ -26,6 +26,17 @@ #include #include +/** + * struct padata_priv - Embedded to the users data structure. + * + * @list: List entry, to attach to the padata lists. + * @pd: Pointer to the internal control structure. + * @cb_cpu: Callback cpu for serializatioon. + * @seq_nr: Sequence number of the parallelized data object. + * @info: Used to pass information from the parallel to the serial function. + * @parallel: Parallel execution function. + * @serial: Serial complete function. + */ struct padata_priv { struct list_head list; struct parallel_data *pd; @@ -36,11 +47,29 @@ struct padata_priv { void (*serial)(struct padata_priv *padata); }; +/** + * struct padata_list + * + * @list: List head. + * @lock: List lock. + */ struct padata_list { struct list_head list; spinlock_t lock; }; +/** + * struct padata_queue - The percpu padata queues. + * + * @parallel: List to wait for parallelization. + * @reorder: List to wait for reordering after parallel processing. + * @serial: List to wait for serialization after reordering. + * @pwork: work struct for parallelization. + * @swork: work struct for serialization. + * @pd: Backpointer to the internal control structure. + * @num_obj: Number of objects that are processed by this cpu. + * @cpu_index: Index of the cpu. + */ struct padata_queue { struct padata_list parallel; struct padata_list reorder; @@ -52,6 +81,20 @@ struct padata_queue { int cpu_index; }; +/** + * struct parallel_data - Internal control structure, covers everything + * that depends on the cpumask in use. + * + * @pinst: padata instance. + * @queue: percpu padata queues. + * @seq_nr: The sequence number that will be attached to the next object. 
+ * @reorder_objects: Number of objects waiting in the reorder queues. + * @refcnt: Number of objects holding a reference on this parallel_data. + * @max_seq_nr: Maximal used sequence number. + * @cpumask: cpumask in use. + * @lock: Reorder lock. + * @timer: Reorder timer. + */ struct parallel_data { struct padata_instance *pinst; struct padata_queue *queue; @@ -64,6 +107,16 @@ struct parallel_data { struct timer_list timer; }; +/** + * struct padata_instance - The overall control structure. + * + * @cpu_notifier: cpu hotplug notifier. + * @wq: The workqueue in use. + * @pd: The internal control structure. + * @cpumask: User supplied cpumask. + * @lock: padata instance lock. + * @flags: padata flags. + */ struct padata_instance { struct notifier_block cpu_notifier; struct workqueue_struct *wq; diff --git a/kernel/padata.c b/kernel/padata.c index ec6b8b7..ca89dfb 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -88,7 +88,7 @@ static void padata_parallel_worker(struct work_struct *work) local_bh_enable(); } -/* +/** * padata_do_parallel - padata parallelization function * * @pinst: padata instance @@ -152,6 +152,23 @@ out: } EXPORT_SYMBOL(padata_do_parallel); +/* + * padata_get_next - Get the next object that needs serialization. + * + * Return values are: + * + * A pointer to the control struct of the next object that needs + * serialization, if present in one of the percpu reorder queues. + * + * NULL, if all percpu reorder queues are empty. + * + * -EINPROGRESS, if the next object that needs serialization will + * be parallel processed by another cpu and is not yet present in + * the cpu's reorder queue. + * + * -ENODATA, if this cpu has to do the parallel processing for + * the next object. + */ static struct padata_priv *padata_get_next(struct parallel_data *pd) { int cpu, num_cpus, empty, calc_seq_nr; @@ -173,7 +190,7 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd) /* * Calculate the seq_nr of the object that should be - * next in this queue. + * next in this reorder queue. */ overrun = 0; calc_seq_nr = (atomic_read(&queue->num_obj) * num_cpus) @@ -248,15 +265,36 @@ static void padata_reorder(struct parallel_data *pd) struct padata_queue *queue; struct padata_instance *pinst = pd->pinst; + /* + * We need to ensure that only one cpu can work on dequeueing of + * the reorder queue the time. Calculating in which percpu reorder + * queue the next object will arrive takes some time. A spinlock + * would be highly contended. Also it is not clear in which order + * the objects arrive to the reorder queues. So a cpu could wait to + * get the lock just to notice that there is nothing to do at the + * moment. Therefore we use a trylock and let the holder of the lock + * care for all the objects enqueued during the holdtime of the lock. + */ if (!spin_trylock_bh(&pd->lock)) return; while (1) { padata = padata_get_next(pd); + /* + * All reorder queues are empty, or the next object that needs + * serialization is parallel processed by another cpu and is + * still on it's way to the cpu's reorder queue, nothing to + * do for now. + */ if (!padata || PTR_ERR(padata) == -EINPROGRESS) break; + /* + * This cpu has to do the parallel processing of the next + * object. It's waiting in the cpu's parallelization queue, + * so exit imediately. 
+ */ if (PTR_ERR(padata) == -ENODATA) { del_timer(&pd->timer); spin_unlock_bh(&pd->lock); @@ -274,6 +312,11 @@ static void padata_reorder(struct parallel_data *pd) spin_unlock_bh(&pd->lock); + /* + * The next object that needs serialization might have arrived to + * the reorder queues in the meantime, we will be called again + * from the timer function if noone else cares for it. + */ if (atomic_read(&pd->reorder_objects) && !(pinst->flags & PADATA_RESET)) mod_timer(&pd->timer, jiffies + HZ); @@ -318,7 +361,7 @@ static void padata_serial_worker(struct work_struct *work) local_bh_enable(); } -/* +/** * padata_do_serial - padata serialization function * * @padata: object to be serialized. @@ -348,6 +391,7 @@ void padata_do_serial(struct padata_priv *padata) } EXPORT_SYMBOL(padata_do_serial); +/* Allocate and initialize the internal cpumask dependend resources. */ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst, const struct cpumask *cpumask) { @@ -417,6 +461,7 @@ static void padata_free_pd(struct parallel_data *pd) kfree(pd); } +/* Flush all objects out of the padata queues. */ static void padata_flush_queues(struct parallel_data *pd) { int cpu; @@ -440,6 +485,7 @@ static void padata_flush_queues(struct parallel_data *pd) BUG_ON(atomic_read(&pd->refcnt) != 0); } +/* Replace the internal control stucture with a new one. */ static void padata_replace(struct padata_instance *pinst, struct parallel_data *pd_new) { @@ -457,7 +503,7 @@ static void padata_replace(struct padata_instance *pinst, pinst->flags &= ~PADATA_RESET; } -/* +/** * padata_set_cpumask - set the cpumask that padata should use * * @pinst: padata instance @@ -507,7 +553,7 @@ static int __padata_add_cpu(struct padata_instance *pinst, int cpu) return 0; } -/* +/** * padata_add_cpu - add a cpu to the padata cpumask * * @pinst: padata instance @@ -545,7 +591,7 @@ static int __padata_remove_cpu(struct padata_instance *pinst, int cpu) return 0; } -/* +/** * padata_remove_cpu - remove a cpu from the padata cpumask * * @pinst: padata instance @@ -568,7 +614,7 @@ int padata_remove_cpu(struct padata_instance *pinst, int cpu) } EXPORT_SYMBOL(padata_remove_cpu); -/* +/** * padata_start - start the parallel processing * * @pinst: padata instance to start @@ -581,7 +627,7 @@ void padata_start(struct padata_instance *pinst) } EXPORT_SYMBOL(padata_start); -/* +/** * padata_stop - stop the parallel processing * * @pinst: padata instance to stop @@ -648,7 +694,7 @@ static int padata_cpu_callback(struct notifier_block *nfb, } #endif -/* +/** * padata_alloc - allocate and initialize a padata instance * * @cpumask: cpumask that padata uses for parallelization @@ -703,10 +749,10 @@ err: } EXPORT_SYMBOL(padata_alloc); -/* +/** * padata_free - free a padata instance * - * @ padata_inst: padata instance to free + * @padata_inst: padata instance to free */ void padata_free(struct padata_instance *pinst) { -- cgit v0.10.2 From 3789ae7dcd29fa146b23dc30377925d1f73c8adf Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 19 May 2010 13:45:35 +1000 Subject: padata: Use get_online_cpus/put_online_cpus in padata_free Add get_online_cpus/put_online_cpus to ensure that no cpu goes offline during the flushing of the padata percpu queues. 
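Reassembled from the hunk that follows, the teardown order in padata_free() becomes roughly (sketch; the initial stop/RCU steps are taken from the surrounding code):

	void padata_free(struct padata_instance *pinst)
	{
		padata_stop(pinst);	/* from the surrounding code */
		synchronize_rcu();

	#ifdef CONFIG_HOTPLUG_CPU
		unregister_hotcpu_notifier(&pinst->cpu_notifier);
	#endif
		get_online_cpus();	/* pd->cpumask must not change while we flush */
		padata_flush_queues(pinst->pd);
		put_online_cpus();

		padata_free_pd(pinst->pd);
		free_cpumask_var(pinst->cpumask);
		kfree(pinst);
	}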
Signed-off-by: Steffen Klassert Signed-off-by: Herbert Xu diff --git a/kernel/padata.c b/kernel/padata.c index ca89dfb..b1c9857 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -763,7 +763,10 @@ void padata_free(struct padata_instance *pinst) #ifdef CONFIG_HOTPLUG_CPU unregister_hotcpu_notifier(&pinst->cpu_notifier); #endif + get_online_cpus(); padata_flush_queues(pinst->pd); + put_online_cpus(); + padata_free_pd(pinst->pd); free_cpumask_var(pinst->cpumask); kfree(pinst); -- cgit v0.10.2 From 3385329a0a0f1d31ca1d011c7887006a7e5a9902 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 19 May 2010 13:56:37 +1000 Subject: crypto: hifn_795x - Rename ablkcipher_walk to hifn_cipher_walk This is in preparation for the generic ablkcipher_walk helpers that will be added to the crypto layer. Signed-off-by: David S. Miller Signed-off-by: Herbert Xu diff --git a/drivers/crypto/hifn_795x.c b/drivers/crypto/hifn_795x.c index 73e8b17..16fce3a 100644 --- a/drivers/crypto/hifn_795x.c +++ b/drivers/crypto/hifn_795x.c @@ -638,7 +638,7 @@ struct hifn_crypto_alg #define ASYNC_FLAGS_MISALIGNED (1<<0) -struct ablkcipher_walk +struct hifn_cipher_walk { struct scatterlist cache[ASYNC_SCATTERLIST_CACHE]; u32 flags; @@ -657,7 +657,7 @@ struct hifn_request_context u8 *iv; unsigned int ivsize; u8 op, type, mode, unused; - struct ablkcipher_walk walk; + struct hifn_cipher_walk walk; }; #define crypto_alg_to_hifn(a) container_of(a, struct hifn_crypto_alg, alg) @@ -1417,7 +1417,7 @@ static int hifn_setup_dma(struct hifn_device *dev, return 0; } -static int ablkcipher_walk_init(struct ablkcipher_walk *w, +static int hifn_cipher_walk_init(struct hifn_cipher_walk *w, int num, gfp_t gfp_flags) { int i; @@ -1442,7 +1442,7 @@ static int ablkcipher_walk_init(struct ablkcipher_walk *w, return i; } -static void ablkcipher_walk_exit(struct ablkcipher_walk *w) +static void hifn_cipher_walk_exit(struct hifn_cipher_walk *w) { int i; @@ -1486,8 +1486,8 @@ static int ablkcipher_add(unsigned int *drestp, struct scatterlist *dst, return idx; } -static int ablkcipher_walk(struct ablkcipher_request *req, - struct ablkcipher_walk *w) +static int hifn_cipher_walk(struct ablkcipher_request *req, + struct hifn_cipher_walk *w) { struct scatterlist *dst, *t; unsigned int nbytes = req->nbytes, offset, copy, diff; @@ -1600,12 +1600,12 @@ static int hifn_setup_session(struct ablkcipher_request *req) } if (rctx->walk.flags & ASYNC_FLAGS_MISALIGNED) { - err = ablkcipher_walk_init(&rctx->walk, idx, GFP_ATOMIC); + err = hifn_cipher_walk_init(&rctx->walk, idx, GFP_ATOMIC); if (err < 0) return err; } - sg_num = ablkcipher_walk(req, &rctx->walk); + sg_num = hifn_cipher_walk(req, &rctx->walk); if (sg_num < 0) { err = sg_num; goto err_out_exit; @@ -1806,7 +1806,7 @@ static void hifn_process_ready(struct ablkcipher_request *req, int error) kunmap_atomic(saddr, KM_SOFTIRQ0); } - ablkcipher_walk_exit(&rctx->walk); + hifn_cipher_walk_exit(&rctx->walk); } req->base.complete(&req->base, error); -- cgit v0.10.2 From 85c6201a80ce4464a52c58a5f5ea8de15a557a6f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 19 May 2010 14:06:29 +1000 Subject: crypto: scatterwalk - Fix scatterwalk_done() test We are done with the scattergather entry when the walk offset goes past sg->offset + sg->length, not when it crosses a page boundary. There is a similarly queer test in the second half of scatterwalk_pagedone() that probably needs some scrutiny. Signed-off-by: David S. 
Miller Signed-off-by: Herbert Xu diff --git a/crypto/scatterwalk.c b/crypto/scatterwalk.c index 3de89a4..41e529a 100644 --- a/crypto/scatterwalk.c +++ b/crypto/scatterwalk.c @@ -68,7 +68,7 @@ static void scatterwalk_pagedone(struct scatter_walk *walk, int out, void scatterwalk_done(struct scatter_walk *walk, int out, int more) { - if (!offset_in_page(walk->offset) || !more) + if (!(scatterwalk_pagelen(walk) & (PAGE_SIZE - 1)) || !more) scatterwalk_pagedone(walk, out, more); } EXPORT_SYMBOL_GPL(scatterwalk_done); -- cgit v0.10.2 From beb63da739f797519aa9990297697abf4db1ac0d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 19 May 2010 14:11:21 +1000 Subject: crypto: tcrypt - Add speed tests for async hashing These are invoked in the 'mode' range of 400 to 499. The cost of async vs. sync for the software algorithm implementations varies. It can be as low as 16 cycles but as much as a couple hundred. Here two runs of md5 testing, async then sync: testing speed of async md5 test 0 ( 16 byte blocks, 16 bytes per update, 1 updates): 2448 cycles/operation, 153 cycles/byte test 1 ( 64 byte blocks, 16 bytes per update, 4 updates): 4992 cycles/operation, 78 cycles/byte test 2 ( 64 byte blocks, 64 bytes per update, 1 updates): 3808 cycles/operation, 59 cycles/byte test 3 ( 256 byte blocks, 16 bytes per update, 16 updates): 14000 cycles/operation, 54 cycles/byte test 4 ( 256 byte blocks, 64 bytes per update, 4 updates): 8480 cycles/operation, 33 cycles/byte test 5 ( 256 byte blocks, 256 bytes per update, 1 updates): 7280 cycles/operation, 28 cycles/byte test 6 ( 1024 byte blocks, 16 bytes per update, 64 updates): 50016 cycles/operation, 48 cycles/byte test 7 ( 1024 byte blocks, 256 bytes per update, 4 updates): 22496 cycles/operation, 21 cycles/byte test 8 ( 1024 byte blocks, 1024 bytes per update, 1 updates): 21232 cycles/operation, 20 cycles/byte test 9 ( 2048 byte blocks, 16 bytes per update, 128 updates): 117184 cycles/operation, 57 cycles/byte test 10 ( 2048 byte blocks, 256 bytes per update, 8 updates): 43008 cycles/operation, 21 cycles/byte test 11 ( 2048 byte blocks, 1024 bytes per update, 2 updates): 40176 cycles/operation, 19 cycles/byte test 12 ( 2048 byte blocks, 2048 bytes per update, 1 updates): 39888 cycles/operation, 19 cycles/byte test 13 ( 4096 byte blocks, 16 bytes per update, 256 updates): 194176 cycles/operation, 47 cycles/byte test 14 ( 4096 byte blocks, 256 bytes per update, 16 updates): 84096 cycles/operation, 20 cycles/byte test 15 ( 4096 byte blocks, 1024 bytes per update, 4 updates): 78336 cycles/operation, 19 cycles/byte test 16 ( 4096 byte blocks, 4096 bytes per update, 1 updates): 77120 cycles/operation, 18 cycles/byte test 17 ( 8192 byte blocks, 16 bytes per update, 512 updates): 403056 cycles/operation, 49 cycles/byte test 18 ( 8192 byte blocks, 256 bytes per update, 32 updates): 166112 cycles/operation, 20 cycles/byte test 19 ( 8192 byte blocks, 1024 bytes per update, 8 updates): 154768 cycles/operation, 18 cycles/byte test 20 ( 8192 byte blocks, 4096 bytes per update, 2 updates): 151904 cycles/operation, 18 cycles/byte test 21 ( 8192 byte blocks, 8192 bytes per update, 1 updates): 155456 cycles/operation, 18 cycles/byte testing speed of md5 test 0 ( 16 byte blocks, 16 bytes per update, 1 updates): 2208 cycles/operation, 138 cycles/byte test 1 ( 64 byte blocks, 16 bytes per update, 4 updates): 5008 cycles/operation, 78 cycles/byte test 2 ( 64 byte blocks, 64 bytes per update, 1 updates): 3600 cycles/operation, 56 cycles/byte test 3 ( 256 byte blocks, 16 bytes 
per update, 16 updates): 14080 cycles/operation, 55 cycles/byte test 4 ( 256 byte blocks, 64 bytes per update, 4 updates): 8560 cycles/operation, 33 cycles/byte test 5 ( 256 byte blocks, 256 bytes per update, 1 updates): 7040 cycles/operation, 27 cycles/byte test 6 ( 1024 byte blocks, 16 bytes per update, 64 updates): 50592 cycles/operation, 49 cycles/byte test 7 ( 1024 byte blocks, 256 bytes per update, 4 updates): 22736 cycles/operation, 22 cycles/byte test 8 ( 1024 byte blocks, 1024 bytes per update, 1 updates): 24960 cycles/operation, 24 cycles/byte test 9 ( 2048 byte blocks, 16 bytes per update, 128 updates): 99312 cycles/operation, 48 cycles/byte test 10 ( 2048 byte blocks, 256 bytes per update, 8 updates): 43520 cycles/operation, 21 cycles/byte test 11 ( 2048 byte blocks, 1024 bytes per update, 2 updates): 40704 cycles/operation, 19 cycles/byte test 12 ( 2048 byte blocks, 2048 bytes per update, 1 updates): 39552 cycles/operation, 19 cycles/byte test 13 ( 4096 byte blocks, 16 bytes per update, 256 updates): 196720 cycles/operation, 48 cycles/byte test 14 ( 4096 byte blocks, 256 bytes per update, 16 updates): 85152 cycles/operation, 20 cycles/byte test 15 ( 4096 byte blocks, 1024 bytes per update, 4 updates): 79408 cycles/operation, 19 cycles/byte test 16 ( 4096 byte blocks, 4096 bytes per update, 1 updates): 76816 cycles/operation, 18 cycles/byte test 17 ( 8192 byte blocks, 16 bytes per update, 512 updates): 391520 cycles/operation, 47 cycles/byte test 18 ( 8192 byte blocks, 256 bytes per update, 32 updates): 168464 cycles/operation, 20 cycles/byte test 19 ( 8192 byte blocks, 1024 bytes per update, 8 updates): 156912 cycles/operation, 19 cycles/byte test 20 ( 8192 byte blocks, 4096 bytes per update, 2 updates): 154016 cycles/operation, 18 cycles/byte test 21 ( 8192 byte blocks, 8192 bytes per update, 1 updates): 153856 cycles/operation, 18 cycles/byte We can ditch the sync hash code at some point if we feel that makes sense. For now I've left it there. Signed-off-by: David S. 
Miller Signed-off-by: Herbert Xu diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index ea610ad..3ca68f9 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -394,6 +394,17 @@ out: return 0; } +static void test_hash_sg_init(struct scatterlist *sg) +{ + int i; + + sg_init_table(sg, TVMEMSIZE); + for (i = 0; i < TVMEMSIZE; i++) { + sg_set_buf(sg + i, tvmem[i], PAGE_SIZE); + memset(tvmem[i], 0xff, PAGE_SIZE); + } +} + static void test_hash_speed(const char *algo, unsigned int sec, struct hash_speed *speed) { @@ -423,12 +434,7 @@ static void test_hash_speed(const char *algo, unsigned int sec, goto out; } - sg_init_table(sg, TVMEMSIZE); - for (i = 0; i < TVMEMSIZE; i++) { - sg_set_buf(sg + i, tvmem[i], PAGE_SIZE); - memset(tvmem[i], 0xff, PAGE_SIZE); - } - + test_hash_sg_init(sg); for (i = 0; speed[i].blen != 0; i++) { if (speed[i].blen > TVMEMSIZE * PAGE_SIZE) { printk(KERN_ERR @@ -461,6 +467,250 @@ out: crypto_free_hash(tfm); } +struct tcrypt_result { + struct completion completion; + int err; +}; + +static void tcrypt_complete(struct crypto_async_request *req, int err) +{ + struct tcrypt_result *res = req->data; + + if (err == -EINPROGRESS) + return; + + res->err = err; + complete(&res->completion); +} + +static inline int do_one_ahash_op(struct ahash_request *req, int ret) +{ + if (ret == -EINPROGRESS || ret == -EBUSY) { + struct tcrypt_result *tr = req->base.data; + + ret = wait_for_completion_interruptible(&tr->completion); + if (!ret) + ret = tr->err; + INIT_COMPLETION(tr->completion); + } + return ret; +} + +static int test_ahash_jiffies_digest(struct ahash_request *req, int blen, + char *out, int sec) +{ + unsigned long start, end; + int bcount; + int ret; + + for (start = jiffies, end = start + sec * HZ, bcount = 0; + time_before(jiffies, end); bcount++) { + ret = do_one_ahash_op(req, crypto_ahash_digest(req)); + if (ret) + return ret; + } + + printk("%6u opers/sec, %9lu bytes/sec\n", + bcount / sec, ((long)bcount * blen) / sec); + + return 0; +} + +static int test_ahash_jiffies(struct ahash_request *req, int blen, + int plen, char *out, int sec) +{ + unsigned long start, end; + int bcount, pcount; + int ret; + + if (plen == blen) + return test_ahash_jiffies_digest(req, blen, out, sec); + + for (start = jiffies, end = start + sec * HZ, bcount = 0; + time_before(jiffies, end); bcount++) { + ret = crypto_ahash_init(req); + if (ret) + return ret; + for (pcount = 0; pcount < blen; pcount += plen) { + ret = do_one_ahash_op(req, crypto_ahash_update(req)); + if (ret) + return ret; + } + /* we assume there is enough space in 'out' for the result */ + ret = do_one_ahash_op(req, crypto_ahash_final(req)); + if (ret) + return ret; + } + + pr_cont("%6u opers/sec, %9lu bytes/sec\n", + bcount / sec, ((long)bcount * blen) / sec); + + return 0; +} + +static int test_ahash_cycles_digest(struct ahash_request *req, int blen, + char *out) +{ + unsigned long cycles = 0; + int ret, i; + + /* Warm-up run. */ + for (i = 0; i < 4; i++) { + ret = do_one_ahash_op(req, crypto_ahash_digest(req)); + if (ret) + goto out; + } + + /* The real thing. 
*/ + for (i = 0; i < 8; i++) { + cycles_t start, end; + + start = get_cycles(); + + ret = do_one_ahash_op(req, crypto_ahash_digest(req)); + if (ret) + goto out; + + end = get_cycles(); + + cycles += end - start; + } + +out: + if (ret) + return ret; + + pr_cont("%6lu cycles/operation, %4lu cycles/byte\n", + cycles / 8, cycles / (8 * blen)); + + return 0; +} + +static int test_ahash_cycles(struct ahash_request *req, int blen, + int plen, char *out) +{ + unsigned long cycles = 0; + int i, pcount, ret; + + if (plen == blen) + return test_ahash_cycles_digest(req, blen, out); + + /* Warm-up run. */ + for (i = 0; i < 4; i++) { + ret = crypto_ahash_init(req); + if (ret) + goto out; + for (pcount = 0; pcount < blen; pcount += plen) { + ret = do_one_ahash_op(req, crypto_ahash_update(req)); + if (ret) + goto out; + } + ret = do_one_ahash_op(req, crypto_ahash_final(req)); + if (ret) + goto out; + } + + /* The real thing. */ + for (i = 0; i < 8; i++) { + cycles_t start, end; + + start = get_cycles(); + + ret = crypto_ahash_init(req); + if (ret) + goto out; + for (pcount = 0; pcount < blen; pcount += plen) { + ret = do_one_ahash_op(req, crypto_ahash_update(req)); + if (ret) + goto out; + } + ret = do_one_ahash_op(req, crypto_ahash_final(req)); + if (ret) + goto out; + + end = get_cycles(); + + cycles += end - start; + } + +out: + if (ret) + return ret; + + pr_cont("%6lu cycles/operation, %4lu cycles/byte\n", + cycles / 8, cycles / (8 * blen)); + + return 0; +} + +static void test_ahash_speed(const char *algo, unsigned int sec, + struct hash_speed *speed) +{ + struct scatterlist sg[TVMEMSIZE]; + struct tcrypt_result tresult; + struct ahash_request *req; + struct crypto_ahash *tfm; + static char output[1024]; + int i, ret; + + printk(KERN_INFO "\ntesting speed of async %s\n", algo); + + tfm = crypto_alloc_ahash(algo, 0, 0); + if (IS_ERR(tfm)) { + pr_err("failed to load transform for %s: %ld\n", + algo, PTR_ERR(tfm)); + return; + } + + if (crypto_ahash_digestsize(tfm) > sizeof(output)) { + pr_err("digestsize(%u) > outputbuffer(%zu)\n", + crypto_ahash_digestsize(tfm), sizeof(output)); + goto out; + } + + test_hash_sg_init(sg); + req = ahash_request_alloc(tfm, GFP_KERNEL); + if (!req) { + pr_err("ahash request allocation failure\n"); + goto out; + } + + init_completion(&tresult.completion); + ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + tcrypt_complete, &tresult); + + for (i = 0; speed[i].blen != 0; i++) { + if (speed[i].blen > TVMEMSIZE * PAGE_SIZE) { + pr_err("template (%u) too big for tvmem (%lu)\n", + speed[i].blen, TVMEMSIZE * PAGE_SIZE); + break; + } + + pr_info("test%3u " + "(%5u byte blocks,%5u bytes per update,%4u updates): ", + i, speed[i].blen, speed[i].plen, speed[i].blen / speed[i].plen); + + ahash_request_set_crypt(req, sg, output, speed[i].plen); + + if (sec) + ret = test_ahash_jiffies(req, speed[i].blen, + speed[i].plen, output, sec); + else + ret = test_ahash_cycles(req, speed[i].blen, + speed[i].plen, output); + + if (ret) { + pr_err("hashing failed ret=%d\n", ret); + break; + } + } + + ahash_request_free(req); + +out: + crypto_free_ahash(tfm); +} + static void test_available(void) { char **name = check; @@ -891,6 +1141,80 @@ static int do_test(int m) case 399: break; + case 400: + /* fall through */ + + case 401: + test_ahash_speed("md4", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 402: + test_ahash_speed("md5", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 403: + test_ahash_speed("sha1", sec, 
generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 404: + test_ahash_speed("sha256", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 405: + test_ahash_speed("sha384", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 406: + test_ahash_speed("sha512", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 407: + test_ahash_speed("wp256", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 408: + test_ahash_speed("wp384", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 409: + test_ahash_speed("wp512", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 410: + test_ahash_speed("tgr128", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 411: + test_ahash_speed("tgr160", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 412: + test_ahash_speed("tgr192", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 413: + test_ahash_speed("sha224", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 414: + test_ahash_speed("rmd128", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 415: + test_ahash_speed("rmd160", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 416: + test_ahash_speed("rmd256", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 417: + test_ahash_speed("rmd320", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 499: + break; + case 1000: test_available(); break; -- cgit v0.10.2 From a8f1a05292db8b410be47fa905669672011f0343 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 19 May 2010 14:12:03 +1000 Subject: crypto: testmgr - Add testing for async hashing and update/final Extend testmgr such that it tests async hash algorithms, and that for both sync and async hashes it tests both ->digest() and ->update()/->final() sequences. Signed-off-by: David S. 
Miller Signed-off-by: Herbert Xu diff --git a/crypto/testmgr.c b/crypto/testmgr.c index c494d76..5c8aaa0 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -153,8 +153,21 @@ static void testmgr_free_buf(char *buf[XBUFSIZE]) free_page((unsigned long)buf[i]); } +static int do_one_async_hash_op(struct ahash_request *req, + struct tcrypt_result *tr, + int ret) +{ + if (ret == -EINPROGRESS || ret == -EBUSY) { + ret = wait_for_completion_interruptible(&tr->completion); + if (!ret) + ret = tr->err; + INIT_COMPLETION(tr->completion); + } + return ret; +} + static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, - unsigned int tcount) + unsigned int tcount, bool use_digest) { const char *algo = crypto_tfm_alg_driver_name(crypto_ahash_tfm(tfm)); unsigned int i, j, k, temp; @@ -206,23 +219,36 @@ static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, } ahash_request_set_crypt(req, sg, result, template[i].psize); - ret = crypto_ahash_digest(req); - switch (ret) { - case 0: - break; - case -EINPROGRESS: - case -EBUSY: - ret = wait_for_completion_interruptible( - &tresult.completion); - if (!ret && !(ret = tresult.err)) { - INIT_COMPLETION(tresult.completion); - break; + if (use_digest) { + ret = do_one_async_hash_op(req, &tresult, + crypto_ahash_digest(req)); + if (ret) { + pr_err("alg: hash: digest failed on test %d " + "for %s: ret=%d\n", j, algo, -ret); + goto out; + } + } else { + ret = do_one_async_hash_op(req, &tresult, + crypto_ahash_init(req)); + if (ret) { + pr_err("alt: hash: init failed on test %d " + "for %s: ret=%d\n", j, algo, -ret); + goto out; + } + ret = do_one_async_hash_op(req, &tresult, + crypto_ahash_update(req)); + if (ret) { + pr_err("alt: hash: update failed on test %d " + "for %s: ret=%d\n", j, algo, -ret); + goto out; + } + ret = do_one_async_hash_op(req, &tresult, + crypto_ahash_final(req)); + if (ret) { + pr_err("alt: hash: final failed on test %d " + "for %s: ret=%d\n", j, algo, -ret); + goto out; } - /* fall through */ - default: - printk(KERN_ERR "alg: hash: digest failed on test %d " - "for %s: ret=%d\n", j, algo, -ret); - goto out; } if (memcmp(result, template[i].digest, @@ -1402,7 +1428,11 @@ static int alg_test_hash(const struct alg_test_desc *desc, const char *driver, return PTR_ERR(tfm); } - err = test_hash(tfm, desc->suite.hash.vecs, desc->suite.hash.count); + err = test_hash(tfm, desc->suite.hash.vecs, + desc->suite.hash.count, true); + if (!err) + err = test_hash(tfm, desc->suite.hash.vecs, + desc->suite.hash.count, false); crypto_free_ahash(tfm); return err; -- cgit v0.10.2 From bf06099db18a1244957076e51847c644cfe46808 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 19 May 2010 14:13:07 +1000 Subject: crypto: skcipher - Add ablkcipher_walk interfaces These are akin to the blkcipher_walk helpers. The main differences in the async variant are: 1) Only physical walking is supported. We can't hold on to kmap mappings across the async operation to support virtual ablkcipher_walk operations anyways. 2) Bounce buffers used for async more need to be persistent and freed at a later point in time when the async op completes. Therefore we maintain a list of writeback buffers and require that the ablkcipher_walk user call the 'complete' operation so we can copy the bounce buffers out to the real buffers and free up the bounce buffer chunks. These interfaces will be used by the new Niagara2 crypto driver. Signed-off-by: David S. 
Miller Signed-off-by: Herbert Xu diff --git a/crypto/ablkcipher.c b/crypto/ablkcipher.c index fe980da..98a6610 100644 --- a/crypto/ablkcipher.c +++ b/crypto/ablkcipher.c @@ -24,10 +24,287 @@ #include #include +#include + #include "internal.h" static const char *skcipher_default_geniv __read_mostly; +struct ablkcipher_buffer { + struct list_head entry; + struct scatter_walk dst; + unsigned int len; + void *data; +}; + +enum { + ABLKCIPHER_WALK_SLOW = 1 << 0, +}; + +static inline void ablkcipher_buffer_write(struct ablkcipher_buffer *p) +{ + scatterwalk_copychunks(p->data, &p->dst, p->len, 1); +} + +void __ablkcipher_walk_complete(struct ablkcipher_walk *walk) +{ + struct ablkcipher_buffer *p, *tmp; + + list_for_each_entry_safe(p, tmp, &walk->buffers, entry) { + ablkcipher_buffer_write(p); + list_del(&p->entry); + kfree(p); + } +} +EXPORT_SYMBOL_GPL(__ablkcipher_walk_complete); + +static inline void ablkcipher_queue_write(struct ablkcipher_walk *walk, + struct ablkcipher_buffer *p) +{ + p->dst = walk->out; + list_add_tail(&p->entry, &walk->buffers); +} + +/* Get a spot of the specified length that does not straddle a page. + * The caller needs to ensure that there is enough space for this operation. + */ +static inline u8 *ablkcipher_get_spot(u8 *start, unsigned int len) +{ + u8 *end_page = (u8 *)(((unsigned long)(start + len - 1)) & PAGE_MASK); + return max(start, end_page); +} + +static inline unsigned int ablkcipher_done_slow(struct ablkcipher_walk *walk, + unsigned int bsize) +{ + unsigned int n = bsize; + + for (;;) { + unsigned int len_this_page = scatterwalk_pagelen(&walk->out); + + if (len_this_page > n) + len_this_page = n; + scatterwalk_advance(&walk->out, n); + if (n == len_this_page) + break; + n -= len_this_page; + scatterwalk_start(&walk->out, scatterwalk_sg_next(walk->out.sg)); + } + + return bsize; +} + +static inline unsigned int ablkcipher_done_fast(struct ablkcipher_walk *walk, + unsigned int n) +{ + scatterwalk_advance(&walk->in, n); + scatterwalk_advance(&walk->out, n); + + return n; +} + +static int ablkcipher_walk_next(struct ablkcipher_request *req, + struct ablkcipher_walk *walk); + +int ablkcipher_walk_done(struct ablkcipher_request *req, + struct ablkcipher_walk *walk, int err) +{ + struct crypto_tfm *tfm = req->base.tfm; + unsigned int nbytes = 0; + + if (likely(err >= 0)) { + unsigned int n = walk->nbytes - err; + + if (likely(!(walk->flags & ABLKCIPHER_WALK_SLOW))) + n = ablkcipher_done_fast(walk, n); + else if (WARN_ON(err)) { + err = -EINVAL; + goto err; + } else + n = ablkcipher_done_slow(walk, n); + + nbytes = walk->total - n; + err = 0; + } + + scatterwalk_done(&walk->in, 0, nbytes); + scatterwalk_done(&walk->out, 1, nbytes); + +err: + walk->total = nbytes; + walk->nbytes = nbytes; + + if (nbytes) { + crypto_yield(req->base.flags); + return ablkcipher_walk_next(req, walk); + } + + if (walk->iv != req->info) + memcpy(req->info, walk->iv, tfm->crt_ablkcipher.ivsize); + if (walk->iv_buffer) + kfree(walk->iv_buffer); + + return err; +} +EXPORT_SYMBOL_GPL(ablkcipher_walk_done); + +static inline int ablkcipher_next_slow(struct ablkcipher_request *req, + struct ablkcipher_walk *walk, + unsigned int bsize, + unsigned int alignmask, + void **src_p, void **dst_p) +{ + unsigned aligned_bsize = ALIGN(bsize, alignmask + 1); + struct ablkcipher_buffer *p; + void *src, *dst, *base; + unsigned int n; + + n = ALIGN(sizeof(struct ablkcipher_buffer), alignmask + 1); + n += (aligned_bsize * 3 - (alignmask + 1) + + (alignmask & ~(crypto_tfm_ctx_alignment() - 1))); + + p = 
kmalloc(n, GFP_ATOMIC); + if (!p) + ablkcipher_walk_done(req, walk, -ENOMEM); + + base = p + 1; + + dst = (u8 *)ALIGN((unsigned long)base, alignmask + 1); + src = dst = ablkcipher_get_spot(dst, bsize); + + p->len = bsize; + p->data = dst; + + scatterwalk_copychunks(src, &walk->in, bsize, 0); + + ablkcipher_queue_write(walk, p); + + walk->nbytes = bsize; + walk->flags |= ABLKCIPHER_WALK_SLOW; + + *src_p = src; + *dst_p = dst; + + return 0; +} + +static inline int ablkcipher_copy_iv(struct ablkcipher_walk *walk, + struct crypto_tfm *tfm, + unsigned int alignmask) +{ + unsigned bs = walk->blocksize; + unsigned int ivsize = tfm->crt_ablkcipher.ivsize; + unsigned aligned_bs = ALIGN(bs, alignmask + 1); + unsigned int size = aligned_bs * 2 + ivsize + max(aligned_bs, ivsize) - + (alignmask + 1); + u8 *iv; + + size += alignmask & ~(crypto_tfm_ctx_alignment() - 1); + walk->iv_buffer = kmalloc(size, GFP_ATOMIC); + if (!walk->iv_buffer) + return -ENOMEM; + + iv = (u8 *)ALIGN((unsigned long)walk->iv_buffer, alignmask + 1); + iv = ablkcipher_get_spot(iv, bs) + aligned_bs; + iv = ablkcipher_get_spot(iv, bs) + aligned_bs; + iv = ablkcipher_get_spot(iv, ivsize); + + walk->iv = memcpy(iv, walk->iv, ivsize); + return 0; +} + +static inline int ablkcipher_next_fast(struct ablkcipher_request *req, + struct ablkcipher_walk *walk) +{ + walk->src.page = scatterwalk_page(&walk->in); + walk->src.offset = offset_in_page(walk->in.offset); + walk->dst.page = scatterwalk_page(&walk->out); + walk->dst.offset = offset_in_page(walk->out.offset); + + return 0; +} + +static int ablkcipher_walk_next(struct ablkcipher_request *req, + struct ablkcipher_walk *walk) +{ + struct crypto_tfm *tfm = req->base.tfm; + unsigned int alignmask, bsize, n; + void *src, *dst; + int err; + + alignmask = crypto_tfm_alg_alignmask(tfm); + n = walk->total; + if (unlikely(n < crypto_tfm_alg_blocksize(tfm))) { + req->base.flags |= CRYPTO_TFM_RES_BAD_BLOCK_LEN; + return ablkcipher_walk_done(req, walk, -EINVAL); + } + + walk->flags &= ~ABLKCIPHER_WALK_SLOW; + src = dst = NULL; + + bsize = min(walk->blocksize, n); + n = scatterwalk_clamp(&walk->in, n); + n = scatterwalk_clamp(&walk->out, n); + + if (n < bsize || + !scatterwalk_aligned(&walk->in, alignmask) || + !scatterwalk_aligned(&walk->out, alignmask)) { + err = ablkcipher_next_slow(req, walk, bsize, alignmask, + &src, &dst); + goto set_phys_lowmem; + } + + walk->nbytes = n; + + return ablkcipher_next_fast(req, walk); + +set_phys_lowmem: + if (err >= 0) { + walk->src.page = virt_to_page(src); + walk->dst.page = virt_to_page(dst); + walk->src.offset = ((unsigned long)src & (PAGE_SIZE - 1)); + walk->dst.offset = ((unsigned long)dst & (PAGE_SIZE - 1)); + } + + return err; +} + +static int ablkcipher_walk_first(struct ablkcipher_request *req, + struct ablkcipher_walk *walk) +{ + struct crypto_tfm *tfm = req->base.tfm; + unsigned int alignmask; + + alignmask = crypto_tfm_alg_alignmask(tfm); + if (WARN_ON_ONCE(in_irq())) + return -EDEADLK; + + walk->nbytes = walk->total; + if (unlikely(!walk->total)) + return 0; + + walk->iv_buffer = NULL; + walk->iv = req->info; + if (unlikely(((unsigned long)walk->iv & alignmask))) { + int err = ablkcipher_copy_iv(walk, tfm, alignmask); + if (err) + return err; + } + + scatterwalk_start(&walk->in, walk->in.sg); + scatterwalk_start(&walk->out, walk->out.sg); + + return ablkcipher_walk_next(req, walk); +} + +int ablkcipher_walk_phys(struct ablkcipher_request *req, + struct ablkcipher_walk *walk) +{ + walk->blocksize = crypto_tfm_alg_blocksize(req->base.tfm); + return 
ablkcipher_walk_first(req, walk); +} +EXPORT_SYMBOL_GPL(ablkcipher_walk_phys); + static int setkey_unaligned(struct crypto_ablkcipher *tfm, const u8 *key, unsigned int keylen) { diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index fc0d575..59c3e5b 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -103,6 +103,23 @@ struct blkcipher_walk { unsigned int blocksize; }; +struct ablkcipher_walk { + struct { + struct page *page; + unsigned int offset; + } src, dst; + + struct scatter_walk in; + unsigned int nbytes; + struct scatter_walk out; + unsigned int total; + struct list_head buffers; + u8 *iv_buffer; + u8 *iv; + int flags; + unsigned int blocksize; +}; + extern const struct crypto_type crypto_ablkcipher_type; extern const struct crypto_type crypto_aead_type; extern const struct crypto_type crypto_blkcipher_type; @@ -173,6 +190,12 @@ int blkcipher_walk_virt_block(struct blkcipher_desc *desc, struct blkcipher_walk *walk, unsigned int blocksize); +int ablkcipher_walk_done(struct ablkcipher_request *req, + struct ablkcipher_walk *walk, int err); +int ablkcipher_walk_phys(struct ablkcipher_request *req, + struct ablkcipher_walk *walk); +void __ablkcipher_walk_complete(struct ablkcipher_walk *walk); + static inline void *crypto_tfm_ctx_aligned(struct crypto_tfm *tfm) { return PTR_ALIGN(crypto_tfm_ctx(tfm), @@ -283,6 +306,23 @@ static inline void blkcipher_walk_init(struct blkcipher_walk *walk, walk->total = nbytes; } +static inline void ablkcipher_walk_init(struct ablkcipher_walk *walk, + struct scatterlist *dst, + struct scatterlist *src, + unsigned int nbytes) +{ + walk->in.sg = src; + walk->out.sg = dst; + walk->total = nbytes; + INIT_LIST_HEAD(&walk->buffers); +} + +static inline void ablkcipher_walk_complete(struct ablkcipher_walk *walk) +{ + if (unlikely(!list_empty(&walk->buffers))) + __ablkcipher_walk_complete(walk); +} + static inline struct crypto_async_request *crypto_get_backlog( struct crypto_queue *queue) { -- cgit v0.10.2 From 0a625fd2abaa0aa0a6852b153d429dbc784748cc Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 19 May 2010 14:14:04 +1000 Subject: crypto: n2 - Add Niagara2 crypto driver Current deficiencies: 1) No HMAC hash support yet. 2) Although the algs are registered as ASYNC they always run synchronously. Signed-off-by: David S. Miller Signed-off-by: Herbert Xu diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 9073aa0..fbf94cf 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -170,6 +170,18 @@ config CRYPTO_DEV_MV_CESA Currently the driver supports AES in ECB and CBC mode without DMA. +config CRYPTO_DEV_NIAGARA2 + tristate "Niagara2 Stream Processing Unit driver" + select CRYPTO_ALGAPI + depends on SPARC64 + help + Each core of a Niagara2 processor contains a Stream + Processing Unit, which itself contains several cryptographic + sub-units. One set provides the Modular Arithmetic Unit, + used for SSL offload. The other set provides the Cipher + Group, which can perform encryption, decryption, hashing, + checksumming, and raw copies. 
+ config CRYPTO_DEV_HIFN_795X tristate "Driver HIFN 795x crypto accelerator chips" select CRYPTO_DES diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index c9494e1..6dbbe00 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -1,6 +1,8 @@ obj-$(CONFIG_CRYPTO_DEV_PADLOCK_AES) += padlock-aes.o obj-$(CONFIG_CRYPTO_DEV_PADLOCK_SHA) += padlock-sha.o obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o +obj-$(CONFIG_CRYPTO_DEV_NIAGARA2) += n2_crypto.o +n2_crypto-objs := n2_core.o n2_asm.o obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o obj-$(CONFIG_CRYPTO_DEV_MV_CESA) += mv_cesa.o obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o diff --git a/drivers/crypto/n2_asm.S b/drivers/crypto/n2_asm.S new file mode 100644 index 0000000..f7c7937 --- /dev/null +++ b/drivers/crypto/n2_asm.S @@ -0,0 +1,95 @@ +/* n2_asm.S: Hypervisor calls for NCS support. + * + * Copyright (C) 2009 David S. Miller + */ + +#include +#include +#include "n2_core.h" + + /* o0: queue type + * o1: RA of queue + * o2: num entries in queue + * o3: address of queue handle return + */ +ENTRY(sun4v_ncs_qconf) + mov HV_FAST_NCS_QCONF, %o5 + ta HV_FAST_TRAP + stx %o1, [%o3] + retl + nop +ENDPROC(sun4v_ncs_qconf) + + /* %o0: queue handle + * %o1: address of queue type return + * %o2: address of queue base address return + * %o3: address of queue num entries return + */ +ENTRY(sun4v_ncs_qinfo) + mov %o1, %g1 + mov %o2, %g2 + mov %o3, %g3 + mov HV_FAST_NCS_QINFO, %o5 + ta HV_FAST_TRAP + stx %o1, [%g1] + stx %o2, [%g2] + stx %o3, [%g3] + retl + nop +ENDPROC(sun4v_ncs_qinfo) + + /* %o0: queue handle + * %o1: address of head offset return + */ +ENTRY(sun4v_ncs_gethead) + mov %o1, %o2 + mov HV_FAST_NCS_GETHEAD, %o5 + ta HV_FAST_TRAP + stx %o1, [%o2] + retl + nop +ENDPROC(sun4v_ncs_gethead) + + /* %o0: queue handle + * %o1: address of tail offset return + */ +ENTRY(sun4v_ncs_gettail) + mov %o1, %o2 + mov HV_FAST_NCS_GETTAIL, %o5 + ta HV_FAST_TRAP + stx %o1, [%o2] + retl + nop +ENDPROC(sun4v_ncs_gettail) + + /* %o0: queue handle + * %o1: new tail offset + */ +ENTRY(sun4v_ncs_settail) + mov HV_FAST_NCS_SETTAIL, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_ncs_settail) + + /* %o0: queue handle + * %o1: address of devino return + */ +ENTRY(sun4v_ncs_qhandle_to_devino) + mov %o1, %o2 + mov HV_FAST_NCS_QHANDLE_TO_DEVINO, %o5 + ta HV_FAST_TRAP + stx %o1, [%o2] + retl + nop +ENDPROC(sun4v_ncs_qhandle_to_devino) + + /* %o0: queue handle + * %o1: new head offset + */ +ENTRY(sun4v_ncs_sethead_marker) + mov HV_FAST_NCS_SETHEAD_MARKER, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_ncs_sethead_marker) diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c new file mode 100644 index 0000000..8566be8 --- /dev/null +++ b/drivers/crypto/n2_core.c @@ -0,0 +1,2083 @@ +/* n2_core.c: Niagara2 Stream Processing Unit (SPU) crypto support. + * + * Copyright (C) 2010 David S. Miller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include "n2_core.h" + +#define DRV_MODULE_NAME "n2_crypto" +#define DRV_MODULE_VERSION "0.1" +#define DRV_MODULE_RELDATE "April 29, 2010" + +static char version[] __devinitdata = + DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n"; + +MODULE_AUTHOR("David S. 
Miller (davem@davemloft.net)"); +MODULE_DESCRIPTION("Niagara2 Crypto driver"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRV_MODULE_VERSION); + +#define N2_CRA_PRIORITY 300 + +static DEFINE_MUTEX(spu_lock); + +struct spu_queue { + cpumask_t sharing; + unsigned long qhandle; + + spinlock_t lock; + u8 q_type; + void *q; + unsigned long head; + unsigned long tail; + struct list_head jobs; + + unsigned long devino; + + char irq_name[32]; + unsigned int irq; + + struct list_head list; +}; + +static struct spu_queue **cpu_to_cwq; +static struct spu_queue **cpu_to_mau; + +static unsigned long spu_next_offset(struct spu_queue *q, unsigned long off) +{ + if (q->q_type == HV_NCS_QTYPE_MAU) { + off += MAU_ENTRY_SIZE; + if (off == (MAU_ENTRY_SIZE * MAU_NUM_ENTRIES)) + off = 0; + } else { + off += CWQ_ENTRY_SIZE; + if (off == (CWQ_ENTRY_SIZE * CWQ_NUM_ENTRIES)) + off = 0; + } + return off; +} + +struct n2_request_common { + struct list_head entry; + unsigned int offset; +}; +#define OFFSET_NOT_RUNNING (~(unsigned int)0) + +/* An async job request records the final tail value it used in + * n2_request_common->offset, test to see if that offset is in + * the range old_head, new_head, inclusive. + */ +static inline bool job_finished(struct spu_queue *q, unsigned int offset, + unsigned long old_head, unsigned long new_head) +{ + if (old_head <= new_head) { + if (offset > old_head && offset <= new_head) + return true; + } else { + if (offset > old_head || offset <= new_head) + return true; + } + return false; +} + +/* When the HEAD marker is unequal to the actual HEAD, we get + * a virtual device INO interrupt. We should process the + * completed CWQ entries and adjust the HEAD marker to clear + * the IRQ. + */ +static irqreturn_t cwq_intr(int irq, void *dev_id) +{ + unsigned long off, new_head, hv_ret; + struct spu_queue *q = dev_id; + + pr_err("CPU[%d]: Got CWQ interrupt for qhdl[%lx]\n", + smp_processor_id(), q->qhandle); + + spin_lock(&q->lock); + + hv_ret = sun4v_ncs_gethead(q->qhandle, &new_head); + + pr_err("CPU[%d]: CWQ gethead[%lx] hv_ret[%lu]\n", + smp_processor_id(), new_head, hv_ret); + + for (off = q->head; off != new_head; off = spu_next_offset(q, off)) { + /* XXX ... 
XXX */ + } + + hv_ret = sun4v_ncs_sethead_marker(q->qhandle, new_head); + if (hv_ret == HV_EOK) + q->head = new_head; + + spin_unlock(&q->lock); + + return IRQ_HANDLED; +} + +static irqreturn_t mau_intr(int irq, void *dev_id) +{ + struct spu_queue *q = dev_id; + unsigned long head, hv_ret; + + spin_lock(&q->lock); + + pr_err("CPU[%d]: Got MAU interrupt for qhdl[%lx]\n", + smp_processor_id(), q->qhandle); + + hv_ret = sun4v_ncs_gethead(q->qhandle, &head); + + pr_err("CPU[%d]: MAU gethead[%lx] hv_ret[%lu]\n", + smp_processor_id(), head, hv_ret); + + sun4v_ncs_sethead_marker(q->qhandle, head); + + spin_unlock(&q->lock); + + return IRQ_HANDLED; +} + +static void *spu_queue_next(struct spu_queue *q, void *cur) +{ + return q->q + spu_next_offset(q, cur - q->q); +} + +static int spu_queue_num_free(struct spu_queue *q) +{ + unsigned long head = q->head; + unsigned long tail = q->tail; + unsigned long end = (CWQ_ENTRY_SIZE * CWQ_NUM_ENTRIES); + unsigned long diff; + + if (head > tail) + diff = head - tail; + else + diff = (end - tail) + head; + + return (diff / CWQ_ENTRY_SIZE) - 1; +} + +static void *spu_queue_alloc(struct spu_queue *q, int num_entries) +{ + int avail = spu_queue_num_free(q); + + if (avail >= num_entries) + return q->q + q->tail; + + return NULL; +} + +static unsigned long spu_queue_submit(struct spu_queue *q, void *last) +{ + unsigned long hv_ret, new_tail; + + new_tail = spu_next_offset(q, last - q->q); + + hv_ret = sun4v_ncs_settail(q->qhandle, new_tail); + if (hv_ret == HV_EOK) + q->tail = new_tail; + return hv_ret; +} + +static u64 control_word_base(unsigned int len, unsigned int hmac_key_len, + int enc_type, int auth_type, + unsigned int hash_len, + bool sfas, bool sob, bool eob, bool encrypt, + int opcode) +{ + u64 word = (len - 1) & CONTROL_LEN; + + word |= ((u64) opcode << CONTROL_OPCODE_SHIFT); + word |= ((u64) enc_type << CONTROL_ENC_TYPE_SHIFT); + word |= ((u64) auth_type << CONTROL_AUTH_TYPE_SHIFT); + if (sfas) + word |= CONTROL_STORE_FINAL_AUTH_STATE; + if (sob) + word |= CONTROL_START_OF_BLOCK; + if (eob) + word |= CONTROL_END_OF_BLOCK; + if (encrypt) + word |= CONTROL_ENCRYPT; + if (hmac_key_len) + word |= ((u64) (hmac_key_len - 1)) << CONTROL_HMAC_KEY_LEN_SHIFT; + if (hash_len) + word |= ((u64) (hash_len - 1)) << CONTROL_HASH_LEN_SHIFT; + + return word; +} + +#if 0 +static inline bool n2_should_run_async(struct spu_queue *qp, int this_len) +{ + if (this_len >= 64 || + qp->head != qp->tail) + return true; + return false; +} +#endif + +struct n2_base_ctx { + struct list_head list; +}; + +static void n2_base_ctx_init(struct n2_base_ctx *ctx) +{ + INIT_LIST_HEAD(&ctx->list); +} + +struct n2_hash_ctx { + struct n2_base_ctx base; + + struct crypto_ahash *fallback; + + /* These next three members must match the layout created by + * crypto_init_shash_ops_async. This allows us to properly + * plumb requests we can't do in hardware down to the fallback + * operation, providing all of the data structures and layouts + * expected by those paths. 
+ */ + struct ahash_request fallback_req; + struct shash_desc fallback_desc; + union { + struct md5_state md5; + struct sha1_state sha1; + struct sha256_state sha256; + } u; + + unsigned char hash_key[64]; + unsigned char keyed_zero_hash[32]; +}; + +static int n2_hash_async_init(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct n2_hash_ctx *ctx = crypto_ahash_ctx(tfm); + + ctx->fallback_req.base.tfm = crypto_ahash_tfm(ctx->fallback); + ctx->fallback_req.base.flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP; + + return crypto_ahash_init(&ctx->fallback_req); +} + +static int n2_hash_async_update(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct n2_hash_ctx *ctx = crypto_ahash_ctx(tfm); + + ctx->fallback_req.base.tfm = crypto_ahash_tfm(ctx->fallback); + ctx->fallback_req.base.flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP; + ctx->fallback_req.nbytes = req->nbytes; + ctx->fallback_req.src = req->src; + + return crypto_ahash_update(&ctx->fallback_req); +} + +static int n2_hash_async_final(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct n2_hash_ctx *ctx = crypto_ahash_ctx(tfm); + + ctx->fallback_req.base.tfm = crypto_ahash_tfm(ctx->fallback); + ctx->fallback_req.base.flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP; + ctx->fallback_req.result = req->result; + + return crypto_ahash_final(&ctx->fallback_req); +} + +static int n2_hash_async_finup(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct n2_hash_ctx *ctx = crypto_ahash_ctx(tfm); + + ctx->fallback_req.base.tfm = crypto_ahash_tfm(ctx->fallback); + ctx->fallback_req.base.flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP; + ctx->fallback_req.nbytes = req->nbytes; + ctx->fallback_req.src = req->src; + ctx->fallback_req.result = req->result; + + return crypto_ahash_finup(&ctx->fallback_req); +} + +static int n2_hash_cra_init(struct crypto_tfm *tfm) +{ + const char *fallback_driver_name = tfm->__crt_alg->cra_name; + struct crypto_ahash *ahash = __crypto_ahash_cast(tfm); + struct n2_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct crypto_ahash *fallback_tfm; + int err; + + fallback_tfm = crypto_alloc_ahash(fallback_driver_name, 0, + CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(fallback_tfm)) { + pr_warning("Fallback driver '%s' could not be loaded!\n", + fallback_driver_name); + err = PTR_ERR(fallback_tfm); + goto out; + } + + ctx->fallback = fallback_tfm; + return 0; + +out: + return err; +} + +static void n2_hash_cra_exit(struct crypto_tfm *tfm) +{ + struct crypto_ahash *ahash = __crypto_ahash_cast(tfm); + struct n2_hash_ctx *ctx = crypto_ahash_ctx(ahash); + + crypto_free_ahash(ctx->fallback); +} + +static unsigned long wait_for_tail(struct spu_queue *qp) +{ + unsigned long head, hv_ret; + + do { + hv_ret = sun4v_ncs_gethead(qp->qhandle, &head); + if (hv_ret != HV_EOK) { + pr_err("Hypervisor error on gethead\n"); + break; + } + if (head == qp->tail) { + qp->head = head; + break; + } + } while (1); + return hv_ret; +} + +static unsigned long submit_and_wait_for_tail(struct spu_queue *qp, + struct cwq_initial_entry *ent) +{ + unsigned long hv_ret = spu_queue_submit(qp, ent); + + if (hv_ret == HV_EOK) + hv_ret = wait_for_tail(qp); + + return hv_ret; +} + +static int n2_hash_async_digest(struct ahash_request *req, + unsigned int auth_type, unsigned int digest_size, + unsigned int result_size, void *hash_loc) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + 
struct n2_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct cwq_initial_entry *ent; + struct crypto_hash_walk walk; + struct spu_queue *qp; + unsigned long flags; + int err = -ENODEV; + int nbytes, cpu; + + /* The total effective length of the operation may not + * exceed 2^16. + */ + if (unlikely(req->nbytes > (1 << 16))) { + ctx->fallback_req.base.tfm = crypto_ahash_tfm(ctx->fallback); + ctx->fallback_req.base.flags = + req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP; + ctx->fallback_req.nbytes = req->nbytes; + ctx->fallback_req.src = req->src; + ctx->fallback_req.result = req->result; + + return crypto_ahash_digest(&ctx->fallback_req); + } + + n2_base_ctx_init(&ctx->base); + + nbytes = crypto_hash_walk_first(req, &walk); + + cpu = get_cpu(); + qp = cpu_to_cwq[cpu]; + if (!qp) + goto out; + + spin_lock_irqsave(&qp->lock, flags); + + /* XXX can do better, improve this later by doing a by-hand scatterlist + * XXX walk, etc. + */ + ent = qp->q + qp->tail; + + ent->control = control_word_base(nbytes, 0, 0, + auth_type, digest_size, + false, true, false, false, + OPCODE_INPLACE_BIT | + OPCODE_AUTH_MAC); + ent->src_addr = __pa(walk.data); + ent->auth_key_addr = 0UL; + ent->auth_iv_addr = __pa(hash_loc); + ent->final_auth_state_addr = 0UL; + ent->enc_key_addr = 0UL; + ent->enc_iv_addr = 0UL; + ent->dest_addr = __pa(hash_loc); + + nbytes = crypto_hash_walk_done(&walk, 0); + while (nbytes > 0) { + ent = spu_queue_next(qp, ent); + + ent->control = (nbytes - 1); + ent->src_addr = __pa(walk.data); + ent->auth_key_addr = 0UL; + ent->auth_iv_addr = 0UL; + ent->final_auth_state_addr = 0UL; + ent->enc_key_addr = 0UL; + ent->enc_iv_addr = 0UL; + ent->dest_addr = 0UL; + + nbytes = crypto_hash_walk_done(&walk, 0); + } + ent->control |= CONTROL_END_OF_BLOCK; + + if (submit_and_wait_for_tail(qp, ent) != HV_EOK) + err = -EINVAL; + else + err = 0; + + spin_unlock_irqrestore(&qp->lock, flags); + + if (!err) + memcpy(req->result, hash_loc, result_size); +out: + put_cpu(); + + return err; +} + +static int n2_md5_async_digest(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct n2_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct md5_state *m = &ctx->u.md5; + + if (unlikely(req->nbytes == 0)) { + static const char md5_zero[MD5_DIGEST_SIZE] = { + 0xd4, 0x1d, 0x8c, 0xd9, 0x8f, 0x00, 0xb2, 0x04, + 0xe9, 0x80, 0x09, 0x98, 0xec, 0xf8, 0x42, 0x7e, + }; + + memcpy(req->result, md5_zero, MD5_DIGEST_SIZE); + return 0; + } + m->hash[0] = cpu_to_le32(0x67452301); + m->hash[1] = cpu_to_le32(0xefcdab89); + m->hash[2] = cpu_to_le32(0x98badcfe); + m->hash[3] = cpu_to_le32(0x10325476); + + return n2_hash_async_digest(req, AUTH_TYPE_MD5, + MD5_DIGEST_SIZE, MD5_DIGEST_SIZE, + m->hash); +} + +static int n2_sha1_async_digest(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct n2_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct sha1_state *s = &ctx->u.sha1; + + if (unlikely(req->nbytes == 0)) { + static const char sha1_zero[SHA1_DIGEST_SIZE] = { + 0xda, 0x39, 0xa3, 0xee, 0x5e, 0x6b, 0x4b, 0x0d, 0x32, + 0x55, 0xbf, 0xef, 0x95, 0x60, 0x18, 0x90, 0xaf, 0xd8, + 0x07, 0x09 + }; + + memcpy(req->result, sha1_zero, SHA1_DIGEST_SIZE); + return 0; + } + s->state[0] = SHA1_H0; + s->state[1] = SHA1_H1; + s->state[2] = SHA1_H2; + s->state[3] = SHA1_H3; + s->state[4] = SHA1_H4; + + return n2_hash_async_digest(req, AUTH_TYPE_SHA1, + SHA1_DIGEST_SIZE, SHA1_DIGEST_SIZE, + s->state); +} + +static int n2_sha256_async_digest(struct ahash_request *req) +{ + struct crypto_ahash *tfm = 
crypto_ahash_reqtfm(req); + struct n2_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct sha256_state *s = &ctx->u.sha256; + + if (req->nbytes == 0) { + static const char sha256_zero[SHA256_DIGEST_SIZE] = { + 0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, 0x9a, + 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24, 0x27, 0xae, + 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, 0xa4, 0x95, 0x99, + 0x1b, 0x78, 0x52, 0xb8, 0x55 + }; + + memcpy(req->result, sha256_zero, SHA256_DIGEST_SIZE); + return 0; + } + s->state[0] = SHA256_H0; + s->state[1] = SHA256_H1; + s->state[2] = SHA256_H2; + s->state[3] = SHA256_H3; + s->state[4] = SHA256_H4; + s->state[5] = SHA256_H5; + s->state[6] = SHA256_H6; + s->state[7] = SHA256_H7; + + return n2_hash_async_digest(req, AUTH_TYPE_SHA256, + SHA256_DIGEST_SIZE, SHA256_DIGEST_SIZE, + s->state); +} + +static int n2_sha224_async_digest(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct n2_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct sha256_state *s = &ctx->u.sha256; + + if (req->nbytes == 0) { + static const char sha224_zero[SHA224_DIGEST_SIZE] = { + 0xd1, 0x4a, 0x02, 0x8c, 0x2a, 0x3a, 0x2b, 0xc9, 0x47, + 0x61, 0x02, 0xbb, 0x28, 0x82, 0x34, 0xc4, 0x15, 0xa2, + 0xb0, 0x1f, 0x82, 0x8e, 0xa6, 0x2a, 0xc5, 0xb3, 0xe4, + 0x2f + }; + + memcpy(req->result, sha224_zero, SHA224_DIGEST_SIZE); + return 0; + } + s->state[0] = SHA224_H0; + s->state[1] = SHA224_H1; + s->state[2] = SHA224_H2; + s->state[3] = SHA224_H3; + s->state[4] = SHA224_H4; + s->state[5] = SHA224_H5; + s->state[6] = SHA224_H6; + s->state[7] = SHA224_H7; + + return n2_hash_async_digest(req, AUTH_TYPE_SHA256, + SHA256_DIGEST_SIZE, SHA224_DIGEST_SIZE, + s->state); +} + +struct n2_cipher_context { + int key_len; + int enc_type; + union { + u8 aes[AES_MAX_KEY_SIZE]; + u8 des[DES_KEY_SIZE]; + u8 des3[3 * DES_KEY_SIZE]; + u8 arc4[258]; /* S-box, X, Y */ + } key; +}; + +#define N2_CHUNK_ARR_LEN 16 + +struct n2_crypto_chunk { + struct list_head entry; + unsigned long iv_paddr : 44; + unsigned long arr_len : 20; + unsigned long dest_paddr; + unsigned long dest_final; + struct { + unsigned long src_paddr : 44; + unsigned long src_len : 20; + } arr[N2_CHUNK_ARR_LEN]; +}; + +struct n2_request_context { + struct ablkcipher_walk walk; + struct list_head chunk_list; + struct n2_crypto_chunk chunk; + u8 temp_iv[16]; +}; + +/* The SPU allows some level of flexibility for partial cipher blocks + * being specified in a descriptor. + * + * It merely requires that every descriptor's length field is at least + * as large as the cipher block size. This means that a cipher block + * can span at most 2 descriptors. However, this does not allow a + * partial block to span into the final descriptor as that would + * violate the rule (since every descriptor's length must be at least + * the block size). So, for example, assuming an 8 byte block size: + * + * 0xe --> 0xa --> 0x8 + * + * is a valid length sequence, whereas: + * + * 0xe --> 0xb --> 0x7 + * + * is not a valid sequence.
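+ *
+ * A sketch of a checker for this rule (illustrative only, not part
+ * of the driver; 'lens' and 'n' are hypothetical, and for a block
+ * cipher the total must also be a whole number of blocks):
+ *
+ *	static bool desc_lens_ok(const u32 *lens, int n, u32 bs)
+ *	{
+ *		u32 total = 0;
+ *		int i;
+ *
+ *		for (i = 0; i < n; i++) {
+ *			if (lens[i] < bs)
+ *				return false;
+ *			total += lens[i];
+ *		}
+ *		return total % bs == 0;
+ *	}
+ *
+ * Under this check, 0xe/0xa/0x8 passes while 0xe/0xb/0x7 fails on
+ * the 0x7 entry.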
+ */ + +struct n2_cipher_alg { + struct list_head entry; + u8 enc_type; + struct crypto_alg alg; +}; + +static inline struct n2_cipher_alg *n2_cipher_alg(struct crypto_tfm *tfm) +{ + struct crypto_alg *alg = tfm->__crt_alg; + + return container_of(alg, struct n2_cipher_alg, alg); +} + +struct n2_cipher_request_context { + struct ablkcipher_walk walk; +}; + +static int n2_aes_setkey(struct crypto_ablkcipher *cipher, const u8 *key, + unsigned int keylen) +{ + struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher); + struct n2_cipher_context *ctx = crypto_tfm_ctx(tfm); + struct n2_cipher_alg *n2alg = n2_cipher_alg(tfm); + + ctx->enc_type = (n2alg->enc_type & ENC_TYPE_CHAINING_MASK); + + switch (keylen) { + case AES_KEYSIZE_128: + ctx->enc_type |= ENC_TYPE_ALG_AES128; + break; + case AES_KEYSIZE_192: + ctx->enc_type |= ENC_TYPE_ALG_AES192; + break; + case AES_KEYSIZE_256: + ctx->enc_type |= ENC_TYPE_ALG_AES256; + break; + default: + crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + ctx->key_len = keylen; + memcpy(ctx->key.aes, key, keylen); + return 0; +} + +static int n2_des_setkey(struct crypto_ablkcipher *cipher, const u8 *key, + unsigned int keylen) +{ + struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher); + struct n2_cipher_context *ctx = crypto_tfm_ctx(tfm); + struct n2_cipher_alg *n2alg = n2_cipher_alg(tfm); + u32 tmp[DES_EXPKEY_WORDS]; + int err; + + ctx->enc_type = n2alg->enc_type; + + if (keylen != DES_KEY_SIZE) { + crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + err = des_ekey(tmp, key); + if (err == 0 && (tfm->crt_flags & CRYPTO_TFM_REQ_WEAK_KEY)) { + tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY; + return -EINVAL; + } + + ctx->key_len = keylen; + memcpy(ctx->key.des, key, keylen); + return 0; +} + +static int n2_3des_setkey(struct crypto_ablkcipher *cipher, const u8 *key, + unsigned int keylen) +{ + struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher); + struct n2_cipher_context *ctx = crypto_tfm_ctx(tfm); + struct n2_cipher_alg *n2alg = n2_cipher_alg(tfm); + + ctx->enc_type = n2alg->enc_type; + + if (keylen != (3 * DES_KEY_SIZE)) { + crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + ctx->key_len = keylen; + memcpy(ctx->key.des3, key, keylen); + return 0; +} + +static int n2_arc4_setkey(struct crypto_ablkcipher *cipher, const u8 *key, + unsigned int keylen) +{ + struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher); + struct n2_cipher_context *ctx = crypto_tfm_ctx(tfm); + struct n2_cipher_alg *n2alg = n2_cipher_alg(tfm); + u8 *s = ctx->key.arc4; + u8 *x = s + 256; + u8 *y = x + 1; + int i, j, k; + + ctx->enc_type = n2alg->enc_type; + + j = k = 0; + *x = 0; + *y = 0; + for (i = 0; i < 256; i++) + s[i] = i; + for (i = 0; i < 256; i++) { + u8 a = s[i]; + j = (j + key[k] + a) & 0xff; + s[i] = s[j]; + s[j] = a; + if (++k >= keylen) + k = 0; + } + + return 0; +} + +static inline int cipher_descriptor_len(int nbytes, unsigned int block_size) +{ + int this_len = nbytes; + + this_len -= (nbytes & (block_size - 1)); + return this_len > (1 << 16) ? 
(1 << 16) : this_len; +} + +static int __n2_crypt_chunk(struct crypto_tfm *tfm, struct n2_crypto_chunk *cp, + struct spu_queue *qp, bool encrypt) +{ + struct n2_cipher_context *ctx = crypto_tfm_ctx(tfm); + struct cwq_initial_entry *ent; + bool in_place; + int i; + + ent = spu_queue_alloc(qp, cp->arr_len); + if (!ent) { + pr_info("queue_alloc() of %d fails\n", + cp->arr_len); + return -EBUSY; + } + + in_place = (cp->dest_paddr == cp->arr[0].src_paddr); + + ent->control = control_word_base(cp->arr[0].src_len, + 0, ctx->enc_type, 0, 0, + false, true, false, encrypt, + OPCODE_ENCRYPT | + (in_place ? OPCODE_INPLACE_BIT : 0)); + ent->src_addr = cp->arr[0].src_paddr; + ent->auth_key_addr = 0UL; + ent->auth_iv_addr = 0UL; + ent->final_auth_state_addr = 0UL; + ent->enc_key_addr = __pa(&ctx->key); + ent->enc_iv_addr = cp->iv_paddr; + ent->dest_addr = (in_place ? 0UL : cp->dest_paddr); + + for (i = 1; i < cp->arr_len; i++) { + ent = spu_queue_next(qp, ent); + + ent->control = cp->arr[i].src_len - 1; + ent->src_addr = cp->arr[i].src_paddr; + ent->auth_key_addr = 0UL; + ent->auth_iv_addr = 0UL; + ent->final_auth_state_addr = 0UL; + ent->enc_key_addr = 0UL; + ent->enc_iv_addr = 0UL; + ent->dest_addr = 0UL; + } + ent->control |= CONTROL_END_OF_BLOCK; + + return (spu_queue_submit(qp, ent) != HV_EOK) ? -EINVAL : 0; +} + +static int n2_compute_chunks(struct ablkcipher_request *req) +{ + struct n2_request_context *rctx = ablkcipher_request_ctx(req); + struct ablkcipher_walk *walk = &rctx->walk; + struct n2_crypto_chunk *chunk; + unsigned long dest_prev; + unsigned int tot_len; + bool prev_in_place; + int err, nbytes; + + ablkcipher_walk_init(walk, req->dst, req->src, req->nbytes); + err = ablkcipher_walk_phys(req, walk); + if (err) + return err; + + INIT_LIST_HEAD(&rctx->chunk_list); + + chunk = &rctx->chunk; + INIT_LIST_HEAD(&chunk->entry); + + chunk->iv_paddr = 0UL; + chunk->arr_len = 0; + chunk->dest_paddr = 0UL; + + prev_in_place = false; + dest_prev = ~0UL; + tot_len = 0; + + while ((nbytes = walk->nbytes) != 0) { + unsigned long dest_paddr, src_paddr; + bool in_place; + int this_len; + + src_paddr = (page_to_phys(walk->src.page) + + walk->src.offset); + dest_paddr = (page_to_phys(walk->dst.page) + + walk->dst.offset); + in_place = (src_paddr == dest_paddr); + this_len = cipher_descriptor_len(nbytes, walk->blocksize); + + if (chunk->arr_len != 0) { + if (in_place != prev_in_place || + (!prev_in_place && + dest_paddr != dest_prev) || + chunk->arr_len == N2_CHUNK_ARR_LEN || + tot_len + this_len > (1 << 16)) { + chunk->dest_final = dest_prev; + list_add_tail(&chunk->entry, + &rctx->chunk_list); + chunk = kzalloc(sizeof(*chunk), GFP_ATOMIC); + if (!chunk) { + err = -ENOMEM; + break; + } + INIT_LIST_HEAD(&chunk->entry); + } + } + if (chunk->arr_len == 0) { + chunk->dest_paddr = dest_paddr; + tot_len = 0; + } + chunk->arr[chunk->arr_len].src_paddr = src_paddr; + chunk->arr[chunk->arr_len].src_len = this_len; + chunk->arr_len++; + + dest_prev = dest_paddr + this_len; + prev_in_place = in_place; + tot_len += this_len; + + err = ablkcipher_walk_done(req, walk, nbytes - this_len); + if (err) + break; + } + if (!err && chunk->arr_len != 0) { + chunk->dest_final = dest_prev; + list_add_tail(&chunk->entry, &rctx->chunk_list); + } + + return err; +} + +static void n2_chunk_complete(struct ablkcipher_request *req, void *final_iv) +{ + struct n2_request_context *rctx = ablkcipher_request_ctx(req); + struct n2_crypto_chunk *c, *tmp; + + if (final_iv) + memcpy(rctx->walk.iv, final_iv, rctx->walk.blocksize); + + 
ablkcipher_walk_complete(&rctx->walk); + list_for_each_entry_safe(c, tmp, &rctx->chunk_list, entry) { + list_del(&c->entry); + if (unlikely(c != &rctx->chunk)) + kfree(c); + } + +} + +static int n2_do_ecb(struct ablkcipher_request *req, bool encrypt) +{ + struct n2_request_context *rctx = ablkcipher_request_ctx(req); + struct crypto_tfm *tfm = req->base.tfm; + int err = n2_compute_chunks(req); + struct n2_crypto_chunk *c, *tmp; + unsigned long flags, hv_ret; + struct spu_queue *qp; + + if (err) + return err; + + qp = cpu_to_cwq[get_cpu()]; + err = -ENODEV; + if (!qp) + goto out; + + spin_lock_irqsave(&qp->lock, flags); + + list_for_each_entry_safe(c, tmp, &rctx->chunk_list, entry) { + err = __n2_crypt_chunk(tfm, c, qp, encrypt); + if (err) + break; + list_del(&c->entry); + if (unlikely(c != &rctx->chunk)) + kfree(c); + } + if (!err) { + hv_ret = wait_for_tail(qp); + if (hv_ret != HV_EOK) + err = -EINVAL; + } + + spin_unlock_irqrestore(&qp->lock, flags); + +out: + put_cpu(); + n2_chunk_complete(req, NULL); + return err; +} + +static int n2_encrypt_ecb(struct ablkcipher_request *req) +{ + return n2_do_ecb(req, true); +} + +static int n2_decrypt_ecb(struct ablkcipher_request *req) +{ + return n2_do_ecb(req, false); +} + +static int n2_do_chaining(struct ablkcipher_request *req, bool encrypt) +{ + struct n2_request_context *rctx = ablkcipher_request_ctx(req); + struct crypto_tfm *tfm = req->base.tfm; + unsigned long flags, hv_ret, iv_paddr; + int err = n2_compute_chunks(req); + struct n2_crypto_chunk *c, *tmp; + struct spu_queue *qp; + void *final_iv_addr; + + final_iv_addr = NULL; + + if (err) + return err; + + qp = cpu_to_cwq[get_cpu()]; + err = -ENODEV; + if (!qp) + goto out; + + spin_lock_irqsave(&qp->lock, flags); + + if (encrypt) { + iv_paddr = __pa(rctx->walk.iv); + list_for_each_entry_safe(c, tmp, &rctx->chunk_list, + entry) { + c->iv_paddr = iv_paddr; + err = __n2_crypt_chunk(tfm, c, qp, true); + if (err) + break; + iv_paddr = c->dest_final - rctx->walk.blocksize; + list_del(&c->entry); + if (unlikely(c != &rctx->chunk)) + kfree(c); + } + final_iv_addr = __va(iv_paddr); + } else { + list_for_each_entry_safe_reverse(c, tmp, &rctx->chunk_list, + entry) { + if (c == &rctx->chunk) { + iv_paddr = __pa(rctx->walk.iv); + } else { + iv_paddr = (tmp->arr[tmp->arr_len-1].src_paddr + + tmp->arr[tmp->arr_len-1].src_len - + rctx->walk.blocksize); + } + if (!final_iv_addr) { + unsigned long pa; + + pa = (c->arr[c->arr_len-1].src_paddr + + c->arr[c->arr_len-1].src_len - + rctx->walk.blocksize); + final_iv_addr = rctx->temp_iv; + memcpy(rctx->temp_iv, __va(pa), + rctx->walk.blocksize); + } + c->iv_paddr = iv_paddr; + err = __n2_crypt_chunk(tfm, c, qp, false); + if (err) + break; + list_del(&c->entry); + if (unlikely(c != &rctx->chunk)) + kfree(c); + } + } + if (!err) { + hv_ret = wait_for_tail(qp); + if (hv_ret != HV_EOK) + err = -EINVAL; + } + + spin_unlock_irqrestore(&qp->lock, flags); + +out: + put_cpu(); + n2_chunk_complete(req, err ?
NULL : final_iv_addr); + return err; +} + +static int n2_encrypt_chaining(struct ablkcipher_request *req) +{ + return n2_do_chaining(req, true); +} + +static int n2_decrypt_chaining(struct ablkcipher_request *req) +{ + return n2_do_chaining(req, false); +} + +struct n2_cipher_tmpl { + const char *name; + const char *drv_name; + u8 block_size; + u8 enc_type; + struct ablkcipher_alg ablkcipher; +}; + +static const struct n2_cipher_tmpl cipher_tmpls[] = { + /* ARC4: only ECB is supported (chaining bits ignored) */ + { .name = "ecb(arc4)", + .drv_name = "ecb-arc4", + .block_size = 1, + .enc_type = (ENC_TYPE_ALG_RC4_STREAM | + ENC_TYPE_CHAINING_ECB), + .ablkcipher = { + .min_keysize = 1, + .max_keysize = 256, + .setkey = n2_arc4_setkey, + .encrypt = n2_encrypt_ecb, + .decrypt = n2_decrypt_ecb, + }, + }, + + /* DES: ECB CBC and CFB are supported */ + { .name = "ecb(des)", + .drv_name = "ecb-des", + .block_size = DES_BLOCK_SIZE, + .enc_type = (ENC_TYPE_ALG_DES | + ENC_TYPE_CHAINING_ECB), + .ablkcipher = { + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .setkey = n2_des_setkey, + .encrypt = n2_encrypt_ecb, + .decrypt = n2_decrypt_ecb, + }, + }, + { .name = "cbc(des)", + .drv_name = "cbc-des", + .block_size = DES_BLOCK_SIZE, + .enc_type = (ENC_TYPE_ALG_DES | + ENC_TYPE_CHAINING_CBC), + .ablkcipher = { + .ivsize = DES_BLOCK_SIZE, + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .setkey = n2_des_setkey, + .encrypt = n2_encrypt_chaining, + .decrypt = n2_decrypt_chaining, + }, + }, + { .name = "cfb(des)", + .drv_name = "cfb-des", + .block_size = DES_BLOCK_SIZE, + .enc_type = (ENC_TYPE_ALG_DES | + ENC_TYPE_CHAINING_CFB), + .ablkcipher = { + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .setkey = n2_des_setkey, + .encrypt = n2_encrypt_chaining, + .decrypt = n2_decrypt_chaining, + }, + }, + + /* 3DES: ECB CBC and CFB are supported */ + { .name = "ecb(des3_ede)", + .drv_name = "ecb-3des", + .block_size = DES_BLOCK_SIZE, + .enc_type = (ENC_TYPE_ALG_3DES | + ENC_TYPE_CHAINING_ECB), + .ablkcipher = { + .min_keysize = 3 * DES_KEY_SIZE, + .max_keysize = 3 * DES_KEY_SIZE, + .setkey = n2_3des_setkey, + .encrypt = n2_encrypt_ecb, + .decrypt = n2_decrypt_ecb, + }, + }, + { .name = "cbc(des3_ede)", + .drv_name = "cbc-3des", + .block_size = DES_BLOCK_SIZE, + .enc_type = (ENC_TYPE_ALG_3DES | + ENC_TYPE_CHAINING_CBC), + .ablkcipher = { + .ivsize = DES_BLOCK_SIZE, + .min_keysize = 3 * DES_KEY_SIZE, + .max_keysize = 3 * DES_KEY_SIZE, + .setkey = n2_3des_setkey, + .encrypt = n2_encrypt_chaining, + .decrypt = n2_decrypt_chaining, + }, + }, + { .name = "cfb(des3_ede)", + .drv_name = "cfb-3des", + .block_size = DES_BLOCK_SIZE, + .enc_type = (ENC_TYPE_ALG_3DES | + ENC_TYPE_CHAINING_CFB), + .ablkcipher = { + .min_keysize = 3 * DES_KEY_SIZE, + .max_keysize = 3 * DES_KEY_SIZE, + .setkey = n2_3des_setkey, + .encrypt = n2_encrypt_chaining, + .decrypt = n2_decrypt_chaining, + }, + }, + /* AES: ECB CBC and CTR are supported */ + { .name = "ecb(aes)", + .drv_name = "ecb-aes", + .block_size = AES_BLOCK_SIZE, + .enc_type = (ENC_TYPE_ALG_AES128 | + ENC_TYPE_CHAINING_ECB), + .ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = n2_aes_setkey, + .encrypt = n2_encrypt_ecb, + .decrypt = n2_decrypt_ecb, + }, + }, + { .name = "cbc(aes)", + .drv_name = "cbc-aes", + .block_size = AES_BLOCK_SIZE, + .enc_type = (ENC_TYPE_ALG_AES128 | + ENC_TYPE_CHAINING_CBC), + .ablkcipher = { + .ivsize = AES_BLOCK_SIZE, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = 
AES_MAX_KEY_SIZE, + .setkey = n2_aes_setkey, + .encrypt = n2_encrypt_chaining, + .decrypt = n2_decrypt_chaining, + }, + }, + { .name = "ctr(aes)", + .drv_name = "ctr-aes", + .block_size = AES_BLOCK_SIZE, + .enc_type = (ENC_TYPE_ALG_AES128 | + ENC_TYPE_CHAINING_COUNTER), + .ablkcipher = { + .ivsize = AES_BLOCK_SIZE, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = n2_aes_setkey, + .encrypt = n2_encrypt_chaining, + .decrypt = n2_encrypt_chaining, + }, + }, + +}; +#define NUM_CIPHER_TMPLS ARRAY_SIZE(cipher_tmpls) + +static LIST_HEAD(cipher_algs); + +struct n2_hash_tmpl { + const char *name; + int (*digest)(struct ahash_request *req); + u8 digest_size; + u8 block_size; +}; +static const struct n2_hash_tmpl hash_tmpls[] = { + { .name = "md5", + .digest = n2_md5_async_digest, + .digest_size = MD5_DIGEST_SIZE, + .block_size = MD5_HMAC_BLOCK_SIZE }, + { .name = "sha1", + .digest = n2_sha1_async_digest, + .digest_size = SHA1_DIGEST_SIZE, + .block_size = SHA1_BLOCK_SIZE }, + { .name = "sha256", + .digest = n2_sha256_async_digest, + .digest_size = SHA256_DIGEST_SIZE, + .block_size = SHA256_BLOCK_SIZE }, + { .name = "sha224", + .digest = n2_sha224_async_digest, + .digest_size = SHA224_DIGEST_SIZE, + .block_size = SHA224_BLOCK_SIZE }, +}; +#define NUM_HASH_TMPLS ARRAY_SIZE(hash_tmpls) + +struct n2_ahash_alg { + struct list_head entry; + struct ahash_alg alg; +}; +static LIST_HEAD(ahash_algs); + +static int algs_registered; + +static void __n2_unregister_algs(void) +{ + struct n2_cipher_alg *cipher, *cipher_tmp; + struct n2_ahash_alg *alg, *alg_tmp; + + list_for_each_entry_safe(cipher, cipher_tmp, &cipher_algs, entry) { + crypto_unregister_alg(&cipher->alg); + list_del(&cipher->entry); + kfree(cipher); + } + list_for_each_entry_safe(alg, alg_tmp, &ahash_algs, entry) { + crypto_unregister_ahash(&alg->alg); + list_del(&alg->entry); + kfree(alg); + } +} + +static int n2_cipher_cra_init(struct crypto_tfm *tfm) +{ + tfm->crt_ablkcipher.reqsize = sizeof(struct n2_request_context); + return 0; +} + +static int __devinit __n2_register_one_cipher(const struct n2_cipher_tmpl *tmpl) +{ + struct n2_cipher_alg *p = kzalloc(sizeof(*p), GFP_KERNEL); + struct crypto_alg *alg; + int err; + + if (!p) + return -ENOMEM; + + alg = &p->alg; + + snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s", tmpl->name); + snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s-n2", tmpl->drv_name); + alg->cra_priority = N2_CRA_PRIORITY; + alg->cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC; + alg->cra_blocksize = tmpl->block_size; + p->enc_type = tmpl->enc_type; + alg->cra_ctxsize = sizeof(struct n2_cipher_context); + alg->cra_type = &crypto_ablkcipher_type; + alg->cra_u.ablkcipher = tmpl->ablkcipher; + alg->cra_init = n2_cipher_cra_init; + alg->cra_module = THIS_MODULE; + + list_add(&p->entry, &cipher_algs); + err = crypto_register_alg(alg); + if (err) { + list_del(&p->entry); + kfree(p); + } + return err; +} + +static int __devinit __n2_register_one_ahash(const struct n2_hash_tmpl *tmpl) +{ + struct n2_ahash_alg *p = kzalloc(sizeof(*p), GFP_KERNEL); + struct hash_alg_common *halg; + struct crypto_alg *base; + struct ahash_alg *ahash; + int err; + + if (!p) + return -ENOMEM; + + ahash = &p->alg; + ahash->init = n2_hash_async_init; + ahash->update = n2_hash_async_update; + ahash->final = n2_hash_async_final; + ahash->finup = n2_hash_async_finup; + ahash->digest = tmpl->digest; + + halg = &ahash->halg; + halg->digestsize = tmpl->digest_size; + + base = &halg->base; + snprintf(base->cra_name, 
CRYPTO_MAX_ALG_NAME, "%s", tmpl->name); + snprintf(base->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s-n2", tmpl->name); + base->cra_priority = N2_CRA_PRIORITY; + base->cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_NEED_FALLBACK; + base->cra_blocksize = tmpl->block_size; + base->cra_ctxsize = sizeof(struct n2_hash_ctx); + base->cra_module = THIS_MODULE; + base->cra_init = n2_hash_cra_init; + base->cra_exit = n2_hash_cra_exit; + + list_add(&p->entry, &ahash_algs); + err = crypto_register_ahash(ahash); + if (err) { + list_del(&p->entry); + kfree(p); + } + return err; +} + +static int __devinit n2_register_algs(void) +{ + int i, err = 0; + + mutex_lock(&spu_lock); + if (algs_registered++) + goto out; + + for (i = 0; i < NUM_HASH_TMPLS; i++) { + err = __n2_register_one_ahash(&hash_tmpls[i]); + if (err) { + __n2_unregister_algs(); + goto out; + } + } + for (i = 0; i < NUM_CIPHER_TMPLS; i++) { + err = __n2_register_one_cipher(&cipher_tmpls[i]); + if (err) { + __n2_unregister_algs(); + goto out; + } + } + +out: + mutex_unlock(&spu_lock); + return err; +} + +static void __exit n2_unregister_algs(void) +{ + mutex_lock(&spu_lock); + if (!--algs_registered) + __n2_unregister_algs(); + mutex_unlock(&spu_lock); +} + +/* To map CWQ queues to interrupt sources, the hypervisor API provides + * a devino. This isn't very useful to us because all of the + * interrupts listed in the of_device node have been translated to + * Linux virtual IRQ cookie numbers. + * + * So we have to back-translate, going through the 'intr' and 'ino' + * property tables of the n2cp MDESC node, matching it with the OF + * 'interrupts' property entries, in order to figure out which + * devino goes to which already-translated IRQ. + */ +static int find_devino_index(struct of_device *dev, struct spu_mdesc_info *ip, + unsigned long dev_ino) +{ + const unsigned int *dev_intrs; + unsigned int intr; + int i; + + for (i = 0; i < ip->num_intrs; i++) { + if (ip->ino_table[i].ino == dev_ino) + break; + } + if (i == ip->num_intrs) + return -ENODEV; + + intr = ip->ino_table[i].intr; + + dev_intrs = of_get_property(dev->node, "interrupts", NULL); + if (!dev_intrs) + return -ENODEV; + + for (i = 0; i < dev->num_irqs; i++) { + if (dev_intrs[i] == intr) + return i; + } + + return -ENODEV; +} + +static int spu_map_ino(struct of_device *dev, struct spu_mdesc_info *ip, + const char *irq_name, struct spu_queue *p, + irq_handler_t handler) +{ + unsigned long herr; + int index; + + herr = sun4v_ncs_qhandle_to_devino(p->qhandle, &p->devino); + if (herr) + return -EINVAL; + + index = find_devino_index(dev, ip, p->devino); + if (index < 0) + return index; + + p->irq = dev->irqs[index]; + + sprintf(p->irq_name, "%s-%d", irq_name, index); + + return request_irq(p->irq, handler, IRQF_SAMPLE_RANDOM, + p->irq_name, p); +} + +static struct kmem_cache *queue_cache[2]; + +static void *new_queue(unsigned long q_type) +{ + return kmem_cache_zalloc(queue_cache[q_type - 1], GFP_KERNEL); +} + +static void free_queue(void *p, unsigned long q_type) +{ + return kmem_cache_free(queue_cache[q_type - 1], p); +} + +static int queue_cache_init(void) +{ + if (!queue_cache[HV_NCS_QTYPE_MAU - 1]) + queue_cache[HV_NCS_QTYPE_MAU - 1] = + kmem_cache_create("mau_queue", + (MAU_NUM_ENTRIES * + MAU_ENTRY_SIZE), + MAU_ENTRY_SIZE, 0, NULL); + if (!queue_cache[HV_NCS_QTYPE_MAU - 1]) + return -ENOMEM; + + if (!queue_cache[HV_NCS_QTYPE_CWQ - 1]) + queue_cache[HV_NCS_QTYPE_CWQ - 1] = + kmem_cache_create("cwq_queue", + (CWQ_NUM_ENTRIES * + CWQ_ENTRY_SIZE), + CWQ_ENTRY_SIZE, 0, NULL); + if
(!queue_cache[HV_NCS_QTYPE_CWQ - 1]) { + kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_MAU - 1]); + return -ENOMEM; + } + return 0; +} + +static void queue_cache_destroy(void) +{ + kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_MAU - 1]); + kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_CWQ - 1]); +} + +static int spu_queue_register(struct spu_queue *p, unsigned long q_type) +{ + cpumask_var_t old_allowed; + unsigned long hv_ret; + + if (cpumask_empty(&p->sharing)) + return -EINVAL; + + if (!alloc_cpumask_var(&old_allowed, GFP_KERNEL)) + return -ENOMEM; + + cpumask_copy(old_allowed, &current->cpus_allowed); + + set_cpus_allowed_ptr(current, &p->sharing); + + hv_ret = sun4v_ncs_qconf(q_type, __pa(p->q), + CWQ_NUM_ENTRIES, &p->qhandle); + if (!hv_ret) + sun4v_ncs_sethead_marker(p->qhandle, 0); + + set_cpus_allowed_ptr(current, old_allowed); + + free_cpumask_var(old_allowed); + + return (hv_ret ? -EINVAL : 0); +} + +static int spu_queue_setup(struct spu_queue *p) +{ + int err; + + p->q = new_queue(p->q_type); + if (!p->q) + return -ENOMEM; + + err = spu_queue_register(p, p->q_type); + if (err) { + free_queue(p->q, p->q_type); + p->q = NULL; + } + + return err; +} + +static void spu_queue_destroy(struct spu_queue *p) +{ + unsigned long hv_ret; + + if (!p->q) + return; + + hv_ret = sun4v_ncs_qconf(p->q_type, p->qhandle, 0, &p->qhandle); + + if (!hv_ret) + free_queue(p->q, p->q_type); +} + +static void spu_list_destroy(struct list_head *list) +{ + struct spu_queue *p, *n; + + list_for_each_entry_safe(p, n, list, list) { + int i; + + for (i = 0; i < NR_CPUS; i++) { + if (cpu_to_cwq[i] == p) + cpu_to_cwq[i] = NULL; + } + + if (p->irq) { + free_irq(p->irq, p); + p->irq = 0; + } + spu_queue_destroy(p); + list_del(&p->list); + kfree(p); + } +} + +/* Walk the backward arcs of a CWQ 'exec-unit' node, + * gathering cpu membership information. + */ +static int spu_mdesc_walk_arcs(struct mdesc_handle *mdesc, + struct of_device *dev, + u64 node, struct spu_queue *p, + struct spu_queue **table) +{ + u64 arc; + + mdesc_for_each_arc(arc, mdesc, node, MDESC_ARC_TYPE_BACK) { + u64 tgt = mdesc_arc_target(mdesc, arc); + const char *name = mdesc_node_name(mdesc, tgt); + const u64 *id; + + if (strcmp(name, "cpu")) + continue; + id = mdesc_get_property(mdesc, tgt, "id", NULL); + if (table[*id] != NULL) { + dev_err(&dev->dev, "%s: SPU cpu slot already set.\n", + dev->node->full_name); + return -EINVAL; + } + cpu_set(*id, p->sharing); + table[*id] = p; + } + return 0; +} + +/* Process an 'exec-unit' MDESC node of type 'cwq'.
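+ * Each such node reaches its owning strands through MDESC back arcs
+ * (exec-unit --> cpu), which spu_mdesc_walk_arcs() above follows to
+ * build the queue's sharing cpumask.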
*/ +static int handle_exec_unit(struct spu_mdesc_info *ip, struct list_head *list, + struct of_device *dev, struct mdesc_handle *mdesc, + u64 node, const char *iname, unsigned long q_type, + irq_handler_t handler, struct spu_queue **table) +{ + struct spu_queue *p; + int err; + + p = kzalloc(sizeof(struct spu_queue), GFP_KERNEL); + if (!p) { + dev_err(&dev->dev, "%s: Could not allocate SPU queue.\n", + dev->node->full_name); + return -ENOMEM; + } + + cpus_clear(p->sharing); + spin_lock_init(&p->lock); + p->q_type = q_type; + INIT_LIST_HEAD(&p->jobs); + list_add(&p->list, list); + + err = spu_mdesc_walk_arcs(mdesc, dev, node, p, table); + if (err) + return err; + + err = spu_queue_setup(p); + if (err) + return err; + + return spu_map_ino(dev, ip, iname, p, handler); +} + +static int spu_mdesc_scan(struct mdesc_handle *mdesc, struct of_device *dev, + struct spu_mdesc_info *ip, struct list_head *list, + const char *exec_name, unsigned long q_type, + irq_handler_t handler, struct spu_queue **table) +{ + int err = 0; + u64 node; + + mdesc_for_each_node_by_name(mdesc, node, "exec-unit") { + const char *type; + + type = mdesc_get_property(mdesc, node, "type", NULL); + if (!type || strcmp(type, exec_name)) + continue; + + err = handle_exec_unit(ip, list, dev, mdesc, node, + exec_name, q_type, handler, table); + if (err) { + spu_list_destroy(list); + break; + } + } + + return err; +} + +static int __devinit get_irq_props(struct mdesc_handle *mdesc, u64 node, + struct spu_mdesc_info *ip) +{ + const u64 *intr, *ino; + int intr_len, ino_len; + int i; + + intr = mdesc_get_property(mdesc, node, "intr", &intr_len); + if (!intr) + return -ENODEV; + + ino = mdesc_get_property(mdesc, node, "ino", &ino_len); + if (!ino) + return -ENODEV; + + if (intr_len != ino_len) + return -EINVAL; + + ip->num_intrs = intr_len / sizeof(u64); + ip->ino_table = kzalloc((sizeof(struct ino_blob) * + ip->num_intrs), + GFP_KERNEL); + if (!ip->ino_table) + return -ENOMEM; + + for (i = 0; i < ip->num_intrs; i++) { + struct ino_blob *b = &ip->ino_table[i]; + b->intr = intr[i]; + b->ino = ino[i]; + } + + return 0; +} + +static int __devinit grab_mdesc_irq_props(struct mdesc_handle *mdesc, + struct of_device *dev, + struct spu_mdesc_info *ip, + const char *node_name) +{ + const unsigned int *reg; + u64 node; + + reg = of_get_property(dev->node, "reg", NULL); + if (!reg) + return -ENODEV; + + mdesc_for_each_node_by_name(mdesc, node, "virtual-device") { + const char *name; + const u64 *chdl; + + name = mdesc_get_property(mdesc, node, "name", NULL); + if (!name || strcmp(name, node_name)) + continue; + chdl = mdesc_get_property(mdesc, node, "cfg-handle", NULL); + if (!chdl || (*chdl != *reg)) + continue; + ip->cfg_handle = *chdl; + return get_irq_props(mdesc, node, ip); + } + + return -ENODEV; +} + +static unsigned long n2_spu_hvapi_major; +static unsigned long n2_spu_hvapi_minor; + +static int __devinit n2_spu_hvapi_register(void) +{ + int err; + + n2_spu_hvapi_major = 2; + n2_spu_hvapi_minor = 0; + + err = sun4v_hvapi_register(HV_GRP_NCS, + n2_spu_hvapi_major, + &n2_spu_hvapi_minor); + + if (!err) + pr_info("Registered NCS HVAPI version %lu.%lu\n", + n2_spu_hvapi_major, + n2_spu_hvapi_minor); + + return err; +} + +static void n2_spu_hvapi_unregister(void) +{ + sun4v_hvapi_unregister(HV_GRP_NCS); +} + +static int global_ref; + +static int __devinit grab_global_resources(void) +{ + int err = 0; + + mutex_lock(&spu_lock); + + if (global_ref++) + goto out; + + err = n2_spu_hvapi_register(); + if (err) + goto out; + + err =
queue_cache_init(); + if (err) + goto out_hvapi_release; + + err = -ENOMEM; + cpu_to_cwq = kzalloc(sizeof(struct spu_queue *) * NR_CPUS, + GFP_KERNEL); + if (!cpu_to_cwq) + goto out_queue_cache_destroy; + + cpu_to_mau = kzalloc(sizeof(struct spu_queue *) * NR_CPUS, + GFP_KERNEL); + if (!cpu_to_mau) + goto out_free_cwq_table; + + err = 0; + +out: + if (err) + global_ref--; + mutex_unlock(&spu_lock); + return err; + +out_free_cwq_table: + kfree(cpu_to_cwq); + cpu_to_cwq = NULL; + +out_queue_cache_destroy: + queue_cache_destroy(); + +out_hvapi_release: + n2_spu_hvapi_unregister(); + goto out; +} + +static void release_global_resources(void) +{ + mutex_lock(&spu_lock); + if (!--global_ref) { + kfree(cpu_to_cwq); + cpu_to_cwq = NULL; + + kfree(cpu_to_mau); + cpu_to_mau = NULL; + + queue_cache_destroy(); + n2_spu_hvapi_unregister(); + } + mutex_unlock(&spu_lock); +} + +static struct n2_crypto * __devinit alloc_n2cp(void) +{ + struct n2_crypto *np = kzalloc(sizeof(struct n2_crypto), GFP_KERNEL); + + if (np) + INIT_LIST_HEAD(&np->cwq_list); + + return np; +} + +static void free_n2cp(struct n2_crypto *np) +{ + if (np->cwq_info.ino_table) { + kfree(np->cwq_info.ino_table); + np->cwq_info.ino_table = NULL; + } + + kfree(np); +} + +static void __devinit n2_spu_driver_version(void) +{ + static int n2_spu_version_printed; + + if (n2_spu_version_printed++ == 0) + pr_info("%s", version); +} + +static int __devinit n2_crypto_probe(struct of_device *dev, + const struct of_device_id *match) +{ + struct mdesc_handle *mdesc; + const char *full_name; + struct n2_crypto *np; + int err; + + n2_spu_driver_version(); + + full_name = dev->node->full_name; + pr_info("Found N2CP at %s\n", full_name); + + np = alloc_n2cp(); + if (!np) { + dev_err(&dev->dev, "%s: Unable to allocate n2cp.\n", + full_name); + return -ENOMEM; + } + + err = grab_global_resources(); + if (err) { + dev_err(&dev->dev, "%s: Unable to grab " + "global resources.\n", full_name); + goto out_free_n2cp; + } + + mdesc = mdesc_grab(); + + if (!mdesc) { + dev_err(&dev->dev, "%s: Unable to grab MDESC.\n", + full_name); + err = -ENODEV; + goto out_free_global; + } + err = grab_mdesc_irq_props(mdesc, dev, &np->cwq_info, "n2cp"); + if (err) { + dev_err(&dev->dev, "%s: Unable to grab IRQ props.\n", + full_name); + mdesc_release(mdesc); + goto out_free_global; + } + + err = spu_mdesc_scan(mdesc, dev, &np->cwq_info, &np->cwq_list, + "cwq", HV_NCS_QTYPE_CWQ, cwq_intr, + cpu_to_cwq); + mdesc_release(mdesc); + + if (err) { + dev_err(&dev->dev, "%s: CWQ MDESC scan failed.\n", + full_name); + goto out_free_global; + } + + err = n2_register_algs(); + if (err) { + dev_err(&dev->dev, "%s: Unable to register algorithms.\n", + full_name); + goto out_free_spu_list; + } + + dev_set_drvdata(&dev->dev, np); + + return 0; + +out_free_spu_list: + spu_list_destroy(&np->cwq_list); + +out_free_global: + release_global_resources(); + +out_free_n2cp: + free_n2cp(np); + + return err; +} + +static int __devexit n2_crypto_remove(struct of_device *dev) +{ + struct n2_crypto *np = dev_get_drvdata(&dev->dev); + + n2_unregister_algs(); + + spu_list_destroy(&np->cwq_list); + + release_global_resources(); + + free_n2cp(np); + + return 0; +} + +static struct n2_mau * __devinit alloc_ncp(void) +{ + struct n2_mau *mp = kzalloc(sizeof(struct n2_mau), GFP_KERNEL); + + if (mp) + INIT_LIST_HEAD(&mp->mau_list); + + return mp; +} + +static void free_ncp(struct n2_mau *mp) +{ + if (mp->mau_info.ino_table) { + kfree(mp->mau_info.ino_table); + mp->mau_info.ino_table = NULL; + } + + kfree(mp); +} + 
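+/* Lifetime note (for exposition): n2_crypto_probe() above and
+ * n2_mau_probe() below each take one reference on the shared CPU
+ * tables, queue caches and HV API registration via
+ * grab_global_resources(); the matching remove paths drop it via
+ * release_global_resources(), so the globals are torn down only
+ * when the last CWQ/MAU device goes away.
+ */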
+static int __devinit n2_mau_probe(struct of_device *dev, + const struct of_device_id *match) +{ + struct mdesc_handle *mdesc; + const char *full_name; + struct n2_mau *mp; + int err; + + n2_spu_driver_version(); + + full_name = dev->node->full_name; + pr_info("Found NCP at %s\n", full_name); + + mp = alloc_ncp(); + if (!mp) { + dev_err(&dev->dev, "%s: Unable to allocate ncp.\n", + full_name); + return -ENOMEM; + } + + err = grab_global_resources(); + if (err) { + dev_err(&dev->dev, "%s: Unable to grab " + "global resources.\n", full_name); + goto out_free_ncp; + } + + mdesc = mdesc_grab(); + + if (!mdesc) { + dev_err(&dev->dev, "%s: Unable to grab MDESC.\n", + full_name); + err = -ENODEV; + goto out_free_global; + } + + err = grab_mdesc_irq_props(mdesc, dev, &mp->mau_info, "ncp"); + if (err) { + dev_err(&dev->dev, "%s: Unable to grab IRQ props.\n", + full_name); + mdesc_release(mdesc); + goto out_free_global; + } + + err = spu_mdesc_scan(mdesc, dev, &mp->mau_info, &mp->mau_list, + "mau", HV_NCS_QTYPE_MAU, mau_intr, + cpu_to_mau); + mdesc_release(mdesc); + + if (err) { + dev_err(&dev->dev, "%s: MAU MDESC scan failed.\n", + full_name); + goto out_free_global; + } + + dev_set_drvdata(&dev->dev, mp); + + return 0; + +out_free_global: + release_global_resources(); + +out_free_ncp: + free_ncp(mp); + + return err; +} + +static int __devexit n2_mau_remove(struct of_device *dev) +{ + struct n2_mau *mp = dev_get_drvdata(&dev->dev); + + spu_list_destroy(&mp->mau_list); + + release_global_resources(); + + free_ncp(mp); + + return 0; +} + +static struct of_device_id n2_crypto_match[] = { + { + .name = "n2cp", + .compatible = "SUNW,n2-cwq", + }, + { + .name = "n2cp", + .compatible = "SUNW,vf-cwq", + }, + {}, +}; + +MODULE_DEVICE_TABLE(of, n2_crypto_match); + +static struct of_platform_driver n2_crypto_driver = { + .name = "n2cp", + .match_table = n2_crypto_match, + .probe = n2_crypto_probe, + .remove = __devexit_p(n2_crypto_remove), +}; + +static struct of_device_id n2_mau_match[] = { + { + .name = "ncp", + .compatible = "SUNW,n2-mau", + }, + { + .name = "ncp", + .compatible = "SUNW,vf-mau", + }, + {}, +}; + +MODULE_DEVICE_TABLE(of, n2_mau_match); + +static struct of_platform_driver n2_mau_driver = { + .name = "ncp", + .match_table = n2_mau_match, + .probe = n2_mau_probe, + .remove = __devexit_p(n2_mau_remove), +}; + +static int __init n2_init(void) +{ + int err = of_register_driver(&n2_crypto_driver, &of_bus_type); + + if (!err) { + err = of_register_driver(&n2_mau_driver, &of_bus_type); + if (err) + of_unregister_driver(&n2_crypto_driver); + } + return err; +} + +static void __exit n2_exit(void) +{ + of_unregister_driver(&n2_mau_driver); + of_unregister_driver(&n2_crypto_driver); +} + +module_init(n2_init); +module_exit(n2_exit); diff --git a/drivers/crypto/n2_core.h b/drivers/crypto/n2_core.h new file mode 100644 index 0000000..4bcbbea --- /dev/null +++ b/drivers/crypto/n2_core.h @@ -0,0 +1,231 @@ +#ifndef _N2_CORE_H +#define _N2_CORE_H + +#ifndef __ASSEMBLY__ + +struct ino_blob { + u64 intr; + u64 ino; +}; + +struct spu_mdesc_info { + u64 cfg_handle; + struct ino_blob *ino_table; + int num_intrs; +}; + +struct n2_crypto { + struct spu_mdesc_info cwq_info; + struct list_head cwq_list; +}; + +struct n2_mau { + struct spu_mdesc_info mau_info; + struct list_head mau_list; +}; + +#define CWQ_ENTRY_SIZE 64 +#define CWQ_NUM_ENTRIES 64 + +#define MAU_ENTRY_SIZE 64 +#define MAU_NUM_ENTRIES 64 + +struct cwq_initial_entry { + u64 control; + u64 src_addr; + u64 auth_key_addr; + u64 auth_iv_addr; + u64 
final_auth_state_addr; + u64 enc_key_addr; + u64 enc_iv_addr; + u64 dest_addr; +}; + +struct cwq_ext_entry { + u64 len; + u64 src_addr; + u64 resv1; + u64 resv2; + u64 resv3; + u64 resv4; + u64 resv5; + u64 resv6; +}; + +struct cwq_final_entry { + u64 control; + u64 src_addr; + u64 resv1; + u64 resv2; + u64 resv3; + u64 resv4; + u64 resv5; + u64 resv6; +}; + +#define CONTROL_LEN 0x000000000000ffffULL +#define CONTROL_LEN_SHIFT 0 +#define CONTROL_HMAC_KEY_LEN 0x0000000000ff0000ULL +#define CONTROL_HMAC_KEY_LEN_SHIFT 16 +#define CONTROL_ENC_TYPE 0x00000000ff000000ULL +#define CONTROL_ENC_TYPE_SHIFT 24 +#define ENC_TYPE_ALG_RC4_STREAM 0x00ULL +#define ENC_TYPE_ALG_RC4_NOSTREAM 0x04ULL +#define ENC_TYPE_ALG_DES 0x08ULL +#define ENC_TYPE_ALG_3DES 0x0cULL +#define ENC_TYPE_ALG_AES128 0x10ULL +#define ENC_TYPE_ALG_AES192 0x14ULL +#define ENC_TYPE_ALG_AES256 0x18ULL +#define ENC_TYPE_ALG_RESERVED 0x1cULL +#define ENC_TYPE_ALG_MASK 0x1cULL +#define ENC_TYPE_CHAINING_ECB 0x00ULL +#define ENC_TYPE_CHAINING_CBC 0x01ULL +#define ENC_TYPE_CHAINING_CFB 0x02ULL +#define ENC_TYPE_CHAINING_COUNTER 0x03ULL +#define ENC_TYPE_CHAINING_MASK 0x03ULL +#define CONTROL_AUTH_TYPE 0x0000001f00000000ULL +#define CONTROL_AUTH_TYPE_SHIFT 32 +#define AUTH_TYPE_RESERVED 0x00ULL +#define AUTH_TYPE_MD5 0x01ULL +#define AUTH_TYPE_SHA1 0x02ULL +#define AUTH_TYPE_SHA256 0x03ULL +#define AUTH_TYPE_CRC32 0x04ULL +#define AUTH_TYPE_HMAC_MD5 0x05ULL +#define AUTH_TYPE_HMAC_SHA1 0x06ULL +#define AUTH_TYPE_HMAC_SHA256 0x07ULL +#define AUTH_TYPE_TCP_CHECKSUM 0x08ULL +#define AUTH_TYPE_SSL_HMAC_MD5 0x09ULL +#define AUTH_TYPE_SSL_HMAC_SHA1 0x0aULL +#define AUTH_TYPE_SSL_HMAC_SHA256 0x0bULL +#define CONTROL_STRAND 0x000000e000000000ULL +#define CONTROL_STRAND_SHIFT 37 +#define CONTROL_HASH_LEN 0x0000ff0000000000ULL +#define CONTROL_HASH_LEN_SHIFT 40 +#define CONTROL_INTERRUPT 0x0001000000000000ULL +#define CONTROL_STORE_FINAL_AUTH_STATE 0x0002000000000000ULL +#define CONTROL_RESERVED 0x001c000000000000ULL +#define CONTROL_HV_DONE 0x0004000000000000ULL +#define CONTROL_HV_PROTOCOL_ERROR 0x0008000000000000ULL +#define CONTROL_HV_HARDWARE_ERROR 0x0010000000000000ULL +#define CONTROL_END_OF_BLOCK 0x0020000000000000ULL +#define CONTROL_START_OF_BLOCK 0x0040000000000000ULL +#define CONTROL_ENCRYPT 0x0080000000000000ULL +#define CONTROL_OPCODE 0xff00000000000000ULL +#define CONTROL_OPCODE_SHIFT 56 +#define OPCODE_INPLACE_BIT 0x80ULL +#define OPCODE_SSL_KEYBLOCK 0x10ULL +#define OPCODE_COPY 0x20ULL +#define OPCODE_ENCRYPT 0x40ULL +#define OPCODE_AUTH_MAC 0x41ULL + +#endif /* !(__ASSEMBLY__) */ + +/* NCS v2.0 hypervisor interfaces */ +#define HV_NCS_QTYPE_MAU 0x01 +#define HV_NCS_QTYPE_CWQ 0x02 + +/* ncs_qconf() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_NCS_QCONF + * ARG0: Queue type (HV_NCS_QTYPE_{MAU,CWQ}) + * ARG1: Real address of queue, or handle for unconfigure + * ARG2: Number of entries in queue, zero for unconfigure + * RET0: status + * RET1: queue handle + * + * Configure a queue in the stream processing unit. + * + * The real address given as the base must be 64-byte + * aligned. + * + * The queue size can range from a minimum of 2 to a maximum + * of 64. The queue size must be a power of two. + * + * To unconfigure a queue, specify a length of zero and place + * the queue handle into ARG1. + * + * On configure success the hypervisor will set the FIRST, HEAD, + * and TAIL registers to the address of the first entry in the + * queue. The LAST register will be set to point to the last + * entry in the queue. 
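+ *
+ * An illustrative configure call from C (mirroring
+ * spu_queue_register() in n2_core.c), using the sun4v_ncs_qconf()
+ * wrapper declared at the end of this header; 'queue' stands for a
+ * hypothetical 64-byte aligned queue buffer:
+ *
+ *	unsigned long qhandle, hv_ret;
+ *
+ *	hv_ret = sun4v_ncs_qconf(HV_NCS_QTYPE_CWQ, __pa(queue),
+ *				 CWQ_NUM_ENTRIES, &qhandle);
+ *	if (hv_ret != HV_EOK)
+ *		return -EINVAL;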
+ */ +#define HV_FAST_NCS_QCONF 0x111 + +/* ncs_qinfo() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_NCS_QINFO + * ARG0: Queue handle + * RET0: status + * RET1: Queue type (HV_NCS_QTYPE_{MAU,CWQ}) + * RET2: Queue base address + * RET3: Number of entries + */ +#define HV_FAST_NCS_QINFO 0x112 + +/* ncs_gethead() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_NCS_GETHEAD + * ARG0: Queue handle + * RET0: status + * RET1: queue head offset + */ +#define HV_FAST_NCS_GETHEAD 0x113 + +/* ncs_gettail() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_NCS_GETTAIL + * ARG0: Queue handle + * RET0: status + * RET1: queue tail offset + */ +#define HV_FAST_NCS_GETTAIL 0x114 + +/* ncs_settail() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_NCS_SETTAIL + * ARG0: Queue handle + * ARG1: New tail offset + * RET0: status + */ +#define HV_FAST_NCS_SETTAIL 0x115 + +/* ncs_qhandle_to_devino() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_NCS_QHANDLE_TO_DEVINO + * ARG0: Queue handle + * RET0: status + * RET1: devino + */ +#define HV_FAST_NCS_QHANDLE_TO_DEVINO 0x116 + +/* ncs_sethead_marker() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_NCS_SETHEAD_MARKER + * ARG0: Queue handle + * ARG1: New head offset + * RET0: status + */ +#define HV_FAST_NCS_SETHEAD_MARKER 0x117 + +#ifndef __ASSEMBLY__ +extern unsigned long sun4v_ncs_qconf(unsigned long queue_type, + unsigned long queue_ra, + unsigned long num_entries, + unsigned long *qhandle); +extern unsigned long sun4v_ncs_qinfo(unsigned long qhandle, + unsigned long *queue_type, + unsigned long *queue_ra, + unsigned long *num_entries); +extern unsigned long sun4v_ncs_gethead(unsigned long qhandle, + unsigned long *head); +extern unsigned long sun4v_ncs_gettail(unsigned long qhandle, + unsigned long *tail); +extern unsigned long sun4v_ncs_settail(unsigned long qhandle, + unsigned long tail); +extern unsigned long sun4v_ncs_qhandle_to_devino(unsigned long qhandle, + unsigned long *devino); +extern unsigned long sun4v_ncs_sethead_marker(unsigned long qhandle, + unsigned long head); +#endif /* !(__ASSEMBLY__) */ + +#endif /* _N2_CORE_H */ -- cgit v0.10.2 From d5e4aaefd99797929e4455db48d8f9a8728f05f4 Mon Sep 17 00:00:00 2001 From: Lee Nipper Date: Wed, 19 May 2010 19:18:38 +1000 Subject: crypto: talitos - prepare for adding ahash algorithms No functional changes. Use a union in talitos_alg_template for the crypto_alg so that we can add a member later for ahash_alg. Signed-off-by: Lee Nipper Acked-By: Kim Phillips Signed-off-by: Herbert Xu diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index dc558a0..ce38b05 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -1479,14 +1479,17 @@ static int ablkcipher_decrypt(struct ablkcipher_request *areq) } struct talitos_alg_template { - struct crypto_alg alg; + u32 type; + union { + struct crypto_alg crypto; + } alg; __be32 desc_hdr_template; }; static struct talitos_alg_template driver_algs[] = { /* AEAD algorithms. 
These use a single-pass ipsec_esp descriptor */ - { - .alg = { + { .type = CRYPTO_ALG_TYPE_AEAD, + .alg.crypto = { .cra_name = "authenc(hmac(sha1),cbc(aes))", .cra_driver_name = "authenc-hmac-sha1-cbc-aes-talitos", .cra_blocksize = AES_BLOCK_SIZE, @@ -1511,8 +1514,8 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_SHA1_HMAC, }, - { - .alg = { + { .type = CRYPTO_ALG_TYPE_AEAD, + .alg.crypto = { .cra_name = "authenc(hmac(sha1),cbc(des3_ede))", .cra_driver_name = "authenc-hmac-sha1-cbc-3des-talitos", .cra_blocksize = DES3_EDE_BLOCK_SIZE, @@ -1538,8 +1541,8 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_SHA1_HMAC, }, - { - .alg = { + { .type = CRYPTO_ALG_TYPE_AEAD, + .alg.crypto = { .cra_name = "authenc(hmac(sha256),cbc(aes))", .cra_driver_name = "authenc-hmac-sha256-cbc-aes-talitos", .cra_blocksize = AES_BLOCK_SIZE, @@ -1564,8 +1567,8 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_SHA256_HMAC, }, - { - .alg = { + { .type = CRYPTO_ALG_TYPE_AEAD, + .alg.crypto = { .cra_name = "authenc(hmac(sha256),cbc(des3_ede))", .cra_driver_name = "authenc-hmac-sha256-cbc-3des-talitos", .cra_blocksize = DES3_EDE_BLOCK_SIZE, @@ -1591,8 +1594,8 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_SHA256_HMAC, }, - { - .alg = { + { .type = CRYPTO_ALG_TYPE_AEAD, + .alg.crypto = { .cra_name = "authenc(hmac(md5),cbc(aes))", .cra_driver_name = "authenc-hmac-md5-cbc-aes-talitos", .cra_blocksize = AES_BLOCK_SIZE, @@ -1617,8 +1620,8 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_MD5_HMAC, }, - { - .alg = { + { .type = CRYPTO_ALG_TYPE_AEAD, + .alg.crypto = { .cra_name = "authenc(hmac(md5),cbc(des3_ede))", .cra_driver_name = "authenc-hmac-md5-cbc-3des-talitos", .cra_blocksize = DES3_EDE_BLOCK_SIZE, @@ -1645,8 +1648,8 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_MD5_HMAC, }, /* ABLKCIPHER algorithms. */ - { - .alg = { + { .type = CRYPTO_ALG_TYPE_ABLKCIPHER, + .alg.crypto = { .cra_name = "cbc(aes)", .cra_driver_name = "cbc-aes-talitos", .cra_blocksize = AES_BLOCK_SIZE, @@ -1667,8 +1670,8 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_SEL0_AESU | DESC_HDR_MODE0_AESU_CBC, }, - { - .alg = { + { .type = CRYPTO_ALG_TYPE_ABLKCIPHER, + .alg.crypto = { .cra_name = "cbc(des3_ede)", .cra_driver_name = "cbc-3des-talitos", .cra_blocksize = DES3_EDE_BLOCK_SIZE, @@ -1789,7 +1792,7 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev, return ERR_PTR(-ENOMEM); alg = &t_alg->crypto_alg; - *alg = template->alg; + *alg = template->alg.crypto; alg->cra_module = THIS_MODULE; alg->cra_init = talitos_cra_init; -- cgit v0.10.2 From acbf7c627fb59dfea975f7aafeaba97921085061 Mon Sep 17 00:00:00 2001 From: Lee Nipper Date: Wed, 19 May 2010 19:19:33 +1000 Subject: crypto: talitos - second prepare step for adding ahash algorithms Used talitos_alg_template in talitos_crypto_alg so that it will accommodate ahash algorithms. Added some preparation code for ahash allocation and removal. No actual algorithms yet. 
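For example, registration and removal now dispatch on the template type (shown in full in the diff below):

	switch (t_alg->algt.type) {
	case CRYPTO_ALG_TYPE_ABLKCIPHER:
	case CRYPTO_ALG_TYPE_AEAD:
		err = crypto_register_alg(&t_alg->algt.alg.crypto);
		break;
	case CRYPTO_ALG_TYPE_AHASH:
		err = crypto_register_ahash(&t_alg->algt.alg.hash);
		break;
	}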
Signed-off-by: Lee Nipper Acked-By: Kim Phillips Signed-off-by: Herbert Xu diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index ce38b05..4e8153a 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -46,6 +46,8 @@ #include #include #include +#include +#include #include #include "talitos.h" @@ -1482,6 +1484,7 @@ struct talitos_alg_template { u32 type; union { struct crypto_alg crypto; + struct ahash_alg hash; } alg; __be32 desc_hdr_template; }; @@ -1698,8 +1701,7 @@ static struct talitos_alg_template driver_algs[] = { struct talitos_crypto_alg { struct list_head entry; struct device *dev; - __be32 desc_hdr_template; - struct crypto_alg crypto_alg; + struct talitos_alg_template algt; }; static int talitos_cra_init(struct crypto_tfm *tfm) @@ -1708,13 +1710,14 @@ static int talitos_cra_init(struct crypto_tfm *tfm) struct talitos_crypto_alg *talitos_alg; struct talitos_ctx *ctx = crypto_tfm_ctx(tfm); - talitos_alg = container_of(alg, struct talitos_crypto_alg, crypto_alg); + talitos_alg = container_of(alg, struct talitos_crypto_alg, + algt.alg.crypto); /* update context with ptr to dev */ ctx->dev = talitos_alg->dev; /* copy descriptor header template value */ - ctx->desc_hdr_template = talitos_alg->desc_hdr_template; + ctx->desc_hdr_template = talitos_alg->algt.desc_hdr_template; /* random first IV */ get_random_bytes(ctx->iv, TALITOS_MAX_IV_LENGTH); @@ -1750,7 +1753,15 @@ static int talitos_remove(struct of_device *ofdev) int i; list_for_each_entry_safe(t_alg, n, &priv->alg_list, entry) { - crypto_unregister_alg(&t_alg->crypto_alg); + switch (t_alg->algt.type) { + case CRYPTO_ALG_TYPE_ABLKCIPHER: + case CRYPTO_ALG_TYPE_AEAD: + crypto_unregister_alg(&t_alg->algt.alg.crypto); + break; + case CRYPTO_ALG_TYPE_AHASH: + crypto_unregister_ahash(&t_alg->algt.alg.hash); + break; + } list_del(&t_alg->entry); kfree(t_alg); } @@ -1791,8 +1802,16 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev, if (!t_alg) return ERR_PTR(-ENOMEM); - alg = &t_alg->crypto_alg; - *alg = template->alg.crypto; + t_alg->algt = *template; + + switch (t_alg->algt.type) { + case CRYPTO_ALG_TYPE_ABLKCIPHER: + case CRYPTO_ALG_TYPE_AEAD: + alg = &t_alg->algt.alg.crypto; + break; + case CRYPTO_ALG_TYPE_AHASH: + alg = &t_alg->algt.alg.hash.halg.base; + } alg->cra_module = THIS_MODULE; alg->cra_init = talitos_cra_init; @@ -1800,7 +1819,6 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev, alg->cra_alignmask = 0; alg->cra_ctxsize = sizeof(struct talitos_ctx); - t_alg->desc_hdr_template = template->desc_hdr_template; t_alg->dev = dev; return t_alg; @@ -1934,6 +1952,7 @@ static int talitos_probe(struct of_device *ofdev, for (i = 0; i < ARRAY_SIZE(driver_algs); i++) { if (hw_supports(dev, driver_algs[i].desc_hdr_template)) { struct talitos_crypto_alg *t_alg; + char *name = NULL; t_alg = talitos_alg_alloc(dev, &driver_algs[i]); if (IS_ERR(t_alg)) { @@ -1941,15 +1960,27 @@ static int talitos_probe(struct of_device *ofdev, goto err_out; } - err = crypto_register_alg(&t_alg->crypto_alg); + switch (t_alg->algt.type) { + case CRYPTO_ALG_TYPE_ABLKCIPHER: + case CRYPTO_ALG_TYPE_AEAD: + err = crypto_register_alg( + &t_alg->algt.alg.crypto); + name = t_alg->algt.alg.crypto.cra_driver_name; + break; + case CRYPTO_ALG_TYPE_AHASH: + err = crypto_register_ahash( + &t_alg->algt.alg.hash); + name = + t_alg->algt.alg.hash.halg.base.cra_driver_name; + break; + } if (err) { dev_err(dev, "%s alg registration failed\n", - t_alg->crypto_alg.cra_driver_name); + name); 
kfree(t_alg); + } else { + list_add_tail(&t_alg->entry, &priv->alg_list); + dev_info(dev, "%s\n", name); + } + } + } -- cgit v0.10.2 From 497f2e6b8b21407625a4fb34bc04b50eff098085 Mon Sep 17 00:00:00 2001 From: Lee Nipper Date: Wed, 19 May 2010 19:20:36 +1000 Subject: crypto: talitos - add hash algorithms Add the following algorithms to talitos: md5, sha1, sha256, sha384, sha512. These are all type ahash. Signed-off-by: Lee Nipper Acked-By: Kim Phillips Signed-off-by: Herbert Xu diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 4e8153a..1b08a39 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -67,6 +68,13 @@ struct talitos_ptr { __be32 ptr; /* address */ }; +static const struct talitos_ptr zero_entry = { + .len = 0, + .j_extent = 0, + .eptr = 0, + .ptr = 0 +}; + /* descriptor */ struct talitos_desc { __be32 hdr; /* header high bits */ @@ -694,7 +702,7 @@ static void talitos_unregister_rng(struct device *dev) #define TALITOS_MAX_KEY_SIZE 64 #define TALITOS_MAX_IV_LENGTH 16 /* max of AES_BLOCK_SIZE, DES3_EDE_BLOCK_SIZE */ -#define MD5_DIGEST_SIZE 16 +#define MD5_BLOCK_SIZE 64 struct talitos_ctx { struct device *dev; @@ -707,6 +715,22 @@ struct talitos_ctx { unsigned int authsize; }; +#define HASH_MAX_BLOCK_SIZE SHA512_BLOCK_SIZE +#define TALITOS_MDEU_MAX_CONTEXT_SIZE TALITOS_MDEU_CONTEXT_SIZE_SHA384_SHA512 + +struct talitos_ahash_req_ctx { + u64 count; + u8 hw_context[TALITOS_MDEU_MAX_CONTEXT_SIZE]; + unsigned int hw_context_size; + u8 buf[HASH_MAX_BLOCK_SIZE]; + u8 bufnext[HASH_MAX_BLOCK_SIZE]; + unsigned int first; + unsigned int last; + unsigned int to_hash_later; + struct scatterlist bufsl[2]; + struct scatterlist *psrc; +}; + static int aead_setauthsize(struct crypto_aead *authenc, unsigned int authsize) { @@ -823,10 +847,14 @@ static void talitos_sg_unmap(struct device *dev, else dma_unmap_sg(dev, src, src_nents, DMA_TO_DEVICE); - if (edesc->dst_is_chained) - talitos_unmap_sg_chain(dev, dst, DMA_FROM_DEVICE); - else - dma_unmap_sg(dev, dst, dst_nents, DMA_FROM_DEVICE); + if (dst) { + if (edesc->dst_is_chained) + talitos_unmap_sg_chain(dev, dst, + DMA_FROM_DEVICE); + else + dma_unmap_sg(dev, dst, dst_nents, + DMA_FROM_DEVICE); + } } else if (edesc->src_is_chained) talitos_unmap_sg_chain(dev, src, DMA_BIDIRECTIONAL); @@ -1116,12 +1144,67 @@ static int sg_count(struct scatterlist *sg_list, int nbytes, int *chained) return sg_nents; } +/** + * sg_copy_end_to_buffer - Copy end data from SG list to a linear buffer + * @sgl: The SG list + * @nents: Number of SG entries + * @buf: Where to copy to + * @buflen: The number of bytes to copy + * @skip: The number of bytes to skip before copying. + * Note: skip + buflen should equal SG total size. + * + * Returns the number of copied bytes.
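+ *
+ * For instance (illustrative): for a scatterlist holding 80 bytes in
+ * total, sg_copy_end_to_buffer(sgl, nents, buf, 16, 64) copies the
+ * final 16 bytes into buf.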
+ * + **/ +static size_t sg_copy_end_to_buffer(struct scatterlist *sgl, unsigned int nents, + void *buf, size_t buflen, unsigned int skip) +{ + unsigned int offset = 0; + unsigned int boffset = 0; + struct sg_mapping_iter miter; + unsigned long flags; + unsigned int sg_flags = SG_MITER_ATOMIC; + size_t total_buffer = buflen + skip; + + sg_flags |= SG_MITER_FROM_SG; + + sg_miter_start(&miter, sgl, nents, sg_flags); + + local_irq_save(flags); + + while (sg_miter_next(&miter) && offset < total_buffer) { + unsigned int len; + unsigned int ignore; + + if ((offset + miter.length) > skip) { + if (offset < skip) { + /* Copy part of this segment */ + ignore = skip - offset; + len = miter.length - ignore; + memcpy(buf + boffset, miter.addr + ignore, len); + } else { + /* Copy all of this segment */ + len = miter.length; + memcpy(buf + boffset, miter.addr, len); + } + boffset += len; + } + offset += miter.length; + } + + sg_miter_stop(&miter); + + local_irq_restore(flags); + return boffset; +} + /* * allocate and map the extended descriptor */ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, struct scatterlist *src, struct scatterlist *dst, + int hash_result, unsigned int cryptlen, unsigned int authsize, int icv_stashing, @@ -1141,11 +1224,16 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, src_nents = sg_count(src, cryptlen + authsize, &src_chained); src_nents = (src_nents == 1) ? 0 : src_nents; - if (dst == src) { - dst_nents = src_nents; + if (hash_result) { + dst_nents = 0; } else { - dst_nents = sg_count(dst, cryptlen + authsize, &dst_chained); - dst_nents = (dst_nents == 1) ? 0 : dst_nents; + if (dst == src) { + dst_nents = src_nents; + } else { + dst_nents = sg_count(dst, cryptlen + authsize, + &dst_chained); + dst_nents = (dst_nents == 1) ? 
0 : dst_nents; + } } /* @@ -1174,8 +1262,10 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, edesc->src_is_chained = src_chained; edesc->dst_is_chained = dst_chained; edesc->dma_len = dma_len; - edesc->dma_link_tbl = dma_map_single(dev, &edesc->link_tbl[0], - edesc->dma_len, DMA_BIDIRECTIONAL); + if (dma_len) + edesc->dma_link_tbl = dma_map_single(dev, &edesc->link_tbl[0], + edesc->dma_len, + DMA_BIDIRECTIONAL); return edesc; } @@ -1186,7 +1276,7 @@ static struct talitos_edesc *aead_edesc_alloc(struct aead_request *areq, struct crypto_aead *authenc = crypto_aead_reqtfm(areq); struct talitos_ctx *ctx = crypto_aead_ctx(authenc); - return talitos_edesc_alloc(ctx->dev, areq->src, areq->dst, + return talitos_edesc_alloc(ctx->dev, areq->src, areq->dst, 0, areq->cryptlen, ctx->authsize, icv_stashing, areq->base.flags); } @@ -1443,8 +1533,8 @@ static struct talitos_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request * struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(areq); struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher); - return talitos_edesc_alloc(ctx->dev, areq->src, areq->dst, areq->nbytes, - 0, 0, areq->base.flags); + return talitos_edesc_alloc(ctx->dev, areq->src, areq->dst, 0, + areq->nbytes, 0, 0, areq->base.flags); } static int ablkcipher_encrypt(struct ablkcipher_request *areq) @@ -1480,6 +1570,286 @@ static int ablkcipher_decrypt(struct ablkcipher_request *areq) return common_nonsnoop(edesc, areq, NULL, ablkcipher_done); } +static void common_nonsnoop_hash_unmap(struct device *dev, + struct talitos_edesc *edesc, + struct ahash_request *areq) +{ + struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); + + unmap_single_talitos_ptr(dev, &edesc->desc.ptr[5], DMA_FROM_DEVICE); + + /* When using hashctx-in, must unmap it. 
*/ + if (edesc->desc.ptr[1].len) + unmap_single_talitos_ptr(dev, &edesc->desc.ptr[1], + DMA_TO_DEVICE); + + if (edesc->desc.ptr[2].len) + unmap_single_talitos_ptr(dev, &edesc->desc.ptr[2], + DMA_TO_DEVICE); + + talitos_sg_unmap(dev, edesc, req_ctx->psrc, NULL); + + if (edesc->dma_len) + dma_unmap_single(dev, edesc->dma_link_tbl, edesc->dma_len, + DMA_BIDIRECTIONAL); + +} + +static void ahash_done(struct device *dev, + struct talitos_desc *desc, void *context, + int err) +{ + struct ahash_request *areq = context; + struct talitos_edesc *edesc = + container_of(desc, struct talitos_edesc, desc); + struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); + + if (!req_ctx->last && req_ctx->to_hash_later) { + /* Position any partial block for next update/final/finup */ + memcpy(req_ctx->buf, req_ctx->bufnext, req_ctx->to_hash_later); + } + common_nonsnoop_hash_unmap(dev, edesc, areq); + + kfree(edesc); + + areq->base.complete(&areq->base, err); +} + +static int common_nonsnoop_hash(struct talitos_edesc *edesc, + struct ahash_request *areq, unsigned int length, + void (*callback) (struct device *dev, + struct talitos_desc *desc, + void *context, int error)) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); + struct talitos_ctx *ctx = crypto_ahash_ctx(tfm); + struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); + struct device *dev = ctx->dev; + struct talitos_desc *desc = &edesc->desc; + int sg_count, ret; + + /* first DWORD empty */ + desc->ptr[0] = zero_entry; + + /* hash context in (if not first) */ + if (!req_ctx->first) { + map_single_talitos_ptr(dev, &desc->ptr[1], + req_ctx->hw_context_size, + (char *)req_ctx->hw_context, 0, + DMA_TO_DEVICE); + } else { + desc->ptr[1] = zero_entry; + /* Indicate next op is not the first. */ + req_ctx->first = 0; + } + + /* HMAC key */ + if (ctx->keylen) + map_single_talitos_ptr(dev, &desc->ptr[2], ctx->keylen, + (char *)&ctx->key, 0, DMA_TO_DEVICE); + else + desc->ptr[2] = zero_entry; + + /* + * data in + */ + desc->ptr[3].len = cpu_to_be16(length); + desc->ptr[3].j_extent = 0; + + sg_count = talitos_map_sg(dev, req_ctx->psrc, + edesc->src_nents ? 
: 1, + DMA_TO_DEVICE, + edesc->src_is_chained); + + if (sg_count == 1) { + to_talitos_ptr(&desc->ptr[3], sg_dma_address(req_ctx->psrc)); + } else { + sg_count = sg_to_link_tbl(req_ctx->psrc, sg_count, length, + &edesc->link_tbl[0]); + if (sg_count > 1) { + desc->ptr[3].j_extent |= DESC_PTR_LNKTBL_JUMP; + to_talitos_ptr(&desc->ptr[3], edesc->dma_link_tbl); + dma_sync_single_for_device(ctx->dev, + edesc->dma_link_tbl, + edesc->dma_len, + DMA_BIDIRECTIONAL); + } else { + /* Only one segment now, so no link tbl needed */ + to_talitos_ptr(&desc->ptr[3], + sg_dma_address(req_ctx->psrc)); + } + } + + /* fifth DWORD empty */ + desc->ptr[4] = zero_entry; + + /* hash/HMAC out -or- hash context out */ + if (req_ctx->last) + map_single_talitos_ptr(dev, &desc->ptr[5], + crypto_ahash_digestsize(tfm), + areq->result, 0, DMA_FROM_DEVICE); + else + map_single_talitos_ptr(dev, &desc->ptr[5], + req_ctx->hw_context_size, + req_ctx->hw_context, 0, DMA_FROM_DEVICE); + + /* last DWORD empty */ + desc->ptr[6] = zero_entry; + + ret = talitos_submit(dev, desc, callback, areq); + if (ret != -EINPROGRESS) { + common_nonsnoop_hash_unmap(dev, edesc, areq); + kfree(edesc); + } + return ret; +} + +static struct talitos_edesc *ahash_edesc_alloc(struct ahash_request *areq, + unsigned int nbytes) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); + struct talitos_ctx *ctx = crypto_ahash_ctx(tfm); + struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); + + return talitos_edesc_alloc(ctx->dev, req_ctx->psrc, NULL, 1, + nbytes, 0, 0, areq->base.flags); +} + +static int ahash_init(struct ahash_request *areq) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); + struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); + + /* Initialize the context */ + req_ctx->count = 0; + req_ctx->first = 1; /* first indicates h/w must init it's context */ + req_ctx->hw_context_size = + (crypto_ahash_digestsize(tfm) <= SHA256_DIGEST_SIZE) + ? TALITOS_MDEU_CONTEXT_SIZE_MD5_SHA1_SHA256 + : TALITOS_MDEU_CONTEXT_SIZE_SHA384_SHA512; + + return 0; +} + +static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); + struct talitos_ctx *ctx = crypto_ahash_ctx(tfm); + struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); + struct talitos_edesc *edesc; + unsigned int blocksize = + crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm)); + unsigned int nbytes_to_hash; + unsigned int to_hash_later; + unsigned int index; + int chained; + + index = req_ctx->count & (blocksize - 1); + req_ctx->count += nbytes; + + if (!req_ctx->last && (index + nbytes) < blocksize) { + /* Buffer the partial block */ + sg_copy_to_buffer(areq->src, + sg_count(areq->src, nbytes, &chained), + req_ctx->buf + index, nbytes); + return 0; + } + + if (index) { + /* partial block from previous update; chain it in. */ + sg_init_table(req_ctx->bufsl, (nbytes) ? 2 : 1); + sg_set_buf(req_ctx->bufsl, req_ctx->buf, index); + if (nbytes) + scatterwalk_sg_chain(req_ctx->bufsl, 2, + areq->src); + req_ctx->psrc = req_ctx->bufsl; + } else { + req_ctx->psrc = areq->src; + } + nbytes_to_hash = index + nbytes; + if (!req_ctx->last) { + to_hash_later = (nbytes_to_hash & (blocksize - 1)); + if (to_hash_later) { + int nents; + /* Must copy to_hash_later bytes from the end + * to bufnext (a partial block) for later. 
+ */ + nents = sg_count(areq->src, nbytes, &chained); + sg_copy_end_to_buffer(areq->src, nents, + req_ctx->bufnext, + to_hash_later, + nbytes - to_hash_later); + + /* Adjust count for what will be hashed now */ + nbytes_to_hash -= to_hash_later; + } + req_ctx->to_hash_later = to_hash_later; + } + + /* allocate extended descriptor */ + edesc = ahash_edesc_alloc(areq, nbytes_to_hash); + if (IS_ERR(edesc)) + return PTR_ERR(edesc); + + edesc->desc.hdr = ctx->desc_hdr_template; + + /* On last one, request SEC to pad; otherwise continue */ + if (req_ctx->last) + edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_PAD; + else + edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_CONT; + + /* On first one, request SEC to INIT hash. */ + if (req_ctx->first) + edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_INIT; + + /* When the tfm context has a keylen, it's an HMAC. + * A first or last (ie. not middle) descriptor must request HMAC. + */ + if (ctx->keylen && (req_ctx->first || req_ctx->last)) + edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_HMAC; + + return common_nonsnoop_hash(edesc, areq, nbytes_to_hash, + ahash_done); +} + +static int ahash_update(struct ahash_request *areq) +{ + struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); + + req_ctx->last = 0; + + return ahash_process_req(areq, areq->nbytes); +} + +static int ahash_final(struct ahash_request *areq) +{ + struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); + + req_ctx->last = 1; + + return ahash_process_req(areq, 0); +} + +static int ahash_finup(struct ahash_request *areq) +{ + struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); + + req_ctx->last = 1; + + return ahash_process_req(areq, areq->nbytes); +} + +static int ahash_digest(struct ahash_request *areq) +{ + struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); + + ahash_init(areq); + req_ctx->last = 1; + + return ahash_process_req(areq, areq->nbytes); +} + struct talitos_alg_template { u32 type; union { @@ -1695,7 +2065,113 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_SEL0_DEU | DESC_HDR_MODE0_DEU_CBC | DESC_HDR_MODE0_DEU_3DES, - } + }, + /* AHASH algorithms. 
*/ + { .type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .init = ahash_init, + .update = ahash_update, + .final = ahash_final, + .finup = ahash_finup, + .digest = ahash_digest, + .halg.digestsize = MD5_DIGEST_SIZE, + .halg.base = { + .cra_name = "md5", + .cra_driver_name = "md5-talitos", + .cra_blocksize = MD5_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC, + .cra_type = &crypto_ahash_type + } + }, + .desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU | + DESC_HDR_SEL0_MDEUA | + DESC_HDR_MODE0_MDEU_MD5, + }, + { .type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .init = ahash_init, + .update = ahash_update, + .final = ahash_final, + .finup = ahash_finup, + .digest = ahash_digest, + .halg.digestsize = SHA1_DIGEST_SIZE, + .halg.base = { + .cra_name = "sha1", + .cra_driver_name = "sha1-talitos", + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC, + .cra_type = &crypto_ahash_type + } + }, + .desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU | + DESC_HDR_SEL0_MDEUA | + DESC_HDR_MODE0_MDEU_SHA1, + }, + { .type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .init = ahash_init, + .update = ahash_update, + .final = ahash_final, + .finup = ahash_finup, + .digest = ahash_digest, + .halg.digestsize = SHA256_DIGEST_SIZE, + .halg.base = { + .cra_name = "sha256", + .cra_driver_name = "sha256-talitos", + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC, + .cra_type = &crypto_ahash_type + } + }, + .desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU | + DESC_HDR_SEL0_MDEUA | + DESC_HDR_MODE0_MDEU_SHA256, + }, + { .type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .init = ahash_init, + .update = ahash_update, + .final = ahash_final, + .finup = ahash_finup, + .digest = ahash_digest, + .halg.digestsize = SHA384_DIGEST_SIZE, + .halg.base = { + .cra_name = "sha384", + .cra_driver_name = "sha384-talitos", + .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC, + .cra_type = &crypto_ahash_type + } + }, + .desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU | + DESC_HDR_SEL0_MDEUB | + DESC_HDR_MODE0_MDEUB_SHA384, + }, + { .type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .init = ahash_init, + .update = ahash_update, + .final = ahash_final, + .finup = ahash_finup, + .digest = ahash_digest, + .halg.digestsize = SHA512_DIGEST_SIZE, + .halg.base = { + .cra_name = "sha512", + .cra_driver_name = "sha512-talitos", + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC, + .cra_type = &crypto_ahash_type + } + }, + .desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU | + DESC_HDR_SEL0_MDEUB | + DESC_HDR_MODE0_MDEUB_SHA512, + }, }; struct talitos_crypto_alg { @@ -1710,8 +2186,13 @@ static int talitos_cra_init(struct crypto_tfm *tfm) struct talitos_crypto_alg *talitos_alg; struct talitos_ctx *ctx = crypto_tfm_ctx(tfm); - talitos_alg = container_of(alg, struct talitos_crypto_alg, - algt.alg.crypto); + if ((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) == CRYPTO_ALG_TYPE_AHASH) + talitos_alg = container_of(__crypto_ahash_alg(alg), + struct talitos_crypto_alg, + algt.alg.hash); + else + talitos_alg = container_of(alg, struct talitos_crypto_alg, + algt.alg.crypto); /* update context with ptr to dev */ ctx->dev = talitos_alg->dev; @@ -1719,12 +2200,34 @@ static int talitos_cra_init(struct crypto_tfm *tfm) /* copy descriptor header template value */ ctx->desc_hdr_template = talitos_alg->algt.desc_hdr_template; + return 0; +} + 
+static int talitos_cra_init_aead(struct crypto_tfm *tfm) +{ + struct talitos_ctx *ctx = crypto_tfm_ctx(tfm); + + talitos_cra_init(tfm); + /* random first IV */ get_random_bytes(ctx->iv, TALITOS_MAX_IV_LENGTH); return 0; } +static int talitos_cra_init_ahash(struct crypto_tfm *tfm) +{ + struct talitos_ctx *ctx = crypto_tfm_ctx(tfm); + + talitos_cra_init(tfm); + + ctx->keylen = 0; + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct talitos_ahash_req_ctx)); + + return 0; +} + /* * given the alg's descriptor header template, determine whether descriptor * type and primary/secondary execution units required match the hw @@ -1806,15 +2309,20 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev, switch (t_alg->algt.type) { case CRYPTO_ALG_TYPE_ABLKCIPHER: + alg = &t_alg->algt.alg.crypto; + alg->cra_init = talitos_cra_init; + break; case CRYPTO_ALG_TYPE_AEAD: alg = &t_alg->algt.alg.crypto; + alg->cra_init = talitos_cra_init_aead; break; case CRYPTO_ALG_TYPE_AHASH: alg = &t_alg->algt.alg.hash.halg.base; + alg->cra_init = talitos_cra_init_ahash; + break; } alg->cra_module = THIS_MODULE; - alg->cra_init = talitos_cra_init; alg->cra_priority = TALITOS_CRA_PRIORITY; alg->cra_alignmask = 0; alg->cra_ctxsize = sizeof(struct talitos_ctx); diff --git a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h index ff5a145..05c57b7 100644 --- a/drivers/crypto/talitos.h +++ b/drivers/crypto/talitos.h @@ -130,6 +130,9 @@ #define TALITOS_CRCUISR 0xf030 /* cyclic redundancy check unit*/ #define TALITOS_CRCUISR_LO 0xf034 +#define TALITOS_MDEU_CONTEXT_SIZE_MD5_SHA1_SHA256 0x28 +#define TALITOS_MDEU_CONTEXT_SIZE_SHA384_SHA512 0x48 + /* * talitos descriptor header (hdr) bits */ @@ -157,12 +160,15 @@ #define DESC_HDR_MODE0_AESU_CBC cpu_to_be32(0x00200000) #define DESC_HDR_MODE0_DEU_CBC cpu_to_be32(0x00400000) #define DESC_HDR_MODE0_DEU_3DES cpu_to_be32(0x00200000) +#define DESC_HDR_MODE0_MDEU_CONT cpu_to_be32(0x08000000) #define DESC_HDR_MODE0_MDEU_INIT cpu_to_be32(0x01000000) #define DESC_HDR_MODE0_MDEU_HMAC cpu_to_be32(0x00800000) #define DESC_HDR_MODE0_MDEU_PAD cpu_to_be32(0x00400000) #define DESC_HDR_MODE0_MDEU_MD5 cpu_to_be32(0x00200000) #define DESC_HDR_MODE0_MDEU_SHA256 cpu_to_be32(0x00100000) #define DESC_HDR_MODE0_MDEU_SHA1 cpu_to_be32(0x00000000) +#define DESC_HDR_MODE0_MDEUB_SHA384 cpu_to_be32(0x00000000) +#define DESC_HDR_MODE0_MDEUB_SHA512 cpu_to_be32(0x00200000) #define DESC_HDR_MODE0_MDEU_MD5_HMAC (DESC_HDR_MODE0_MDEU_MD5 | \ DESC_HDR_MODE0_MDEU_HMAC) #define DESC_HDR_MODE0_MDEU_SHA256_HMAC (DESC_HDR_MODE0_MDEU_SHA256 | \ @@ -184,6 +190,8 @@ #define DESC_HDR_MODE1_MDEU_MD5 cpu_to_be32(0x00000200) #define DESC_HDR_MODE1_MDEU_SHA256 cpu_to_be32(0x00000100) #define DESC_HDR_MODE1_MDEU_SHA1 cpu_to_be32(0x00000000) +#define DESC_HDR_MODE1_MDEUB_SHA384 cpu_to_be32(0x00000000) +#define DESC_HDR_MODE1_MDEUB_SHA512 cpu_to_be32(0x00000200) #define DESC_HDR_MODE1_MDEU_MD5_HMAC (DESC_HDR_MODE1_MDEU_MD5 | \ DESC_HDR_MODE1_MDEU_HMAC) #define DESC_HDR_MODE1_MDEU_SHA256_HMAC (DESC_HDR_MODE1_MDEU_SHA256 | \ -- cgit v0.10.2 From 60f208d7836216885cdcd6f77a02f31dbc66f169 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Wed, 19 May 2010 19:21:53 +1000 Subject: crypto: talitos - add support for sha224 SEC h/w versions 2.1 and above support sha224 via explicit instruction. Performing sha224 ahashes on earlier versions is still possible because they support sha256 (sha224 is sha256 with different initial constants and a different truncation length). 
We do this by overriding hardware context self-initialization and performing it manually in s/w instead. Thanks to Lee for his fixes for correct execution on actual sec2.0 h/w. Signed-off-by: Kim Phillips Signed-off-by: Lee Nipper Signed-off-by: Herbert Xu diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 1b08a39..6a0f59d 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -1,7 +1,7 @@ /* * talitos - Freescale Integrated Security Engine (SEC) device driver * - * Copyright (c) 2008 Freescale Semiconductor, Inc. + * Copyright (c) 2008-2010 Freescale Semiconductor, Inc. * * Scatterlist Crypto API glue code copied from files with the following: * Copyright (c) 2006-2007 Herbert Xu @@ -156,6 +156,7 @@ struct talitos_private { /* .features flag */ #define TALITOS_FTR_SRC_LINK_TBL_LEN_INCLUDES_EXTENT 0x00000001 #define TALITOS_FTR_HW_AUTH_CHECK 0x00000002 +#define TALITOS_FTR_SHA224_HWINIT 0x00000004 static void to_talitos_ptr(struct talitos_ptr *talitos_ptr, dma_addr_t dma_addr) { @@ -720,10 +721,11 @@ struct talitos_ctx { struct talitos_ahash_req_ctx { u64 count; - u8 hw_context[TALITOS_MDEU_MAX_CONTEXT_SIZE]; + u32 hw_context[TALITOS_MDEU_MAX_CONTEXT_SIZE / sizeof(u32)]; unsigned int hw_context_size; u8 buf[HASH_MAX_BLOCK_SIZE]; u8 bufnext[HASH_MAX_BLOCK_SIZE]; + unsigned int swinit; unsigned int first; unsigned int last; unsigned int to_hash_later; @@ -1631,12 +1633,13 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, /* first DWORD empty */ desc->ptr[0] = zero_entry; - /* hash context in (if not first) */ - if (!req_ctx->first) { + /* hash context in */ + if (!req_ctx->first || req_ctx->swinit) { map_single_talitos_ptr(dev, &desc->ptr[1], req_ctx->hw_context_size, (char *)req_ctx->hw_context, 0, DMA_TO_DEVICE); + req_ctx->swinit = 0; } else { desc->ptr[1] = zero_entry; /* Indicate next op is not the first. */ @@ -1722,7 +1725,8 @@ static int ahash_init(struct ahash_request *areq) /* Initialize the context */ req_ctx->count = 0; - req_ctx->first = 1; /* first indicates h/w must init it's context */ + req_ctx->first = 1; /* first indicates h/w must init its context */ + req_ctx->swinit = 0; /* assume h/w init of context */ req_ctx->hw_context_size = (crypto_ahash_digestsize(tfm) <= SHA256_DIGEST_SIZE) ? TALITOS_MDEU_CONTEXT_SIZE_MD5_SHA1_SHA256 : TALITOS_MDEU_CONTEXT_SIZE_SHA384_SHA512; @@ -1731,6 +1735,33 @@ static int ahash_init(struct ahash_request *areq) return 0; } +/* + * on h/w without explicit sha224 support, we initialize h/w context + * manually with sha224 constants, and tell it to run sha256. 
+ */ +static int ahash_init_sha224_swinit(struct ahash_request *areq) +{ + struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); + + ahash_init(areq); + req_ctx->swinit = 1;/* prevent h/w initting context with sha256 values*/ + + req_ctx->hw_context[0] = cpu_to_be32(SHA224_H0); + req_ctx->hw_context[1] = cpu_to_be32(SHA224_H1); + req_ctx->hw_context[2] = cpu_to_be32(SHA224_H2); + req_ctx->hw_context[3] = cpu_to_be32(SHA224_H3); + req_ctx->hw_context[4] = cpu_to_be32(SHA224_H4); + req_ctx->hw_context[5] = cpu_to_be32(SHA224_H5); + req_ctx->hw_context[6] = cpu_to_be32(SHA224_H6); + req_ctx->hw_context[7] = cpu_to_be32(SHA224_H7); + + /* init 64-bit count */ + req_ctx->hw_context[8] = 0; + req_ctx->hw_context[9] = 0; + + return 0; +} + static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); @@ -1799,8 +1830,8 @@ static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes) else edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_CONT; - /* On first one, request SEC to INIT hash. */ - if (req_ctx->first) + /* request SEC to INIT hash. */ + if (req_ctx->first && !req_ctx->swinit) edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_INIT; /* When the tfm context has a keylen, it's an HMAC. @@ -1843,8 +1874,9 @@ static int ahash_finup(struct ahash_request *areq) static int ahash_digest(struct ahash_request *areq) { struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); + struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq); - ahash_init(areq); + ahash->init(areq); req_ctx->last = 1; return ahash_process_req(areq, areq->nbytes); @@ -2116,6 +2148,27 @@ static struct talitos_alg_template driver_algs[] = { .final = ahash_final, .finup = ahash_finup, .digest = ahash_digest, + .halg.digestsize = SHA224_DIGEST_SIZE, + .halg.base = { + .cra_name = "sha224", + .cra_driver_name = "sha224-talitos", + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC, + .cra_type = &crypto_ahash_type + } + }, + .desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU | + DESC_HDR_SEL0_MDEUA | + DESC_HDR_MODE0_MDEU_SHA224, + }, + { .type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .init = ahash_init, + .update = ahash_update, + .final = ahash_final, + .finup = ahash_finup, + .digest = ahash_digest, .halg.digestsize = SHA256_DIGEST_SIZE, .halg.base = { .cra_name = "sha256", @@ -2298,6 +2351,7 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev, struct talitos_alg_template *template) { + struct talitos_private *priv = dev_get_drvdata(dev); struct talitos_crypto_alg *t_alg; struct crypto_alg *alg; @@ -2319,6 +2373,14 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev, case CRYPTO_ALG_TYPE_AHASH: alg = &t_alg->algt.alg.hash.halg.base; alg->cra_init = talitos_cra_init_ahash; + if (!(priv->features & TALITOS_FTR_SHA224_HWINIT) && + !strcmp(alg->cra_name, "sha224")) { + t_alg->algt.alg.hash.init = ahash_init_sha224_swinit; + t_alg->algt.desc_hdr_template = + DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU | + DESC_HDR_SEL0_MDEUA | + DESC_HDR_MODE0_MDEU_SHA256; + } break; } @@ -2406,7 +2468,8 @@ static int talitos_probe(struct of_device *ofdev, priv->features |= TALITOS_FTR_SRC_LINK_TBL_LEN_INCLUDES_EXTENT; if (of_device_is_compatible(np, "fsl,sec2.1")) - priv->features |= TALITOS_FTR_HW_AUTH_CHECK; + priv->features |= TALITOS_FTR_HW_AUTH_CHECK | + TALITOS_FTR_SHA224_HWINIT; priv->chan = kzalloc(sizeof(struct talitos_channel) * priv->num_channels, GFP_KERNEL); diff 
--git a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h index 05c57b7..0b746ac 100644 --- a/drivers/crypto/talitos.h +++ b/drivers/crypto/talitos.h @@ -1,7 +1,7 @@ /* * Freescale SEC (talitos) device register and descriptor header defines * - * Copyright (c) 2006-2008 Freescale Semiconductor, Inc. + * Copyright (c) 2006-2010 Freescale Semiconductor, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -164,6 +164,7 @@ #define DESC_HDR_MODE0_MDEU_INIT cpu_to_be32(0x01000000) #define DESC_HDR_MODE0_MDEU_HMAC cpu_to_be32(0x00800000) #define DESC_HDR_MODE0_MDEU_PAD cpu_to_be32(0x00400000) +#define DESC_HDR_MODE0_MDEU_SHA224 cpu_to_be32(0x00300000) #define DESC_HDR_MODE0_MDEU_MD5 cpu_to_be32(0x00200000) #define DESC_HDR_MODE0_MDEU_SHA256 cpu_to_be32(0x00100000) #define DESC_HDR_MODE0_MDEU_SHA1 cpu_to_be32(0x00000000) @@ -187,6 +188,7 @@ #define DESC_HDR_MODE1_MDEU_INIT cpu_to_be32(0x00001000) #define DESC_HDR_MODE1_MDEU_HMAC cpu_to_be32(0x00000800) #define DESC_HDR_MODE1_MDEU_PAD cpu_to_be32(0x00000400) +#define DESC_HDR_MODE1_MDEU_SHA224 cpu_to_be32(0x00000300) #define DESC_HDR_MODE1_MDEU_MD5 cpu_to_be32(0x00000200) #define DESC_HDR_MODE1_MDEU_SHA256 cpu_to_be32(0x00000100) #define DESC_HDR_MODE1_MDEU_SHA1 cpu_to_be32(0x00000000) -- cgit v0.10.2 From 921bae54693f26d01fb8e10ee6968b5cd8184551 Mon Sep 17 00:00:00 2001 From: Shikhar Khattar Date: Thu, 20 May 2010 19:40:31 +1000 Subject: crypto: authenc - Fix cryptlen calculation This patch (applied against 2.6.34) fixes the calculation of the length of the ABLKCIPHER decrypt request ("cryptlen") after an asynchronous hash request has been completed in the AUTHENC interface. Signed-off-by: Shikhar Khattar Signed-off-by: Herbert Xu diff --git a/crypto/authenc.c b/crypto/authenc.c index 05eb32e..b9884ee 100644 --- a/crypto/authenc.c +++ b/crypto/authenc.c @@ -181,6 +181,7 @@ static void authenc_verify_ahash_update_done(struct crypto_async_request *areq, struct crypto_authenc_ctx *ctx = crypto_aead_ctx(authenc); struct authenc_request_ctx *areq_ctx = aead_request_ctx(req); struct ahash_request *ahreq = (void *)(areq_ctx->tail + ctx->reqoff); + unsigned int cryptlen = req->cryptlen; if (err) goto out; @@ -196,6 +197,7 @@ static void authenc_verify_ahash_update_done(struct crypto_async_request *areq, goto out; authsize = crypto_aead_authsize(authenc); + cryptlen -= authsize; ihash = ahreq->result + authsize; scatterwalk_map_and_copy(ihash, areq_ctx->sg, areq_ctx->cryptlen, authsize, 0); @@ -209,7 +211,7 @@ static void authenc_verify_ahash_update_done(struct crypto_async_request *areq, ablkcipher_request_set_callback(abreq, aead_request_flags(req), req->base.complete, req->base.data); ablkcipher_request_set_crypt(abreq, req->src, req->dst, - req->cryptlen, req->iv); + cryptlen, req->iv); err = crypto_ablkcipher_decrypt(abreq); @@ -228,11 +230,13 @@ static void authenc_verify_ahash_done(struct crypto_async_request *areq, struct crypto_authenc_ctx *ctx = crypto_aead_ctx(authenc); struct authenc_request_ctx *areq_ctx = aead_request_ctx(req); struct ahash_request *ahreq = (void *)(areq_ctx->tail + ctx->reqoff); + unsigned int cryptlen = req->cryptlen; if (err) goto out; authsize = crypto_aead_authsize(authenc); + cryptlen -= authsize; ihash = ahreq->result + authsize; scatterwalk_map_and_copy(ihash, areq_ctx->sg, areq_ctx->cryptlen, authsize, 0); @@ -246,7 +250,7 @@ static void authenc_verify_ahash_done(struct crypto_async_request *areq, 
ablkcipher_request_set_callback(abreq, aead_request_flags(req), req->base.complete, req->base.data); ablkcipher_request_set_crypt(abreq, req->src, req->dst, - req->cryptlen, req->iv); + cryptlen, req->iv); err = crypto_ablkcipher_decrypt(abreq); -- cgit v0.10.2 From e954bc91bdd4bb08b8325478c5004b24a23a3522 Mon Sep 17 00:00:00 2001 From: Matt Mackall Date: Thu, 20 May 2010 19:55:01 +1000 Subject: random: simplify fips mode Rather than dynamically allocate 10 bytes, move it to static allocation. This saves space and avoids the need for error checking. Signed-off-by: Matt Mackall Signed-off-by: Herbert Xu diff --git a/drivers/char/random.c b/drivers/char/random.c index 2fd3d39..8d85587 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -257,6 +257,7 @@ #define INPUT_POOL_WORDS 128 #define OUTPUT_POOL_WORDS 32 #define SEC_XFER_SIZE 512 +#define EXTRACT_SIZE 10 /* * The minimum number of bits of entropy before we wake up a read on @@ -414,7 +415,7 @@ struct entropy_store { unsigned add_ptr; int entropy_count; int input_rotate; - __u8 *last_data; + __u8 last_data[EXTRACT_SIZE]; }; static __u32 input_pool_data[INPUT_POOL_WORDS]; @@ -714,8 +715,6 @@ void add_disk_randomness(struct gendisk *disk) } #endif -#define EXTRACT_SIZE 10 - /********************************************************************* * * Entropy extraction routines @@ -862,7 +861,7 @@ static ssize_t extract_entropy(struct entropy_store *r, void *buf, while (nbytes) { extract_buf(r, tmp); - if (r->last_data) { + if (fips_enabled) { spin_lock_irqsave(&r->lock, flags); if (!memcmp(tmp, r->last_data, EXTRACT_SIZE)) panic("Hardware RNG duplicated output!\n"); @@ -951,9 +950,6 @@ static void init_std_data(struct entropy_store *r) now = ktime_get_real(); mix_pool_bytes(r, &now, sizeof(now)); mix_pool_bytes(r, utsname(), sizeof(*(utsname()))); - /* Enable continuous test in fips mode */ - if (fips_enabled) - r->last_data = kmalloc(EXTRACT_SIZE, GFP_KERNEL); } static int rand_initialize(void) -- cgit v0.10.2
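
Editor's note on the sha224 patch earlier in this series: it relies on SHA-224 being SHA-256 run from a different set of initial hash values, with the result truncated to 28 bytes. The following is a minimal standalone sketch (illustration only, not driver code) of the context seeding that ahash_init_sha224_swinit() performs before the MDEU is told to continue a SHA-256 computation; the constants are the FIPS 180-4 SHA-224 initial hash words, and the layout (eight big-endian state words followed by a zeroed 64-bit bit count, 0x28 bytes total) mirrors the patch. CTX_WORDS and sha224_seed_context() are names invented here for the sketch.

/*
 * Illustration only -- not part of the driver.  Seed a SHA-256-style
 * context with the SHA-224 initial hash values so that running SHA-256
 * over the data (and truncating the result to 28 bytes) yields SHA-224.
 */
#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>		/* htonl(): stands in for cpu_to_be32() */

#define CTX_WORDS 10		/* 8 state words + 2 words of bit count */

static void sha224_seed_context(uint32_t ctx[CTX_WORDS])
{
	static const uint32_t sha224_iv[8] = {
		0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939,
		0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4,
	};
	int i;

	for (i = 0; i < 8; i++)
		ctx[i] = htonl(sha224_iv[i]);	/* hardware context is big endian */
	ctx[8] = 0;				/* 64-bit message count ... */
	ctx[9] = 0;				/* ... starts at zero */
}

int main(void)
{
	uint32_t ctx[CTX_WORDS];

	sha224_seed_context(ctx);
	printf("context seeded, word 0 = 0x%08x\n", ctx[0]);
	return 0;
}

With the context pre-seeded this way, the descriptor requests the SHA-256 mode without the INIT bit, and the 28-byte digest handed back is exactly the SHA-224 result by definition of the truncation.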
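Editor's note on the final random.c patch: the FIPS "continuous test" it retains compares each EXTRACT_SIZE-byte block pulled from the pool against the previous one and treats identical back-to-back blocks as an RNG failure; the patch only changes last_data from a conditional kmalloc to a static buffer. Below is a rough user-space sketch of the same pattern (assumptions: extract_buf() is a placeholder for the real pool extraction, abort() stands in for panic(), and the spinlock the kernel holds around the compare-and-save is omitted).

/*
 * Illustration only -- not kernel code.  FIPS-style continuous test:
 * reject two identical consecutive output blocks from the generator.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define EXTRACT_SIZE 10

static unsigned char last_data[EXTRACT_SIZE];	/* statically allocated, as in the patch */

/* placeholder entropy source; a real implementation reads the pool */
static void extract_buf(unsigned char out[EXTRACT_SIZE])
{
	int i;

	for (i = 0; i < EXTRACT_SIZE; i++)
		out[i] = (unsigned char)rand();
}

static size_t extract_entropy(unsigned char *buf, size_t nbytes, int fips_enabled)
{
	unsigned char tmp[EXTRACT_SIZE];
	size_t total = 0;

	while (nbytes) {
		size_t n = nbytes < EXTRACT_SIZE ? nbytes : EXTRACT_SIZE;

		extract_buf(tmp);
		if (fips_enabled) {
			if (!memcmp(tmp, last_data, EXTRACT_SIZE)) {
				fprintf(stderr, "duplicated RNG output\n");
				abort();
			}
			memcpy(last_data, tmp, EXTRACT_SIZE);
		}
		memcpy(buf + total, tmp, n);
		total += n;
		nbytes -= n;
	}
	return total;
}

int main(void)
{
	unsigned char out[32];

	srand(1);
	printf("extracted %zu bytes\n", extract_entropy(out, sizeof(out), 1));
	return 0;
}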