From f3c85bc1bc72b4cc8d58664a490a9d42bdb6565a Mon Sep 17 00:00:00 2001
From: Lee Nipper <lee.nipper@freescale.com>
Date: Wed, 30 Jul 2008 16:26:57 +0800
Subject: crypto: talitos - Add handling for SEC 3.x treatment of link table

Later SEC revision requires the link table (used for scatter/gather)
to have an extra entry to account for the total length in descriptor [4],
which contains cipher Input and ICV.
This only applies to decrypt, not encrypt.
Without this change, on 837x, a gather return/length error results
when a decryption uses a link table to gather the fragments.
This is observed by doing a ping with size of 1447 or larger with AES,
or a ping with size 1455 or larger with 3des.

So, add check for SEC compatible "fsl,3.0" for using extra link table entry.

Signed-off-by: Lee Nipper <lee.nipper@freescale.com>
Signed-off-by: Kim Phillips <kim.phillips@freescale.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 681c15f..ee827a7 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -96,6 +96,9 @@ struct talitos_private {
 	unsigned int exec_units;
 	unsigned int desc_types;
 
+	/* SEC Compatibility info */
+	unsigned long features;
+
 	/* next channel to be assigned next incoming descriptor */
 	atomic_t last_chan;
 
@@ -133,6 +136,9 @@ struct talitos_private {
 	struct hwrng rng;
 };
 
+/* .features flag */
+#define TALITOS_FTR_SRC_LINK_TBL_LEN_INCLUDES_EXTENT 0x00000001
+
 /*
  * map virtual single (contiguous) pointer to h/w descriptor pointer
  */
@@ -785,7 +791,7 @@ static void ipsec_esp_encrypt_done(struct device *dev,
 	/* copy the generated ICV to dst */
 	if (edesc->dma_len) {
 		icvdata = &edesc->link_tbl[edesc->src_nents +
-					   edesc->dst_nents + 1];
+					   edesc->dst_nents + 2];
 		sg = sg_last(areq->dst, edesc->dst_nents);
 		memcpy((char *)sg_virt(sg) + sg->length - ctx->authsize,
 		       icvdata, ctx->authsize);
@@ -814,7 +820,7 @@ static void ipsec_esp_decrypt_done(struct device *dev,
 		/* auth check */
 		if (edesc->dma_len)
 			icvdata = &edesc->link_tbl[edesc->src_nents +
-						   edesc->dst_nents + 1];
+						   edesc->dst_nents + 2];
 		else
 			icvdata = &edesc->link_tbl[0];
 
@@ -921,10 +927,30 @@ static int ipsec_esp(struct ipsec_esp_edesc *edesc, struct aead_request *areq,
 		sg_count = sg_to_link_tbl(areq->src, sg_count, cryptlen,
 					  &edesc->link_tbl[0]);
 		if (sg_count > 1) {
+			struct talitos_ptr *link_tbl_ptr =
+				&edesc->link_tbl[sg_count-1];
+			struct scatterlist *sg;
+			struct talitos_private *priv = dev_get_drvdata(dev);
+
 			desc->ptr[4].j_extent |= DESC_PTR_LNKTBL_JUMP;
 			desc->ptr[4].ptr = cpu_to_be32(edesc->dma_link_tbl);
 			dma_sync_single_for_device(ctx->dev, edesc->dma_link_tbl,
 						   edesc->dma_len, DMA_BIDIRECTIONAL);
+			/* If necessary for this SEC revision,
+			 * add a link table entry for ICV.
+			 */
+			if ((priv->features &
+			     TALITOS_FTR_SRC_LINK_TBL_LEN_INCLUDES_EXTENT) &&
+			    (edesc->desc.hdr & DESC_HDR_MODE0_ENCRYPT) == 0) {
+				link_tbl_ptr->j_extent = 0;
+				link_tbl_ptr++;
+				link_tbl_ptr->j_extent = DESC_PTR_LNKTBL_RETURN;
+				link_tbl_ptr->len = cpu_to_be16(authsize);
+				sg = sg_last(areq->src, edesc->src_nents ? : 1);
+				link_tbl_ptr->ptr = cpu_to_be32(
+						(char *)sg_dma_address(sg)
+						+ sg->length - authsize);
+			}
 		} else {
 			/* Only one segment now, so no link tbl needed */
 			desc->ptr[4].ptr = cpu_to_be32(sg_dma_address(areq->src));
@@ -944,12 +970,11 @@ static int ipsec_esp(struct ipsec_esp_edesc *edesc, struct aead_request *areq,
 		desc->ptr[5].ptr = cpu_to_be32(sg_dma_address(areq->dst));
 	} else {
 		struct talitos_ptr *link_tbl_ptr =
-			&edesc->link_tbl[edesc->src_nents];
-		struct scatterlist *sg;
+			&edesc->link_tbl[edesc->src_nents + 1];
 
 		desc->ptr[5].ptr = cpu_to_be32((struct talitos_ptr *)
 					       edesc->dma_link_tbl +
-					       edesc->src_nents);
+					       edesc->src_nents + 1);
 		if (areq->src == areq->dst) {
 			memcpy(link_tbl_ptr, &edesc->link_tbl[0],
 			       edesc->src_nents * sizeof(struct talitos_ptr));
@@ -957,14 +982,10 @@ static int ipsec_esp(struct ipsec_esp_edesc *edesc, struct aead_request *areq,
 			sg_count = sg_to_link_tbl(areq->dst, sg_count, cryptlen,
 						  link_tbl_ptr);
 		}
+		/* Add an entry to the link table for ICV data */
 		link_tbl_ptr += sg_count - 1;
-
-		/* handle case where sg_last contains the ICV exclusively */
-		sg = sg_last(areq->dst, edesc->dst_nents);
-		if (sg->length == ctx->authsize)
-			link_tbl_ptr--;
-
 		link_tbl_ptr->j_extent = 0;
+		sg_count++;
 		link_tbl_ptr++;
 		link_tbl_ptr->j_extent = DESC_PTR_LNKTBL_RETURN;
 		link_tbl_ptr->len = cpu_to_be16(authsize);
@@ -973,7 +994,7 @@ static int ipsec_esp(struct ipsec_esp_edesc *edesc, struct aead_request *areq,
 		link_tbl_ptr->ptr = cpu_to_be32((struct talitos_ptr *)
 						edesc->dma_link_tbl +
 					        edesc->src_nents +
-						edesc->dst_nents + 1);
+						edesc->dst_nents + 2);
 
 		desc->ptr[5].j_extent |= DESC_PTR_LNKTBL_JUMP;
 		dma_sync_single_for_device(ctx->dev, edesc->dma_link_tbl,
@@ -1040,12 +1061,12 @@ static struct ipsec_esp_edesc *ipsec_esp_edesc_alloc(struct aead_request *areq,
 
 	/*
 	 * allocate space for base edesc plus the link tables,
-	 * allowing for a separate entry for the generated ICV (+ 1),
+	 * allowing for two separate entries for ICV and generated ICV (+ 2),
 	 * and the ICV data itself
 	 */
 	alloc_len = sizeof(struct ipsec_esp_edesc);
 	if (src_nents || dst_nents) {
-		dma_len = (src_nents + dst_nents + 1) *
+		dma_len = (src_nents + dst_nents + 2) *
 				 sizeof(struct talitos_ptr) + ctx->authsize;
 		alloc_len += dma_len;
 	} else {
@@ -1104,7 +1125,7 @@ static int aead_authenc_decrypt(struct aead_request *req)
 	/* stash incoming ICV for later cmp with ICV generated by the h/w */
 	if (edesc->dma_len)
 		icvdata = &edesc->link_tbl[edesc->src_nents +
-					   edesc->dst_nents + 1];
+					   edesc->dst_nents + 2];
 	else
 		icvdata = &edesc->link_tbl[0];
 
@@ -1480,6 +1501,9 @@ static int talitos_probe(struct of_device *ofdev,
 		goto err_out;
 	}
 
+	if (of_device_is_compatible(np, "fsl,sec3.0"))
+		priv->features |= TALITOS_FTR_SRC_LINK_TBL_LEN_INCLUDES_EXTENT;
+
 	priv->head_lock = kmalloc(sizeof(spinlock_t) * priv->num_channels,
 				  GFP_KERNEL);
 	priv->tail_lock = kmalloc(sizeof(spinlock_t) * priv->num_channels,
-- 
cgit v0.10.2


From f176e632efad33aeb2d3c5ddfa86861c0d60553c Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 30 Jul 2008 16:23:51 +0800
Subject: crypto: tcrypt - Fix AEAD chunk testing

My changeset 4b22f0ddb6564210c9ded7ba25b2a1007733e784

	crypto: tcrpyt - Remove unnecessary kmap/kunmap calls

introduced a typo that broke AEAD chunk testing.  In particular,
axbuf should really be xbuf.

There is also an issue with testing the last segment when encrypting.
The additional part produced by AEAD wasn't tested.  Similarly, on
decryption the additional part of the AEAD input is mistaken for
corruption.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 59821a2..6636802 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -481,21 +481,31 @@ next_one:
 
 			for (k = 0, temp = 0; k < template[i].np; k++) {
 				printk(KERN_INFO "page %u\n", k);
-				q = &axbuf[IDX[k]];
-				hexdump(q, template[i].tap[k]);
+				q = &xbuf[IDX[k]];
+
+				n = template[i].tap[k];
+				if (k == template[i].np - 1)
+					n += enc ? authsize : -authsize;
+				hexdump(q, n);
 				printk(KERN_INFO "%s\n",
-				       memcmp(q, template[i].result + temp,
-					      template[i].tap[k] -
-					      (k < template[i].np - 1 || enc ?
-					       0 : authsize)) ?
+				       memcmp(q, template[i].result + temp, n) ?
 				       "fail" : "pass");
 
-				for (n = 0; q[template[i].tap[k] + n]; n++)
-					;
+				q += n;
+				if (k == template[i].np - 1 && !enc) {
+					if (memcmp(q, template[i].input +
+						      temp + n, authsize))
+						n = authsize;
+					else
+						n = 0;
+				} else {
+					for (n = 0; q[n]; n++)
+						;
+				}
 				if (n) {
 					printk("Result buffer corruption %u "
 					       "bytes:\n", n);
-					hexdump(&q[template[i].tap[k]], n);
+					hexdump(q, n);
 				}
 
 				temp += template[i].tap[k];
-- 
cgit v0.10.2


From dbaaba1d0abf6871c7db6e3d15a46206bc386db1 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 3 Aug 2008 21:19:43 +0800
Subject: crypto: hash - Fix digest size check for digest type

The changeset ca786dc738f4f583b57b1bba7a335b5e8233f4b0

	crypto: hash - Fixed digest size check

missed one spot for the digest type.  This patch corrects that
error.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/crypto/digest.c b/crypto/digest.c
index ac09194..5d3f130 100644
--- a/crypto/digest.c
+++ b/crypto/digest.c
@@ -225,7 +225,7 @@ int crypto_init_digest_ops_async(struct crypto_tfm *tfm)
 	struct ahash_tfm  *crt  = &tfm->crt_ahash;
 	struct digest_alg *dalg = &tfm->__crt_alg->cra_digest;
 
-	if (dalg->dia_digestsize > crypto_tfm_alg_blocksize(tfm))
+	if (dalg->dia_digestsize > PAGE_SIZE / 8)
 		return -EINVAL;
 
 	crt->init       = digest_async_init;
-- 
cgit v0.10.2


From 318e5313923197e71a94f7b18835151649384b7f Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 5 Aug 2008 13:34:30 +0800
Subject: crypto: hash - Add missing top-level functions

The top-level functions init/update/final were missing for ahash.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/include/crypto/hash.h b/include/crypto/hash.h
index d12498e..ee48ef8 100644
--- a/include/crypto/hash.h
+++ b/include/crypto/hash.h
@@ -101,6 +101,24 @@ static inline int crypto_ahash_digest(struct ahash_request *req)
 	return crt->digest(req);
 }
 
+static inline int crypto_ahash_init(struct ahash_request *req)
+{
+	struct ahash_tfm *crt = crypto_ahash_crt(crypto_ahash_reqtfm(req));
+	return crt->init(req);
+}
+
+static inline int crypto_ahash_update(struct ahash_request *req)
+{
+	struct ahash_tfm *crt = crypto_ahash_crt(crypto_ahash_reqtfm(req));
+	return crt->update(req);
+}
+
+static inline int crypto_ahash_final(struct ahash_request *req)
+{
+	struct ahash_tfm *crt = crypto_ahash_crt(crypto_ahash_reqtfm(req));
+	return crt->final(req);
+}
+
 static inline void ahash_request_set_tfm(struct ahash_request *req,
 					 struct crypto_ahash *tfm)
 {
-- 
cgit v0.10.2


From e49140120c88eb99db1a9172d9ac224c0f2bbdd2 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Wed, 13 Aug 2008 22:02:26 +1000
Subject: crypto: padlock - fix VIA PadLock instruction usage with
 irq_ts_save/restore()

Wolfgang Walter reported this oops on his via C3 using padlock for
AES-encryption:

##################################################################

BUG: unable to handle kernel NULL pointer dereference at 000001f0
IP: [<c01028c5>] __switch_to+0x30/0x117
*pde = 00000000
Oops: 0002 [#1] PREEMPT
Modules linked in:

Pid: 2071, comm: sleep Not tainted (2.6.26 #11)
EIP: 0060:[<c01028c5>] EFLAGS: 00010002 CPU: 0
EIP is at __switch_to+0x30/0x117
EAX: 00000000 EBX: c0493300 ECX: dc48dd00 EDX: c0493300
ESI: dc48dd00 EDI: c0493530 EBP: c04cff8c ESP: c04cff7c
 DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 0068
Process sleep (pid: 2071, ti=c04ce000 task=dc48dd00 task.ti=d2fe6000)
Stack: dc48df30 c0493300 00000000 00000000 d2fe7f44 c03b5b43 c04cffc8 00000046
       c0131856 0000005a dc472d3c c0493300 c0493470 d983ae00 00002696 00000000
       c0239f54 00000000 c04c4000 c04cffd8 c01025fe c04f3740 00049800 c04cffe0
Call Trace:
 [<c03b5b43>] ? schedule+0x285/0x2ff
 [<c0131856>] ? pm_qos_requirement+0x3c/0x53
 [<c0239f54>] ? acpi_processor_idle+0x0/0x434
 [<c01025fe>] ? cpu_idle+0x73/0x7f
 [<c03a4dcd>] ? rest_init+0x61/0x63
 =======================

Wolfgang also found out that adding kernel_fpu_begin() and kernel_fpu_end()
around the padlock instructions fix the oops.

Suresh wrote:

These padlock instructions though don't use/touch SSE registers, but it behaves
similar to other SSE instructions. For example, it might cause DNA faults
when cr0.ts is set. While this is a spurious DNA trap, it might cause
oops with the recent fpu code changes.

This is the code sequence  that is probably causing this problem:

a) new app is getting exec'd and it is somewhere in between
   start_thread() and flush_old_exec() in the load_xyz_binary()

b) At pont "a", task's fpu state (like TS_USEDFPU, used_math() etc) is
   cleared.

c) Now we get an interrupt/softirq which starts using these encrypt/decrypt
   routines in the network stack. This generates a math fault (as
   cr0.ts is '1') which sets TS_USEDFPU and restores the math that is
   in the task's xstate.

d) Return to exec code path, which does start_thread() which does
   free_thread_xstate() and sets xstate pointer to NULL while
   the TS_USEDFPU is still set.

e) At the next context switch from the new exec'd task to another task,
   we have a scenarios where TS_USEDFPU is set but xstate pointer is null.
   This can cause an oops during unlazy_fpu() in __switch_to()

Now:

1) This should happen with or with out pre-emption. Viro also encountered
   similar problem with out CONFIG_PREEMPT.

2) kernel_fpu_begin() and kernel_fpu_end() will fix this problem, because
   kernel_fpu_begin() will manually do a clts() and won't run in to the
   situation of setting TS_USEDFPU in step "c" above.

3) This was working before the fpu changes, because its a spurious
   math fault  which doesn't corrupt any fpu/sse registers and the task's
   math state was always in an allocated state.

With out the recent lazy fpu allocation changes, while we don't see oops,
there is a possible race still present in older kernels(for example,
while kernel is using kernel_fpu_begin() in some optimized clear/copy
page and an interrupt/softirq happens which uses these padlock
instructions generating DNA fault).

This is the failing scenario that existed even before the lazy fpu allocation
changes:

0. CPU's TS flag is set

1. kernel using FPU in some optimized copy  routine and while doing
kernel_fpu_begin() takes an interrupt just before doing clts()

2. Takes an interrupt and ipsec uses padlock instruction. And we
take a DNA fault as TS flag is still set.

3. We handle the DNA fault and set TS_USEDFPU and clear cr0.ts

4. We complete the padlock routine

5. Go back to step-1, which resumes clts() in kernel_fpu_begin(), finishes
the optimized copy routine and does kernel_fpu_end(). At this point,
we have cr0.ts again set to '1' but the task's TS_USEFPU is stilll
set and not cleared.

6. Now kernel resumes its user operation. And at the next context
switch, kernel sees it has do a FP save as TS_USEDFPU is still set
and then will do a unlazy_fpu() in __switch_to(). unlazy_fpu()
will take a DNA fault, as cr0.ts is '1' and now, because we are
in __switch_to(), math_state_restore() will get confused and will
restore the next task's FP state and will save it in prev tasks's FP state.
Remember, in __switch_to() we are already on the stack of the next task
but take a DNA fault for the prev task.

This causes the fpu leakage.

Fix the padlock instruction usage by calling them inside the
context of new routines irq_ts_save/restore(), which clear/restore cr0.ts
manually in the interrupt context. This will not generate spurious DNA
in the  context of the interrupt which will fix the oops encountered and
the possible FPU leakage issue.

Reported-and-bisected-by: Wolfgang Walter <wolfgang.walter@stwm.de>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/drivers/char/hw_random/via-rng.c b/drivers/char/hw_random/via-rng.c
index f7feae4..128202e 100644
--- a/drivers/char/hw_random/via-rng.c
+++ b/drivers/char/hw_random/via-rng.c
@@ -31,6 +31,7 @@
 #include <asm/io.h>
 #include <asm/msr.h>
 #include <asm/cpufeature.h>
+#include <asm/i387.h>
 
 
 #define PFX	KBUILD_MODNAME ": "
@@ -67,16 +68,23 @@ enum {
  * Another possible performance boost may come from simply buffering
  * until we have 4 bytes, thus returning a u32 at a time,
  * instead of the current u8-at-a-time.
+ *
+ * Padlock instructions can generate a spurious DNA fault, so
+ * we have to call them in the context of irq_ts_save/restore()
  */
 
 static inline u32 xstore(u32 *addr, u32 edx_in)
 {
 	u32 eax_out;
+	int ts_state;
+
+	ts_state = irq_ts_save();
 
 	asm(".byte 0x0F,0xA7,0xC0 /* xstore %%edi (addr=%0) */"
 		:"=m"(*addr), "=a"(eax_out)
 		:"D"(addr), "d"(edx_in));
 
+	irq_ts_restore(ts_state);
 	return eax_out;
 }
 
diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c
index 54a2a16..bf2917d 100644
--- a/drivers/crypto/padlock-aes.c
+++ b/drivers/crypto/padlock-aes.c
@@ -16,6 +16,7 @@
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <asm/byteorder.h>
+#include <asm/i387.h>
 #include "padlock.h"
 
 /* Control word. */
@@ -141,6 +142,12 @@ static inline void padlock_reset_key(void)
 	asm volatile ("pushfl; popfl");
 }
 
+/*
+ * While the padlock instructions don't use FP/SSE registers, they
+ * generate a spurious DNA fault when cr0.ts is '1'. These instructions
+ * should be used only inside the irq_ts_save/restore() context
+ */
+
 static inline void padlock_xcrypt(const u8 *input, u8 *output, void *key,
 				  void *control_word)
 {
@@ -205,15 +212,23 @@ static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key,
 static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
 	struct aes_ctx *ctx = aes_ctx(tfm);
+	int ts_state;
 	padlock_reset_key();
+
+	ts_state = irq_ts_save();
 	aes_crypt(in, out, ctx->E, &ctx->cword.encrypt);
+	irq_ts_restore(ts_state);
 }
 
 static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
 	struct aes_ctx *ctx = aes_ctx(tfm);
+	int ts_state;
 	padlock_reset_key();
+
+	ts_state = irq_ts_save();
 	aes_crypt(in, out, ctx->D, &ctx->cword.decrypt);
+	irq_ts_restore(ts_state);
 }
 
 static struct crypto_alg aes_alg = {
@@ -244,12 +259,14 @@ static int ecb_aes_encrypt(struct blkcipher_desc *desc,
 	struct aes_ctx *ctx = blk_aes_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 	int err;
+	int ts_state;
 
 	padlock_reset_key();
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
 	err = blkcipher_walk_virt(desc, &walk);
 
+	ts_state = irq_ts_save();
 	while ((nbytes = walk.nbytes)) {
 		padlock_xcrypt_ecb(walk.src.virt.addr, walk.dst.virt.addr,
 				   ctx->E, &ctx->cword.encrypt,
@@ -257,6 +274,7 @@ static int ecb_aes_encrypt(struct blkcipher_desc *desc,
 		nbytes &= AES_BLOCK_SIZE - 1;
 		err = blkcipher_walk_done(desc, &walk, nbytes);
 	}
+	irq_ts_restore(ts_state);
 
 	return err;
 }
@@ -268,12 +286,14 @@ static int ecb_aes_decrypt(struct blkcipher_desc *desc,
 	struct aes_ctx *ctx = blk_aes_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 	int err;
+	int ts_state;
 
 	padlock_reset_key();
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
 	err = blkcipher_walk_virt(desc, &walk);
 
+	ts_state = irq_ts_save();
 	while ((nbytes = walk.nbytes)) {
 		padlock_xcrypt_ecb(walk.src.virt.addr, walk.dst.virt.addr,
 				   ctx->D, &ctx->cword.decrypt,
@@ -281,7 +301,7 @@ static int ecb_aes_decrypt(struct blkcipher_desc *desc,
 		nbytes &= AES_BLOCK_SIZE - 1;
 		err = blkcipher_walk_done(desc, &walk, nbytes);
 	}
-
+	irq_ts_restore(ts_state);
 	return err;
 }
 
@@ -314,12 +334,14 @@ static int cbc_aes_encrypt(struct blkcipher_desc *desc,
 	struct aes_ctx *ctx = blk_aes_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 	int err;
+	int ts_state;
 
 	padlock_reset_key();
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
 	err = blkcipher_walk_virt(desc, &walk);
 
+	ts_state = irq_ts_save();
 	while ((nbytes = walk.nbytes)) {
 		u8 *iv = padlock_xcrypt_cbc(walk.src.virt.addr,
 					    walk.dst.virt.addr, ctx->E,
@@ -329,6 +351,7 @@ static int cbc_aes_encrypt(struct blkcipher_desc *desc,
 		nbytes &= AES_BLOCK_SIZE - 1;
 		err = blkcipher_walk_done(desc, &walk, nbytes);
 	}
+	irq_ts_restore(ts_state);
 
 	return err;
 }
@@ -340,12 +363,14 @@ static int cbc_aes_decrypt(struct blkcipher_desc *desc,
 	struct aes_ctx *ctx = blk_aes_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 	int err;
+	int ts_state;
 
 	padlock_reset_key();
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
 	err = blkcipher_walk_virt(desc, &walk);
 
+	ts_state = irq_ts_save();
 	while ((nbytes = walk.nbytes)) {
 		padlock_xcrypt_cbc(walk.src.virt.addr, walk.dst.virt.addr,
 				   ctx->D, walk.iv, &ctx->cword.decrypt,
@@ -354,6 +379,7 @@ static int cbc_aes_decrypt(struct blkcipher_desc *desc,
 		err = blkcipher_walk_done(desc, &walk, nbytes);
 	}
 
+	irq_ts_restore(ts_state);
 	return err;
 }
 
diff --git a/drivers/crypto/padlock-sha.c b/drivers/crypto/padlock-sha.c
index 40d5680..a7fbade 100644
--- a/drivers/crypto/padlock-sha.c
+++ b/drivers/crypto/padlock-sha.c
@@ -22,6 +22,7 @@
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/scatterlist.h>
+#include <asm/i387.h>
 #include "padlock.h"
 
 #define SHA1_DEFAULT_FALLBACK	"sha1-generic"
@@ -102,6 +103,7 @@ static void padlock_do_sha1(const char *in, char *out, int count)
 	 *     PadLock microcode needs it that big. */
 	char buf[128+16];
 	char *result = NEAREST_ALIGNED(buf);
+	int ts_state;
 
 	((uint32_t *)result)[0] = SHA1_H0;
 	((uint32_t *)result)[1] = SHA1_H1;
@@ -109,9 +111,12 @@ static void padlock_do_sha1(const char *in, char *out, int count)
 	((uint32_t *)result)[3] = SHA1_H3;
 	((uint32_t *)result)[4] = SHA1_H4;
  
+	/* prevent taking the spurious DNA fault with padlock. */
+	ts_state = irq_ts_save();
 	asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" /* rep xsha1 */
 		      : "+S"(in), "+D"(result)
 		      : "c"(count), "a"(0));
+	irq_ts_restore(ts_state);
 
 	padlock_output_block((uint32_t *)result, (uint32_t *)out, 5);
 }
@@ -123,6 +128,7 @@ static void padlock_do_sha256(const char *in, char *out, int count)
 	 *     PadLock microcode needs it that big. */
 	char buf[128+16];
 	char *result = NEAREST_ALIGNED(buf);
+	int ts_state;
 
 	((uint32_t *)result)[0] = SHA256_H0;
 	((uint32_t *)result)[1] = SHA256_H1;
@@ -133,9 +139,12 @@ static void padlock_do_sha256(const char *in, char *out, int count)
 	((uint32_t *)result)[6] = SHA256_H6;
 	((uint32_t *)result)[7] = SHA256_H7;
 
+	/* prevent taking the spurious DNA fault with padlock. */
+	ts_state = irq_ts_save();
 	asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" /* rep xsha256 */
 		      : "+S"(in), "+D"(result)
 		      : "c"(count), "a"(0));
+	irq_ts_restore(ts_state);
 
 	padlock_output_block((uint32_t *)result, (uint32_t *)out, 8);
 }
diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h
index 96fa844..6d3b210 100644
--- a/include/asm-x86/i387.h
+++ b/include/asm-x86/i387.h
@@ -13,6 +13,7 @@
 #include <linux/sched.h>
 #include <linux/kernel_stat.h>
 #include <linux/regset.h>
+#include <linux/hardirq.h>
 #include <asm/asm.h>
 #include <asm/processor.h>
 #include <asm/sigcontext.h>
@@ -236,6 +237,37 @@ static inline void kernel_fpu_end(void)
 	preempt_enable();
 }
 
+/*
+ * Some instructions like VIA's padlock instructions generate a spurious
+ * DNA fault but don't modify SSE registers. And these instructions
+ * get used from interrupt context aswell. To prevent these kernel instructions
+ * in interrupt context interact wrongly with other user/kernel fpu usage, we
+ * should use them only in the context of irq_ts_save/restore()
+ */
+static inline int irq_ts_save(void)
+{
+	/*
+	 * If we are in process context, we are ok to take a spurious DNA fault.
+	 * Otherwise, doing clts() in process context require pre-emption to
+	 * be disabled or some heavy lifting like kernel_fpu_begin()
+	 */
+	if (!in_interrupt())
+		return 0;
+
+	if (read_cr0() & X86_CR0_TS) {
+		clts();
+		return 1;
+	}
+
+	return 0;
+}
+
+static inline void irq_ts_restore(int TS_state)
+{
+	if (TS_state)
+		stts();
+}
+
 #ifdef CONFIG_X86_64
 
 static inline void save_init_fpu(struct task_struct *tsk)
-- 
cgit v0.10.2