From f3c85bc1bc72b4cc8d58664a490a9d42bdb6565a Mon Sep 17 00:00:00 2001 From: Lee Nipper Date: Wed, 30 Jul 2008 16:26:57 +0800 Subject: crypto: talitos - Add handling for SEC 3.x treatment of link table Later SEC revision requires the link table (used for scatter/gather) to have an extra entry to account for the total length in descriptor [4], which contains cipher Input and ICV. This only applies to decrypt, not encrypt. Without this change, on 837x, a gather return/length error results when a decryption uses a link table to gather the fragments. This is observed by doing a ping with size of 1447 or larger with AES, or a ping with size 1455 or larger with 3des. So, add check for SEC compatible "fsl,3.0" for using extra link table entry. Signed-off-by: Lee Nipper Signed-off-by: Kim Phillips Signed-off-by: Herbert Xu diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 681c15f..ee827a7 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -96,6 +96,9 @@ struct talitos_private { unsigned int exec_units; unsigned int desc_types; + /* SEC Compatibility info */ + unsigned long features; + /* next channel to be assigned next incoming descriptor */ atomic_t last_chan; @@ -133,6 +136,9 @@ struct talitos_private { struct hwrng rng; }; +/* .features flag */ +#define TALITOS_FTR_SRC_LINK_TBL_LEN_INCLUDES_EXTENT 0x00000001 + /* * map virtual single (contiguous) pointer to h/w descriptor pointer */ @@ -785,7 +791,7 @@ static void ipsec_esp_encrypt_done(struct device *dev, /* copy the generated ICV to dst */ if (edesc->dma_len) { icvdata = &edesc->link_tbl[edesc->src_nents + - edesc->dst_nents + 1]; + edesc->dst_nents + 2]; sg = sg_last(areq->dst, edesc->dst_nents); memcpy((char *)sg_virt(sg) + sg->length - ctx->authsize, icvdata, ctx->authsize); @@ -814,7 +820,7 @@ static void ipsec_esp_decrypt_done(struct device *dev, /* auth check */ if (edesc->dma_len) icvdata = &edesc->link_tbl[edesc->src_nents + - edesc->dst_nents + 1]; + edesc->dst_nents + 2]; else icvdata = &edesc->link_tbl[0]; @@ -921,10 +927,30 @@ static int ipsec_esp(struct ipsec_esp_edesc *edesc, struct aead_request *areq, sg_count = sg_to_link_tbl(areq->src, sg_count, cryptlen, &edesc->link_tbl[0]); if (sg_count > 1) { + struct talitos_ptr *link_tbl_ptr = + &edesc->link_tbl[sg_count-1]; + struct scatterlist *sg; + struct talitos_private *priv = dev_get_drvdata(dev); + desc->ptr[4].j_extent |= DESC_PTR_LNKTBL_JUMP; desc->ptr[4].ptr = cpu_to_be32(edesc->dma_link_tbl); dma_sync_single_for_device(ctx->dev, edesc->dma_link_tbl, edesc->dma_len, DMA_BIDIRECTIONAL); + /* If necessary for this SEC revision, + * add a link table entry for ICV. + */ + if ((priv->features & + TALITOS_FTR_SRC_LINK_TBL_LEN_INCLUDES_EXTENT) && + (edesc->desc.hdr & DESC_HDR_MODE0_ENCRYPT) == 0) { + link_tbl_ptr->j_extent = 0; + link_tbl_ptr++; + link_tbl_ptr->j_extent = DESC_PTR_LNKTBL_RETURN; + link_tbl_ptr->len = cpu_to_be16(authsize); + sg = sg_last(areq->src, edesc->src_nents ? : 1); + link_tbl_ptr->ptr = cpu_to_be32( + (char *)sg_dma_address(sg) + + sg->length - authsize); + } } else { /* Only one segment now, so no link tbl needed */ desc->ptr[4].ptr = cpu_to_be32(sg_dma_address(areq->src)); @@ -944,12 +970,11 @@ static int ipsec_esp(struct ipsec_esp_edesc *edesc, struct aead_request *areq, desc->ptr[5].ptr = cpu_to_be32(sg_dma_address(areq->dst)); } else { struct talitos_ptr *link_tbl_ptr = - &edesc->link_tbl[edesc->src_nents]; - struct scatterlist *sg; + &edesc->link_tbl[edesc->src_nents + 1]; desc->ptr[5].ptr = cpu_to_be32((struct talitos_ptr *) edesc->dma_link_tbl + - edesc->src_nents); + edesc->src_nents + 1); if (areq->src == areq->dst) { memcpy(link_tbl_ptr, &edesc->link_tbl[0], edesc->src_nents * sizeof(struct talitos_ptr)); @@ -957,14 +982,10 @@ static int ipsec_esp(struct ipsec_esp_edesc *edesc, struct aead_request *areq, sg_count = sg_to_link_tbl(areq->dst, sg_count, cryptlen, link_tbl_ptr); } + /* Add an entry to the link table for ICV data */ link_tbl_ptr += sg_count - 1; - - /* handle case where sg_last contains the ICV exclusively */ - sg = sg_last(areq->dst, edesc->dst_nents); - if (sg->length == ctx->authsize) - link_tbl_ptr--; - link_tbl_ptr->j_extent = 0; + sg_count++; link_tbl_ptr++; link_tbl_ptr->j_extent = DESC_PTR_LNKTBL_RETURN; link_tbl_ptr->len = cpu_to_be16(authsize); @@ -973,7 +994,7 @@ static int ipsec_esp(struct ipsec_esp_edesc *edesc, struct aead_request *areq, link_tbl_ptr->ptr = cpu_to_be32((struct talitos_ptr *) edesc->dma_link_tbl + edesc->src_nents + - edesc->dst_nents + 1); + edesc->dst_nents + 2); desc->ptr[5].j_extent |= DESC_PTR_LNKTBL_JUMP; dma_sync_single_for_device(ctx->dev, edesc->dma_link_tbl, @@ -1040,12 +1061,12 @@ static struct ipsec_esp_edesc *ipsec_esp_edesc_alloc(struct aead_request *areq, /* * allocate space for base edesc plus the link tables, - * allowing for a separate entry for the generated ICV (+ 1), + * allowing for two separate entries for ICV and generated ICV (+ 2), * and the ICV data itself */ alloc_len = sizeof(struct ipsec_esp_edesc); if (src_nents || dst_nents) { - dma_len = (src_nents + dst_nents + 1) * + dma_len = (src_nents + dst_nents + 2) * sizeof(struct talitos_ptr) + ctx->authsize; alloc_len += dma_len; } else { @@ -1104,7 +1125,7 @@ static int aead_authenc_decrypt(struct aead_request *req) /* stash incoming ICV for later cmp with ICV generated by the h/w */ if (edesc->dma_len) icvdata = &edesc->link_tbl[edesc->src_nents + - edesc->dst_nents + 1]; + edesc->dst_nents + 2]; else icvdata = &edesc->link_tbl[0]; @@ -1480,6 +1501,9 @@ static int talitos_probe(struct of_device *ofdev, goto err_out; } + if (of_device_is_compatible(np, "fsl,sec3.0")) + priv->features |= TALITOS_FTR_SRC_LINK_TBL_LEN_INCLUDES_EXTENT; + priv->head_lock = kmalloc(sizeof(spinlock_t) * priv->num_channels, GFP_KERNEL); priv->tail_lock = kmalloc(sizeof(spinlock_t) * priv->num_channels, -- cgit v0.10.2 From f176e632efad33aeb2d3c5ddfa86861c0d60553c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 30 Jul 2008 16:23:51 +0800 Subject: crypto: tcrypt - Fix AEAD chunk testing My changeset 4b22f0ddb6564210c9ded7ba25b2a1007733e784 crypto: tcrpyt - Remove unnecessary kmap/kunmap calls introduced a typo that broke AEAD chunk testing. In particular, axbuf should really be xbuf. There is also an issue with testing the last segment when encrypting. The additional part produced by AEAD wasn't tested. Similarly, on decryption the additional part of the AEAD input is mistaken for corruption. Signed-off-by: Herbert Xu diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 59821a2..6636802 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -481,21 +481,31 @@ next_one: for (k = 0, temp = 0; k < template[i].np; k++) { printk(KERN_INFO "page %u\n", k); - q = &axbuf[IDX[k]]; - hexdump(q, template[i].tap[k]); + q = &xbuf[IDX[k]]; + + n = template[i].tap[k]; + if (k == template[i].np - 1) + n += enc ? authsize : -authsize; + hexdump(q, n); printk(KERN_INFO "%s\n", - memcmp(q, template[i].result + temp, - template[i].tap[k] - - (k < template[i].np - 1 || enc ? - 0 : authsize)) ? + memcmp(q, template[i].result + temp, n) ? "fail" : "pass"); - for (n = 0; q[template[i].tap[k] + n]; n++) - ; + q += n; + if (k == template[i].np - 1 && !enc) { + if (memcmp(q, template[i].input + + temp + n, authsize)) + n = authsize; + else + n = 0; + } else { + for (n = 0; q[n]; n++) + ; + } if (n) { printk("Result buffer corruption %u " "bytes:\n", n); - hexdump(&q[template[i].tap[k]], n); + hexdump(q, n); } temp += template[i].tap[k]; -- cgit v0.10.2 From dbaaba1d0abf6871c7db6e3d15a46206bc386db1 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 3 Aug 2008 21:19:43 +0800 Subject: crypto: hash - Fix digest size check for digest type The changeset ca786dc738f4f583b57b1bba7a335b5e8233f4b0 crypto: hash - Fixed digest size check missed one spot for the digest type. This patch corrects that error. Signed-off-by: Herbert Xu diff --git a/crypto/digest.c b/crypto/digest.c index ac09194..5d3f130 100644 --- a/crypto/digest.c +++ b/crypto/digest.c @@ -225,7 +225,7 @@ int crypto_init_digest_ops_async(struct crypto_tfm *tfm) struct ahash_tfm *crt = &tfm->crt_ahash; struct digest_alg *dalg = &tfm->__crt_alg->cra_digest; - if (dalg->dia_digestsize > crypto_tfm_alg_blocksize(tfm)) + if (dalg->dia_digestsize > PAGE_SIZE / 8) return -EINVAL; crt->init = digest_async_init; -- cgit v0.10.2 From 318e5313923197e71a94f7b18835151649384b7f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 5 Aug 2008 13:34:30 +0800 Subject: crypto: hash - Add missing top-level functions The top-level functions init/update/final were missing for ahash. Signed-off-by: Herbert Xu diff --git a/include/crypto/hash.h b/include/crypto/hash.h index d12498e..ee48ef8 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -101,6 +101,24 @@ static inline int crypto_ahash_digest(struct ahash_request *req) return crt->digest(req); } +static inline int crypto_ahash_init(struct ahash_request *req) +{ + struct ahash_tfm *crt = crypto_ahash_crt(crypto_ahash_reqtfm(req)); + return crt->init(req); +} + +static inline int crypto_ahash_update(struct ahash_request *req) +{ + struct ahash_tfm *crt = crypto_ahash_crt(crypto_ahash_reqtfm(req)); + return crt->update(req); +} + +static inline int crypto_ahash_final(struct ahash_request *req) +{ + struct ahash_tfm *crt = crypto_ahash_crt(crypto_ahash_reqtfm(req)); + return crt->final(req); +} + static inline void ahash_request_set_tfm(struct ahash_request *req, struct crypto_ahash *tfm) { -- cgit v0.10.2 From e49140120c88eb99db1a9172d9ac224c0f2bbdd2 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Wed, 13 Aug 2008 22:02:26 +1000 Subject: crypto: padlock - fix VIA PadLock instruction usage with irq_ts_save/restore() Wolfgang Walter reported this oops on his via C3 using padlock for AES-encryption: ################################################################## BUG: unable to handle kernel NULL pointer dereference at 000001f0 IP: [] __switch_to+0x30/0x117 *pde = 00000000 Oops: 0002 [#1] PREEMPT Modules linked in: Pid: 2071, comm: sleep Not tainted (2.6.26 #11) EIP: 0060:[] EFLAGS: 00010002 CPU: 0 EIP is at __switch_to+0x30/0x117 EAX: 00000000 EBX: c0493300 ECX: dc48dd00 EDX: c0493300 ESI: dc48dd00 EDI: c0493530 EBP: c04cff8c ESP: c04cff7c DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 0068 Process sleep (pid: 2071, ti=c04ce000 task=dc48dd00 task.ti=d2fe6000) Stack: dc48df30 c0493300 00000000 00000000 d2fe7f44 c03b5b43 c04cffc8 00000046 c0131856 0000005a dc472d3c c0493300 c0493470 d983ae00 00002696 00000000 c0239f54 00000000 c04c4000 c04cffd8 c01025fe c04f3740 00049800 c04cffe0 Call Trace: [] ? schedule+0x285/0x2ff [] ? pm_qos_requirement+0x3c/0x53 [] ? acpi_processor_idle+0x0/0x434 [] ? cpu_idle+0x73/0x7f [] ? rest_init+0x61/0x63 ======================= Wolfgang also found out that adding kernel_fpu_begin() and kernel_fpu_end() around the padlock instructions fix the oops. Suresh wrote: These padlock instructions though don't use/touch SSE registers, but it behaves similar to other SSE instructions. For example, it might cause DNA faults when cr0.ts is set. While this is a spurious DNA trap, it might cause oops with the recent fpu code changes. This is the code sequence that is probably causing this problem: a) new app is getting exec'd and it is somewhere in between start_thread() and flush_old_exec() in the load_xyz_binary() b) At pont "a", task's fpu state (like TS_USEDFPU, used_math() etc) is cleared. c) Now we get an interrupt/softirq which starts using these encrypt/decrypt routines in the network stack. This generates a math fault (as cr0.ts is '1') which sets TS_USEDFPU and restores the math that is in the task's xstate. d) Return to exec code path, which does start_thread() which does free_thread_xstate() and sets xstate pointer to NULL while the TS_USEDFPU is still set. e) At the next context switch from the new exec'd task to another task, we have a scenarios where TS_USEDFPU is set but xstate pointer is null. This can cause an oops during unlazy_fpu() in __switch_to() Now: 1) This should happen with or with out pre-emption. Viro also encountered similar problem with out CONFIG_PREEMPT. 2) kernel_fpu_begin() and kernel_fpu_end() will fix this problem, because kernel_fpu_begin() will manually do a clts() and won't run in to the situation of setting TS_USEDFPU in step "c" above. 3) This was working before the fpu changes, because its a spurious math fault which doesn't corrupt any fpu/sse registers and the task's math state was always in an allocated state. With out the recent lazy fpu allocation changes, while we don't see oops, there is a possible race still present in older kernels(for example, while kernel is using kernel_fpu_begin() in some optimized clear/copy page and an interrupt/softirq happens which uses these padlock instructions generating DNA fault). This is the failing scenario that existed even before the lazy fpu allocation changes: 0. CPU's TS flag is set 1. kernel using FPU in some optimized copy routine and while doing kernel_fpu_begin() takes an interrupt just before doing clts() 2. Takes an interrupt and ipsec uses padlock instruction. And we take a DNA fault as TS flag is still set. 3. We handle the DNA fault and set TS_USEDFPU and clear cr0.ts 4. We complete the padlock routine 5. Go back to step-1, which resumes clts() in kernel_fpu_begin(), finishes the optimized copy routine and does kernel_fpu_end(). At this point, we have cr0.ts again set to '1' but the task's TS_USEFPU is stilll set and not cleared. 6. Now kernel resumes its user operation. And at the next context switch, kernel sees it has do a FP save as TS_USEDFPU is still set and then will do a unlazy_fpu() in __switch_to(). unlazy_fpu() will take a DNA fault, as cr0.ts is '1' and now, because we are in __switch_to(), math_state_restore() will get confused and will restore the next task's FP state and will save it in prev tasks's FP state. Remember, in __switch_to() we are already on the stack of the next task but take a DNA fault for the prev task. This causes the fpu leakage. Fix the padlock instruction usage by calling them inside the context of new routines irq_ts_save/restore(), which clear/restore cr0.ts manually in the interrupt context. This will not generate spurious DNA in the context of the interrupt which will fix the oops encountered and the possible FPU leakage issue. Reported-and-bisected-by: Wolfgang Walter Signed-off-by: Suresh Siddha Signed-off-by: Herbert Xu diff --git a/drivers/char/hw_random/via-rng.c b/drivers/char/hw_random/via-rng.c index f7feae4..128202e 100644 --- a/drivers/char/hw_random/via-rng.c +++ b/drivers/char/hw_random/via-rng.c @@ -31,6 +31,7 @@ #include #include #include +#include #define PFX KBUILD_MODNAME ": " @@ -67,16 +68,23 @@ enum { * Another possible performance boost may come from simply buffering * until we have 4 bytes, thus returning a u32 at a time, * instead of the current u8-at-a-time. + * + * Padlock instructions can generate a spurious DNA fault, so + * we have to call them in the context of irq_ts_save/restore() */ static inline u32 xstore(u32 *addr, u32 edx_in) { u32 eax_out; + int ts_state; + + ts_state = irq_ts_save(); asm(".byte 0x0F,0xA7,0xC0 /* xstore %%edi (addr=%0) */" :"=m"(*addr), "=a"(eax_out) :"D"(addr), "d"(edx_in)); + irq_ts_restore(ts_state); return eax_out; } diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index 54a2a16..bf2917d 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "padlock.h" /* Control word. */ @@ -141,6 +142,12 @@ static inline void padlock_reset_key(void) asm volatile ("pushfl; popfl"); } +/* + * While the padlock instructions don't use FP/SSE registers, they + * generate a spurious DNA fault when cr0.ts is '1'. These instructions + * should be used only inside the irq_ts_save/restore() context + */ + static inline void padlock_xcrypt(const u8 *input, u8 *output, void *key, void *control_word) { @@ -205,15 +212,23 @@ static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key, static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { struct aes_ctx *ctx = aes_ctx(tfm); + int ts_state; padlock_reset_key(); + + ts_state = irq_ts_save(); aes_crypt(in, out, ctx->E, &ctx->cword.encrypt); + irq_ts_restore(ts_state); } static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { struct aes_ctx *ctx = aes_ctx(tfm); + int ts_state; padlock_reset_key(); + + ts_state = irq_ts_save(); aes_crypt(in, out, ctx->D, &ctx->cword.decrypt); + irq_ts_restore(ts_state); } static struct crypto_alg aes_alg = { @@ -244,12 +259,14 @@ static int ecb_aes_encrypt(struct blkcipher_desc *desc, struct aes_ctx *ctx = blk_aes_ctx(desc->tfm); struct blkcipher_walk walk; int err; + int ts_state; padlock_reset_key(); blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); + ts_state = irq_ts_save(); while ((nbytes = walk.nbytes)) { padlock_xcrypt_ecb(walk.src.virt.addr, walk.dst.virt.addr, ctx->E, &ctx->cword.encrypt, @@ -257,6 +274,7 @@ static int ecb_aes_encrypt(struct blkcipher_desc *desc, nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } + irq_ts_restore(ts_state); return err; } @@ -268,12 +286,14 @@ static int ecb_aes_decrypt(struct blkcipher_desc *desc, struct aes_ctx *ctx = blk_aes_ctx(desc->tfm); struct blkcipher_walk walk; int err; + int ts_state; padlock_reset_key(); blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); + ts_state = irq_ts_save(); while ((nbytes = walk.nbytes)) { padlock_xcrypt_ecb(walk.src.virt.addr, walk.dst.virt.addr, ctx->D, &ctx->cword.decrypt, @@ -281,7 +301,7 @@ static int ecb_aes_decrypt(struct blkcipher_desc *desc, nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } - + irq_ts_restore(ts_state); return err; } @@ -314,12 +334,14 @@ static int cbc_aes_encrypt(struct blkcipher_desc *desc, struct aes_ctx *ctx = blk_aes_ctx(desc->tfm); struct blkcipher_walk walk; int err; + int ts_state; padlock_reset_key(); blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); + ts_state = irq_ts_save(); while ((nbytes = walk.nbytes)) { u8 *iv = padlock_xcrypt_cbc(walk.src.virt.addr, walk.dst.virt.addr, ctx->E, @@ -329,6 +351,7 @@ static int cbc_aes_encrypt(struct blkcipher_desc *desc, nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } + irq_ts_restore(ts_state); return err; } @@ -340,12 +363,14 @@ static int cbc_aes_decrypt(struct blkcipher_desc *desc, struct aes_ctx *ctx = blk_aes_ctx(desc->tfm); struct blkcipher_walk walk; int err; + int ts_state; padlock_reset_key(); blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); + ts_state = irq_ts_save(); while ((nbytes = walk.nbytes)) { padlock_xcrypt_cbc(walk.src.virt.addr, walk.dst.virt.addr, ctx->D, walk.iv, &ctx->cword.decrypt, @@ -354,6 +379,7 @@ static int cbc_aes_decrypt(struct blkcipher_desc *desc, err = blkcipher_walk_done(desc, &walk, nbytes); } + irq_ts_restore(ts_state); return err; } diff --git a/drivers/crypto/padlock-sha.c b/drivers/crypto/padlock-sha.c index 40d5680..a7fbade 100644 --- a/drivers/crypto/padlock-sha.c +++ b/drivers/crypto/padlock-sha.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "padlock.h" #define SHA1_DEFAULT_FALLBACK "sha1-generic" @@ -102,6 +103,7 @@ static void padlock_do_sha1(const char *in, char *out, int count) * PadLock microcode needs it that big. */ char buf[128+16]; char *result = NEAREST_ALIGNED(buf); + int ts_state; ((uint32_t *)result)[0] = SHA1_H0; ((uint32_t *)result)[1] = SHA1_H1; @@ -109,9 +111,12 @@ static void padlock_do_sha1(const char *in, char *out, int count) ((uint32_t *)result)[3] = SHA1_H3; ((uint32_t *)result)[4] = SHA1_H4; + /* prevent taking the spurious DNA fault with padlock. */ + ts_state = irq_ts_save(); asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" /* rep xsha1 */ : "+S"(in), "+D"(result) : "c"(count), "a"(0)); + irq_ts_restore(ts_state); padlock_output_block((uint32_t *)result, (uint32_t *)out, 5); } @@ -123,6 +128,7 @@ static void padlock_do_sha256(const char *in, char *out, int count) * PadLock microcode needs it that big. */ char buf[128+16]; char *result = NEAREST_ALIGNED(buf); + int ts_state; ((uint32_t *)result)[0] = SHA256_H0; ((uint32_t *)result)[1] = SHA256_H1; @@ -133,9 +139,12 @@ static void padlock_do_sha256(const char *in, char *out, int count) ((uint32_t *)result)[6] = SHA256_H6; ((uint32_t *)result)[7] = SHA256_H7; + /* prevent taking the spurious DNA fault with padlock. */ + ts_state = irq_ts_save(); asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" /* rep xsha256 */ : "+S"(in), "+D"(result) : "c"(count), "a"(0)); + irq_ts_restore(ts_state); padlock_output_block((uint32_t *)result, (uint32_t *)out, 8); } diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h index 96fa844..6d3b210 100644 --- a/include/asm-x86/i387.h +++ b/include/asm-x86/i387.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -236,6 +237,37 @@ static inline void kernel_fpu_end(void) preempt_enable(); } +/* + * Some instructions like VIA's padlock instructions generate a spurious + * DNA fault but don't modify SSE registers. And these instructions + * get used from interrupt context aswell. To prevent these kernel instructions + * in interrupt context interact wrongly with other user/kernel fpu usage, we + * should use them only in the context of irq_ts_save/restore() + */ +static inline int irq_ts_save(void) +{ + /* + * If we are in process context, we are ok to take a spurious DNA fault. + * Otherwise, doing clts() in process context require pre-emption to + * be disabled or some heavy lifting like kernel_fpu_begin() + */ + if (!in_interrupt()) + return 0; + + if (read_cr0() & X86_CR0_TS) { + clts(); + return 1; + } + + return 0; +} + +static inline void irq_ts_restore(int TS_state) +{ + if (TS_state) + stts(); +} + #ifdef CONFIG_X86_64 static inline void save_init_fpu(struct task_struct *tsk) -- cgit v0.10.2