summaryrefslogtreecommitdiff
path: root/arch/x86/crypto/aesni-intel_asm.S
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-05-02 21:53:12 (GMT)
committerLinus Torvalds <torvalds@linux-foundation.org>2013-05-02 21:53:12 (GMT)
commit797994f81a8b2bdca2eecffa415c1e7a89a4f961 (patch)
tree1383dc469c26ad37fdf960f682d9a48c782935c5 /arch/x86/crypto/aesni-intel_asm.S
parentc8d8566952fda026966784a62f324c8352f77430 (diff)
parent3862de1f6c442d53bd828d39f86d07d933a70605 (diff)
downloadlinux-fsl-qoriq-797994f81a8b2bdca2eecffa415c1e7a89a4f961.tar.xz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto update from Herbert Xu: - XTS mode optimisation for twofish/cast6/camellia/aes on x86 - AVX2/x86_64 implementation for blowfish/twofish/serpent/camellia - SSSE3/AVX/AVX2 optimisations for sha256/sha512 - Added driver for SAHARA2 crypto accelerator - Fix for GMAC when used in non-IPsec secnarios - Added generic CMAC implementation (including IPsec glue) - IP update for crypto/atmel - Support for more than one device in hwrng/timeriomem - Added Broadcom BCM2835 RNG driver - Misc fixes * git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (59 commits) crypto: caam - fix job ring cleanup code crypto: camellia - add AVX2/AES-NI/x86_64 assembler implementation of camellia cipher crypto: serpent - add AVX2/x86_64 assembler implementation of serpent cipher crypto: twofish - add AVX2/x86_64 assembler implementation of twofish cipher crypto: blowfish - add AVX2/x86_64 implementation of blowfish cipher crypto: tcrypt - add async cipher speed tests for blowfish crypto: testmgr - extend camellia test-vectors for camellia-aesni/avx2 crypto: aesni_intel - fix Kconfig problem with CRYPTO_GLUE_HELPER_X86 crypto: aesni_intel - add more optimized XTS mode for x86-64 crypto: x86/camellia-aesni-avx - add more optimized XTS code crypto: cast6-avx: use new optimized XTS code crypto: x86/twofish-avx - use optimized XTS code crypto: x86 - add more optimized XTS-mode for serpent-avx xfrm: add rfc4494 AES-CMAC-96 support crypto: add CMAC support to CryptoAPI crypto: testmgr - add empty test vectors for null ciphers crypto: testmgr - add AES GMAC test vectors crypto: gcm - fix rfc4543 to handle async crypto correctly crypto: gcm - make GMAC work when dst and src are different hwrng: timeriomem - added devicetree hooks ...
Diffstat (limited to 'arch/x86/crypto/aesni-intel_asm.S')
-rw-r--r--arch/x86/crypto/aesni-intel_asm.S117
1 files changed, 117 insertions, 0 deletions
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 04b7977..62fe22c 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -34,6 +34,10 @@
#ifdef __x86_64__
.data
+.align 16
+.Lgf128mul_x_ble_mask:
+ .octa 0x00000000000000010000000000000087
+
POLY: .octa 0xC2000000000000000000000000000001
TWOONE: .octa 0x00000001000000000000000000000001
@@ -105,6 +109,8 @@ enc: .octa 0x2
#define CTR %xmm11
#define INC %xmm12
+#define GF128MUL_MASK %xmm10
+
#ifdef __x86_64__
#define AREG %rax
#define KEYP %rdi
@@ -2636,4 +2642,115 @@ ENTRY(aesni_ctr_enc)
.Lctr_enc_just_ret:
ret
ENDPROC(aesni_ctr_enc)
+
+/*
+ * _aesni_gf128mul_x_ble: internal ABI
+ * Multiply in GF(2^128) for XTS IVs
+ * input:
+ * IV: current IV
+ * GF128MUL_MASK == mask with 0x87 and 0x01
+ * output:
+ * IV: next IV
+ * changed:
+ * CTR: == temporary value
+ */
+#define _aesni_gf128mul_x_ble() \
+ pshufd $0x13, IV, CTR; \
+ paddq IV, IV; \
+ psrad $31, CTR; \
+ pand GF128MUL_MASK, CTR; \
+ pxor CTR, IV;
+
+/*
+ * void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
+ * bool enc, u8 *iv)
+ */
+ENTRY(aesni_xts_crypt8)
+ cmpb $0, %cl
+ movl $0, %ecx
+ movl $240, %r10d
+ leaq _aesni_enc4, %r11
+ leaq _aesni_dec4, %rax
+ cmovel %r10d, %ecx
+ cmoveq %rax, %r11
+
+ movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK
+ movups (IVP), IV
+
+ mov 480(KEYP), KLEN
+ addq %rcx, KEYP
+
+ movdqa IV, STATE1
+ pxor 0x00(INP), STATE1
+ movdqu IV, 0x00(OUTP)
+
+ _aesni_gf128mul_x_ble()
+ movdqa IV, STATE2
+ pxor 0x10(INP), STATE2
+ movdqu IV, 0x10(OUTP)
+
+ _aesni_gf128mul_x_ble()
+ movdqa IV, STATE3
+ pxor 0x20(INP), STATE3
+ movdqu IV, 0x20(OUTP)
+
+ _aesni_gf128mul_x_ble()
+ movdqa IV, STATE4
+ pxor 0x30(INP), STATE4
+ movdqu IV, 0x30(OUTP)
+
+ call *%r11
+
+ pxor 0x00(OUTP), STATE1
+ movdqu STATE1, 0x00(OUTP)
+
+ _aesni_gf128mul_x_ble()
+ movdqa IV, STATE1
+ pxor 0x40(INP), STATE1
+ movdqu IV, 0x40(OUTP)
+
+ pxor 0x10(OUTP), STATE2
+ movdqu STATE2, 0x10(OUTP)
+
+ _aesni_gf128mul_x_ble()
+ movdqa IV, STATE2
+ pxor 0x50(INP), STATE2
+ movdqu IV, 0x50(OUTP)
+
+ pxor 0x20(OUTP), STATE3
+ movdqu STATE3, 0x20(OUTP)
+
+ _aesni_gf128mul_x_ble()
+ movdqa IV, STATE3
+ pxor 0x60(INP), STATE3
+ movdqu IV, 0x60(OUTP)
+
+ pxor 0x30(OUTP), STATE4
+ movdqu STATE4, 0x30(OUTP)
+
+ _aesni_gf128mul_x_ble()
+ movdqa IV, STATE4
+ pxor 0x70(INP), STATE4
+ movdqu IV, 0x70(OUTP)
+
+ _aesni_gf128mul_x_ble()
+ movups IV, (IVP)
+
+ call *%r11
+
+ pxor 0x40(OUTP), STATE1
+ movdqu STATE1, 0x40(OUTP)
+
+ pxor 0x50(OUTP), STATE2
+ movdqu STATE2, 0x50(OUTP)
+
+ pxor 0x60(OUTP), STATE3
+ movdqu STATE3, 0x60(OUTP)
+
+ pxor 0x70(OUTP), STATE4
+ movdqu STATE4, 0x70(OUTP)
+
+ ret
+ENDPROC(aesni_xts_crypt8)
+
#endif