summaryrefslogtreecommitdiff
path: root/arch/x86/crypto/glue_helper-asm-avx.S
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-05-02 21:53:12 (GMT)
committerLinus Torvalds <torvalds@linux-foundation.org>2013-05-02 21:53:12 (GMT)
commit797994f81a8b2bdca2eecffa415c1e7a89a4f961 (patch)
tree1383dc469c26ad37fdf960f682d9a48c782935c5 /arch/x86/crypto/glue_helper-asm-avx.S
parentc8d8566952fda026966784a62f324c8352f77430 (diff)
parent3862de1f6c442d53bd828d39f86d07d933a70605 (diff)
downloadlinux-fsl-qoriq-797994f81a8b2bdca2eecffa415c1e7a89a4f961.tar.xz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto update from Herbert Xu: - XTS mode optimisation for twofish/cast6/camellia/aes on x86 - AVX2/x86_64 implementation for blowfish/twofish/serpent/camellia - SSSE3/AVX/AVX2 optimisations for sha256/sha512 - Added driver for SAHARA2 crypto accelerator - Fix for GMAC when used in non-IPsec secnarios - Added generic CMAC implementation (including IPsec glue) - IP update for crypto/atmel - Support for more than one device in hwrng/timeriomem - Added Broadcom BCM2835 RNG driver - Misc fixes * git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (59 commits) crypto: caam - fix job ring cleanup code crypto: camellia - add AVX2/AES-NI/x86_64 assembler implementation of camellia cipher crypto: serpent - add AVX2/x86_64 assembler implementation of serpent cipher crypto: twofish - add AVX2/x86_64 assembler implementation of twofish cipher crypto: blowfish - add AVX2/x86_64 implementation of blowfish cipher crypto: tcrypt - add async cipher speed tests for blowfish crypto: testmgr - extend camellia test-vectors for camellia-aesni/avx2 crypto: aesni_intel - fix Kconfig problem with CRYPTO_GLUE_HELPER_X86 crypto: aesni_intel - add more optimized XTS mode for x86-64 crypto: x86/camellia-aesni-avx - add more optimized XTS code crypto: cast6-avx: use new optimized XTS code crypto: x86/twofish-avx - use optimized XTS code crypto: x86 - add more optimized XTS-mode for serpent-avx xfrm: add rfc4494 AES-CMAC-96 support crypto: add CMAC support to CryptoAPI crypto: testmgr - add empty test vectors for null ciphers crypto: testmgr - add AES GMAC test vectors crypto: gcm - fix rfc4543 to handle async crypto correctly crypto: gcm - make GMAC work when dst and src are different hwrng: timeriomem - added devicetree hooks ...
Diffstat (limited to 'arch/x86/crypto/glue_helper-asm-avx.S')
-rw-r--r--arch/x86/crypto/glue_helper-asm-avx.S61
1 files changed, 60 insertions, 1 deletions
diff --git a/arch/x86/crypto/glue_helper-asm-avx.S b/arch/x86/crypto/glue_helper-asm-avx.S
index f7b6ea2..02ee230 100644
--- a/arch/x86/crypto/glue_helper-asm-avx.S
+++ b/arch/x86/crypto/glue_helper-asm-avx.S
@@ -1,7 +1,7 @@
/*
* Shared glue code for 128bit block ciphers, AVX assembler macros
*
- * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -89,3 +89,62 @@
vpxor (6*16)(src), x6, x6; \
vpxor (7*16)(src), x7, x7; \
store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7);
+
+#define gf128mul_x_ble(iv, mask, tmp) \
+ vpsrad $31, iv, tmp; \
+ vpaddq iv, iv, iv; \
+ vpshufd $0x13, tmp, tmp; \
+ vpand mask, tmp, tmp; \
+ vpxor tmp, iv, iv;
+
+#define load_xts_8way(iv, src, dst, x0, x1, x2, x3, x4, x5, x6, x7, tiv, t0, \
+ t1, xts_gf128mul_and_shl1_mask) \
+ vmovdqa xts_gf128mul_and_shl1_mask, t0; \
+ \
+ /* load IV */ \
+ vmovdqu (iv), tiv; \
+ vpxor (0*16)(src), tiv, x0; \
+ vmovdqu tiv, (0*16)(dst); \
+ \
+ /* construct and store IVs, also xor with source */ \
+ gf128mul_x_ble(tiv, t0, t1); \
+ vpxor (1*16)(src), tiv, x1; \
+ vmovdqu tiv, (1*16)(dst); \
+ \
+ gf128mul_x_ble(tiv, t0, t1); \
+ vpxor (2*16)(src), tiv, x2; \
+ vmovdqu tiv, (2*16)(dst); \
+ \
+ gf128mul_x_ble(tiv, t0, t1); \
+ vpxor (3*16)(src), tiv, x3; \
+ vmovdqu tiv, (3*16)(dst); \
+ \
+ gf128mul_x_ble(tiv, t0, t1); \
+ vpxor (4*16)(src), tiv, x4; \
+ vmovdqu tiv, (4*16)(dst); \
+ \
+ gf128mul_x_ble(tiv, t0, t1); \
+ vpxor (5*16)(src), tiv, x5; \
+ vmovdqu tiv, (5*16)(dst); \
+ \
+ gf128mul_x_ble(tiv, t0, t1); \
+ vpxor (6*16)(src), tiv, x6; \
+ vmovdqu tiv, (6*16)(dst); \
+ \
+ gf128mul_x_ble(tiv, t0, t1); \
+ vpxor (7*16)(src), tiv, x7; \
+ vmovdqu tiv, (7*16)(dst); \
+ \
+ gf128mul_x_ble(tiv, t0, t1); \
+ vmovdqu tiv, (iv);
+
+#define store_xts_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7) \
+ vpxor (0*16)(dst), x0, x0; \
+ vpxor (1*16)(dst), x1, x1; \
+ vpxor (2*16)(dst), x2, x2; \
+ vpxor (3*16)(dst), x3, x3; \
+ vpxor (4*16)(dst), x4, x4; \
+ vpxor (5*16)(dst), x5, x5; \
+ vpxor (6*16)(dst), x6, x6; \
+ vpxor (7*16)(dst), x7, x7; \
+ store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7);