author      Jim Kukunas <james.t.kukunas@linux.intel.com>  2012-05-22 03:54:04 (GMT)
committer   NeilBrown <neilb@suse.de>                      2012-05-22 03:54:04 (GMT)
commit      ea4d26ae24e58fbd2c61de9242adab053cb982d8 (patch)
tree        3115dd168f0cf1eb1eb5dd6aecc385cfa0e8bc05 /arch/x86/include/asm/xor_64.h
parent      56a519913eeba2bdae4d7ee39e80fab442c3836c (diff)
download    linux-ea4d26ae24e58fbd2c61de9242adab053cb982d8.tar.xz
raid5: add AVX optimized RAID5 checksumming
Optimize RAID5 xor checksumming by taking advantage of 256-bit YMM registers introduced in AVX.

Signed-off-by: Jim Kukunas <james.t.kukunas@linux.intel.com>
Signed-off-by: NeilBrown <neilb@suse.de>
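For context, AVX lets a single vxorps instruction XOR 32 bytes at a time in a 256-bit YMM register, twice the width of the 128-bit XMM registers used by the existing SSE routines. Below is a minimal user-space sketch of one such xor pass using compiler intrinsics; the actual kernel routines in xor_avx.h are written as inline asm over several YMM registers and must run between kernel_fpu_begin() and kernel_fpu_end(), so this is illustrative only and the function name is made up:

#include <immintrin.h>

/* XOR 'bytes' bytes of p2 into p1, 32 bytes (one YMM register) per step.
 * vxorps is a purely bitwise operation, so no FP arithmetic or exceptions
 * occur even though the intrinsic nominally works on 'float' lanes.
 * Assumes 'bytes' is a multiple of 32, which holds for RAID5 stripe blocks. */
static void xor_avx_2_sketch(unsigned long bytes,
                             unsigned long *p1, unsigned long *p2)
{
        float *d = (float *)p1;
        const float *s = (const float *)p2;
        unsigned long i;

        for (i = 0; i < bytes / sizeof(float); i += 8) {
                __m256 a = _mm256_loadu_ps(d + i);
                __m256 b = _mm256_loadu_ps(s + i);
                _mm256_storeu_ps(d + i, _mm256_xor_ps(a, b));
        }
}

The SSE routines this supplements store results with non-temporal movntps instructions ("write around L2", as the comment in the diff below puts it); an AVX analogue of that policy would use _mm256_stream_ps() in place of _mm256_storeu_ps(), noting that streaming stores require 32-byte alignment.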
Diffstat (limited to 'arch/x86/include/asm/xor_64.h')
-rw-r--r--  arch/x86/include/asm/xor_64.h | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/xor_64.h b/arch/x86/include/asm/xor_64.h
index 1549b5e..b9b2323 100644
--- a/arch/x86/include/asm/xor_64.h
+++ b/arch/x86/include/asm/xor_64.h
@@ -347,15 +347,21 @@ static struct xor_block_template xor_block_sse = {
 	.do_5 = xor_sse_5,
 };
 
+
+/* Also try the AVX routines */
+#include "xor_avx.h"
+
 #undef XOR_TRY_TEMPLATES
 #define XOR_TRY_TEMPLATES			\
 do {						\
+	AVX_XOR_SPEED;				\
 	xor_speed(&xor_block_sse);		\
 } while (0)
 
 /* We force the use of the SSE xor block because it can write around L2.
    We may also be able to load into the L1 only depending on how the cpu
    deals with a load to a line that is being prefetched.  */
-#define XOR_SELECT_TEMPLATE(FASTEST) (&xor_block_sse)
+#define XOR_SELECT_TEMPLATE(FASTEST) \
+	AVX_SELECT(&xor_block_sse)
 
 #endif /* _ASM_X86_XOR_64_H */
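The two hooks added above come from the new xor_avx.h. At boot, XOR_TRY_TEMPLATES benchmarks each candidate implementation via xor_speed(), and XOR_SELECT_TEMPLATE then picks the one actually used. A sketch of how the AVX hooks plausibly expand, assuming a cpu_has_avx feature test and an xor_block_avx template as introduced elsewhere in this patch (see the real xor_avx.h in the commit for the authoritative definitions):

/* Illustrative only: benchmark the AVX template only when the CPU
 * supports AVX, and fall back to the caller's choice otherwise. */
#define AVX_XOR_SPEED				\
do {						\
	if (cpu_has_avx)			\
		xor_speed(&xor_block_avx);	\
} while (0)

#define AVX_SELECT(FASTEST) \
	(cpu_has_avx ? &xor_block_avx : FASTEST)

With definitions along these lines, machines without AVX keep the existing behavior: XOR_SELECT_TEMPLATE still forces the SSE block, preserving the write-around-L2 policy described in the comment above.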