summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/Kconfig.debug12
-rw-r--r--lib/Makefile4
-rw-r--r--lib/asn1_decoder.c8
-rw-r--r--lib/atomic64.c17
-rw-r--r--lib/bug.c1
-rw-r--r--lib/cpu_rmap.c54
-rw-r--r--lib/dma-debug.c66
-rw-r--r--lib/of-reconfig-notifier-error-inject.c (renamed from lib/pSeries-reconfig-notifier-error-inject.c)22
-rw-r--r--lib/raid6/Makefile9
-rw-r--r--lib/raid6/algos.c12
-rw-r--r--lib/raid6/altivec.uc3
-rw-r--r--lib/raid6/avx2.c251
-rw-r--r--lib/raid6/mmx.c2
-rw-r--r--lib/raid6/recov_avx2.c323
-rw-r--r--lib/raid6/recov_ssse3.c4
-rw-r--r--lib/raid6/sse1.c2
-rw-r--r--lib/raid6/sse2.c8
-rw-r--r--lib/raid6/test/Makefile29
-rw-r--r--lib/raid6/x86.h14
-rw-r--r--lib/rbtree.c20
20 files changed, 783 insertions, 78 deletions
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index e458782..67604e5 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -134,7 +134,7 @@ config DEBUG_SECTION_MISMATCH
any use of code/data previously in these sections would
most likely result in an oops.
In the code, functions and variables are annotated with
- __init, __devinit, etc. (see the full list in include/linux/init.h),
+ __init, __cpuinit, etc. (see the full list in include/linux/init.h),
which results in the code/data being placed in specific sections.
The section mismatch analysis is always performed after a full
kernel build, and enabling this option causes the following
@@ -1192,14 +1192,14 @@ config MEMORY_NOTIFIER_ERROR_INJECT
If unsure, say N.
-config PSERIES_RECONFIG_NOTIFIER_ERROR_INJECT
- tristate "pSeries reconfig notifier error injection module"
- depends on PPC_PSERIES && NOTIFIER_ERROR_INJECTION
+config OF_RECONFIG_NOTIFIER_ERROR_INJECT
+ tristate "OF reconfig notifier error injection module"
+ depends on OF_DYNAMIC && NOTIFIER_ERROR_INJECTION
help
This option provides the ability to inject artificial errors to
- pSeries reconfig notifier chain callbacks. It is controlled
+ OF reconfig notifier chain callbacks. It is controlled
through debugfs interface under
- /sys/kernel/debug/notifier-error-inject/pSeries-reconfig/
+ /sys/kernel/debug/notifier-error-inject/OF-reconfig/
If the notifier call chain should be failed with some events
notified, write the error code to "actions/<notifier event>/error".
diff --git a/lib/Makefile b/lib/Makefile
index 5558e35..02ed6c0 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -95,8 +95,8 @@ obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o
obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o
obj-$(CONFIG_PM_NOTIFIER_ERROR_INJECT) += pm-notifier-error-inject.o
obj-$(CONFIG_MEMORY_NOTIFIER_ERROR_INJECT) += memory-notifier-error-inject.o
-obj-$(CONFIG_PSERIES_RECONFIG_NOTIFIER_ERROR_INJECT) += \
- pSeries-reconfig-notifier-error-inject.o
+obj-$(CONFIG_OF_RECONFIG_NOTIFIER_ERROR_INJECT) += \
+ of-reconfig-notifier-error-inject.o
lib-$(CONFIG_GENERIC_BUG) += bug.o
diff --git a/lib/asn1_decoder.c b/lib/asn1_decoder.c
index 5293d24..11b9b01 100644
--- a/lib/asn1_decoder.c
+++ b/lib/asn1_decoder.c
@@ -81,7 +81,7 @@ next_tag:
goto next_tag;
}
- if (unlikely((tag & 0x1f) == 0x1f)) {
+ if (unlikely((tag & 0x1f) == ASN1_LONG_TAG)) {
do {
if (unlikely(datalen - dp < 2))
goto data_overrun_error;
@@ -96,7 +96,7 @@ next_tag:
goto next_tag;
}
- if (unlikely(len == 0x80)) {
+ if (unlikely(len == ASN1_INDEFINITE_LENGTH)) {
/* Indefinite length */
if (unlikely((tag & ASN1_CONS_BIT) == ASN1_PRIM << 5))
goto indefinite_len_primitive;
@@ -222,7 +222,7 @@ next_op:
if (unlikely(dp >= datalen - 1))
goto data_overrun_error;
tag = data[dp++];
- if (unlikely((tag & 0x1f) == 0x1f))
+ if (unlikely((tag & 0x1f) == ASN1_LONG_TAG))
goto long_tag_not_supported;
if (op & ASN1_OP_MATCH__ANY) {
@@ -254,7 +254,7 @@ next_op:
len = data[dp++];
if (len > 0x7f) {
- if (unlikely(len == 0x80)) {
+ if (unlikely(len == ASN1_INDEFINITE_LENGTH)) {
/* Indefinite length */
if (unlikely(!(tag & ASN1_CONS_BIT)))
goto indefinite_len_primitive;
diff --git a/lib/atomic64.c b/lib/atomic64.c
index 9785378..08a4f06 100644
--- a/lib/atomic64.c
+++ b/lib/atomic64.c
@@ -31,7 +31,11 @@
static union {
raw_spinlock_t lock;
char pad[L1_CACHE_BYTES];
-} atomic64_lock[NR_LOCKS] __cacheline_aligned_in_smp;
+} atomic64_lock[NR_LOCKS] __cacheline_aligned_in_smp = {
+ [0 ... (NR_LOCKS - 1)] = {
+ .lock = __RAW_SPIN_LOCK_UNLOCKED(atomic64_lock.lock),
+ },
+};
static inline raw_spinlock_t *lock_addr(const atomic64_t *v)
{
@@ -173,14 +177,3 @@ int atomic64_add_unless(atomic64_t *v, long long a, long long u)
return ret;
}
EXPORT_SYMBOL(atomic64_add_unless);
-
-static int init_atomic64_lock(void)
-{
- int i;
-
- for (i = 0; i < NR_LOCKS; ++i)
- raw_spin_lock_init(&atomic64_lock[i].lock);
- return 0;
-}
-
-pure_initcall(init_atomic64_lock);
diff --git a/lib/bug.c b/lib/bug.c
index a28c141..d0cdf14 100644
--- a/lib/bug.c
+++ b/lib/bug.c
@@ -55,6 +55,7 @@ static inline unsigned long bug_addr(const struct bug_entry *bug)
}
#ifdef CONFIG_MODULES
+/* Updates are protected by module mutex */
static LIST_HEAD(module_bug_list);
static const struct bug_entry *module_find_bug(unsigned long bugaddr)
diff --git a/lib/cpu_rmap.c b/lib/cpu_rmap.c
index 145dec5..5fbed5c 100644
--- a/lib/cpu_rmap.c
+++ b/lib/cpu_rmap.c
@@ -45,6 +45,7 @@ struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags)
if (!rmap)
return NULL;
+ kref_init(&rmap->refcount);
rmap->obj = (void **)((char *)rmap + obj_offset);
/* Initially assign CPUs to objects on a rota, since we have
@@ -63,6 +64,35 @@ struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags)
}
EXPORT_SYMBOL(alloc_cpu_rmap);
+/**
+ * cpu_rmap_release - internal reclaiming helper called from kref_put
+ * @ref: kref to struct cpu_rmap
+ */
+static void cpu_rmap_release(struct kref *ref)
+{
+ struct cpu_rmap *rmap = container_of(ref, struct cpu_rmap, refcount);
+ kfree(rmap);
+}
+
+/**
+ * cpu_rmap_get - internal helper to get new ref on a cpu_rmap
+ * @rmap: reverse-map allocated with alloc_cpu_rmap()
+ */
+static inline void cpu_rmap_get(struct cpu_rmap *rmap)
+{
+ kref_get(&rmap->refcount);
+}
+
+/**
+ * cpu_rmap_put - release ref on a cpu_rmap
+ * @rmap: reverse-map allocated with alloc_cpu_rmap()
+ */
+int cpu_rmap_put(struct cpu_rmap *rmap)
+{
+ return kref_put(&rmap->refcount, cpu_rmap_release);
+}
+EXPORT_SYMBOL(cpu_rmap_put);
+
/* Reevaluate nearest object for given CPU, comparing with the given
* neighbours at the given distance.
*/
@@ -197,8 +227,7 @@ struct irq_glue {
* free_irq_cpu_rmap - free a CPU affinity reverse-map used for IRQs
* @rmap: Reverse-map allocated with alloc_irq_cpu_map(), or %NULL
*
- * Must be called in process context, before freeing the IRQs, and
- * without holding any locks required by global workqueue items.
+ * Must be called in process context, before freeing the IRQs.
*/
void free_irq_cpu_rmap(struct cpu_rmap *rmap)
{
@@ -212,12 +241,18 @@ void free_irq_cpu_rmap(struct cpu_rmap *rmap)
glue = rmap->obj[index];
irq_set_affinity_notifier(glue->notify.irq, NULL);
}
- irq_run_affinity_notifiers();
- kfree(rmap);
+ cpu_rmap_put(rmap);
}
EXPORT_SYMBOL(free_irq_cpu_rmap);
+/**
+ * irq_cpu_rmap_notify - callback for IRQ subsystem when IRQ affinity updated
+ * @notify: struct irq_affinity_notify passed by irq/manage.c
+ * @mask: cpu mask for new SMP affinity
+ *
+ * This is executed in workqueue context.
+ */
static void
irq_cpu_rmap_notify(struct irq_affinity_notify *notify, const cpumask_t *mask)
{
@@ -230,10 +265,16 @@ irq_cpu_rmap_notify(struct irq_affinity_notify *notify, const cpumask_t *mask)
pr_warning("irq_cpu_rmap_notify: update failed: %d\n", rc);
}
+/**
+ * irq_cpu_rmap_release - reclaiming callback for IRQ subsystem
+ * @ref: kref to struct irq_affinity_notify passed by irq/manage.c
+ */
static void irq_cpu_rmap_release(struct kref *ref)
{
struct irq_glue *glue =
container_of(ref, struct irq_glue, notify.kref);
+
+ cpu_rmap_put(glue->rmap);
kfree(glue);
}
@@ -258,10 +299,13 @@ int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq)
glue->notify.notify = irq_cpu_rmap_notify;
glue->notify.release = irq_cpu_rmap_release;
glue->rmap = rmap;
+ cpu_rmap_get(rmap);
glue->index = cpu_rmap_add(rmap, glue);
rc = irq_set_affinity_notifier(irq, &glue->notify);
- if (rc)
+ if (rc) {
+ cpu_rmap_put(glue->rmap);
kfree(glue);
+ }
return rc;
}
EXPORT_SYMBOL(irq_cpu_rmap_add);
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index d84beb9..5e396ac 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -45,6 +45,12 @@ enum {
dma_debug_coherent,
};
+enum map_err_types {
+ MAP_ERR_CHECK_NOT_APPLICABLE,
+ MAP_ERR_NOT_CHECKED,
+ MAP_ERR_CHECKED,
+};
+
#define DMA_DEBUG_STACKTRACE_ENTRIES 5
struct dma_debug_entry {
@@ -57,6 +63,7 @@ struct dma_debug_entry {
int direction;
int sg_call_ents;
int sg_mapped_ents;
+ enum map_err_types map_err_type;
#ifdef CONFIG_STACKTRACE
struct stack_trace stacktrace;
unsigned long st_entries[DMA_DEBUG_STACKTRACE_ENTRIES];
@@ -114,6 +121,12 @@ static struct device_driver *current_driver __read_mostly;
static DEFINE_RWLOCK(driver_name_lock);
+static const char *const maperr2str[] = {
+ [MAP_ERR_CHECK_NOT_APPLICABLE] = "dma map error check not applicable",
+ [MAP_ERR_NOT_CHECKED] = "dma map error not checked",
+ [MAP_ERR_CHECKED] = "dma map error checked",
+};
+
static const char *type2name[4] = { "single", "page",
"scather-gather", "coherent" };
@@ -376,11 +389,12 @@ void debug_dma_dump_mappings(struct device *dev)
list_for_each_entry(entry, &bucket->list, list) {
if (!dev || dev == entry->dev) {
dev_info(entry->dev,
- "%s idx %d P=%Lx D=%Lx L=%Lx %s\n",
+ "%s idx %d P=%Lx D=%Lx L=%Lx %s %s\n",
type2name[entry->type], idx,
(unsigned long long)entry->paddr,
entry->dev_addr, entry->size,
- dir2name[entry->direction]);
+ dir2name[entry->direction],
+ maperr2str[entry->map_err_type]);
}
}
@@ -844,16 +858,16 @@ static void check_unmap(struct dma_debug_entry *ref)
struct hash_bucket *bucket;
unsigned long flags;
- if (dma_mapping_error(ref->dev, ref->dev_addr)) {
- err_printk(ref->dev, NULL, "DMA-API: device driver tries "
- "to free an invalid DMA memory address\n");
- return;
- }
-
bucket = get_hash_bucket(ref, &flags);
entry = bucket_find_exact(bucket, ref);
if (!entry) {
+ if (dma_mapping_error(ref->dev, ref->dev_addr)) {
+ err_printk(ref->dev, NULL,
+ "DMA-API: device driver tries "
+ "to free an invalid DMA memory address\n");
+ return;
+ }
err_printk(ref->dev, NULL, "DMA-API: device driver tries "
"to free DMA memory it has not allocated "
"[device address=0x%016llx] [size=%llu bytes]\n",
@@ -910,6 +924,15 @@ static void check_unmap(struct dma_debug_entry *ref)
dir2name[ref->direction]);
}
+ if (entry->map_err_type == MAP_ERR_NOT_CHECKED) {
+ err_printk(ref->dev, entry,
+ "DMA-API: device driver failed to check map error"
+ "[device address=0x%016llx] [size=%llu bytes] "
+ "[mapped as %s]",
+ ref->dev_addr, ref->size,
+ type2name[entry->type]);
+ }
+
hash_bucket_del(entry);
dma_entry_free(entry);
@@ -1017,7 +1040,7 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
if (unlikely(global_disable))
return;
- if (unlikely(dma_mapping_error(dev, dma_addr)))
+ if (dma_mapping_error(dev, dma_addr))
return;
entry = dma_entry_alloc();
@@ -1030,6 +1053,7 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
entry->dev_addr = dma_addr;
entry->size = size;
entry->direction = direction;
+ entry->map_err_type = MAP_ERR_NOT_CHECKED;
if (map_single)
entry->type = dma_debug_single;
@@ -1045,6 +1069,30 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
}
EXPORT_SYMBOL(debug_dma_map_page);
+void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+ struct dma_debug_entry ref;
+ struct dma_debug_entry *entry;
+ struct hash_bucket *bucket;
+ unsigned long flags;
+
+ if (unlikely(global_disable))
+ return;
+
+ ref.dev = dev;
+ ref.dev_addr = dma_addr;
+ bucket = get_hash_bucket(&ref, &flags);
+ entry = bucket_find_exact(bucket, &ref);
+
+ if (!entry)
+ goto out;
+
+ entry->map_err_type = MAP_ERR_CHECKED;
+out:
+ put_hash_bucket(bucket, &flags);
+}
+EXPORT_SYMBOL(debug_dma_mapping_error);
+
void debug_dma_unmap_page(struct device *dev, dma_addr_t addr,
size_t size, int direction, bool map_single)
{
diff --git a/lib/pSeries-reconfig-notifier-error-inject.c b/lib/of-reconfig-notifier-error-inject.c
index 7f7c98d..8dc7986 100644
--- a/lib/pSeries-reconfig-notifier-error-inject.c
+++ b/lib/of-reconfig-notifier-error-inject.c
@@ -1,20 +1,20 @@
#include <linux/kernel.h>
#include <linux/module.h>
-
-#include <asm/pSeries_reconfig.h>
+#include <linux/of.h>
#include "notifier-error-inject.h"
static int priority;
module_param(priority, int, 0);
-MODULE_PARM_DESC(priority, "specify pSeries reconfig notifier priority");
+MODULE_PARM_DESC(priority, "specify OF reconfig notifier priority");
static struct notifier_err_inject reconfig_err_inject = {
.actions = {
- { NOTIFIER_ERR_INJECT_ACTION(PSERIES_RECONFIG_ADD) },
- { NOTIFIER_ERR_INJECT_ACTION(PSERIES_RECONFIG_REMOVE) },
- { NOTIFIER_ERR_INJECT_ACTION(PSERIES_DRCONF_MEM_ADD) },
- { NOTIFIER_ERR_INJECT_ACTION(PSERIES_DRCONF_MEM_REMOVE) },
+ { NOTIFIER_ERR_INJECT_ACTION(OF_RECONFIG_ATTACH_NODE) },
+ { NOTIFIER_ERR_INJECT_ACTION(OF_RECONFIG_DETACH_NODE) },
+ { NOTIFIER_ERR_INJECT_ACTION(OF_RECONFIG_ADD_PROPERTY) },
+ { NOTIFIER_ERR_INJECT_ACTION(OF_RECONFIG_REMOVE_PROPERTY) },
+ { NOTIFIER_ERR_INJECT_ACTION(OF_RECONFIG_UPDATE_PROPERTY) },
{}
}
};
@@ -25,12 +25,12 @@ static int err_inject_init(void)
{
int err;
- dir = notifier_err_inject_init("pSeries-reconfig",
+ dir = notifier_err_inject_init("OF-reconfig",
notifier_err_inject_dir, &reconfig_err_inject, priority);
if (IS_ERR(dir))
return PTR_ERR(dir);
- err = pSeries_reconfig_notifier_register(&reconfig_err_inject.nb);
+ err = of_reconfig_notifier_register(&reconfig_err_inject.nb);
if (err)
debugfs_remove_recursive(dir);
@@ -39,13 +39,13 @@ static int err_inject_init(void)
static void err_inject_exit(void)
{
- pSeries_reconfig_notifier_unregister(&reconfig_err_inject.nb);
+ of_reconfig_notifier_unregister(&reconfig_err_inject.nb);
debugfs_remove_recursive(dir);
}
module_init(err_inject_init);
module_exit(err_inject_exit);
-MODULE_DESCRIPTION("pSeries reconfig notifier error injection module");
+MODULE_DESCRIPTION("OF reconfig notifier error injection module");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Akinobu Mita <akinobu.mita@gmail.com>");
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index de06dfe..9f7c184 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -1,8 +1,11 @@
obj-$(CONFIG_RAID6_PQ) += raid6_pq.o
-raid6_pq-y += algos.o recov.o recov_ssse3.o tables.o int1.o int2.o int4.o \
- int8.o int16.o int32.o altivec1.o altivec2.o altivec4.o \
- altivec8.o mmx.o sse1.o sse2.o
+raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \
+ int8.o int16.o int32.o
+
+raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o
+raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o
+
hostprogs-y += mktables
quiet_cmd_unroll = UNROLL $@
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 589f5f5..6d7316f 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -45,11 +45,20 @@ const struct raid6_calls * const raid6_algos[] = {
&raid6_sse1x2,
&raid6_sse2x1,
&raid6_sse2x2,
+#ifdef CONFIG_AS_AVX2
+ &raid6_avx2x1,
+ &raid6_avx2x2,
+#endif
#endif
#if defined(__x86_64__) && !defined(__arch_um__)
&raid6_sse2x1,
&raid6_sse2x2,
&raid6_sse2x4,
+#ifdef CONFIG_AS_AVX2
+ &raid6_avx2x1,
+ &raid6_avx2x2,
+ &raid6_avx2x4,
+#endif
#endif
#ifdef CONFIG_ALTIVEC
&raid6_altivec1,
@@ -72,6 +81,9 @@ EXPORT_SYMBOL_GPL(raid6_datap_recov);
const struct raid6_recov_calls *const raid6_recov_algos[] = {
#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
+#ifdef CONFIG_AS_AVX2
+ &raid6_recov_avx2,
+#endif
&raid6_recov_ssse3,
#endif
&raid6_recov_intx1,
diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc
index b71012b..7cc12b5 100644
--- a/lib/raid6/altivec.uc
+++ b/lib/raid6/altivec.uc
@@ -24,13 +24,10 @@
#include <linux/raid/pq.h>
-#ifdef CONFIG_ALTIVEC
-
#include <altivec.h>
#ifdef __KERNEL__
# include <asm/cputable.h>
# include <asm/switch_to.h>
-#endif
/*
* This is the C data type to use. We use a vector of
diff --git a/lib/raid6/avx2.c b/lib/raid6/avx2.c
new file mode 100644
index 0000000..bc3b1dd
--- /dev/null
+++ b/lib/raid6/avx2.c
@@ -0,0 +1,251 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 2012 Intel Corporation
+ * Author: Yuanhan Liu <yuanhan.liu@linux.intel.com>
+ *
+ * Based on sse2.c: Copyright 2002 H. Peter Anvin - All Rights Reserved
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
+ * Boston MA 02111-1307, USA; either version 2 of the License, or
+ * (at your option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * AVX2 implementation of RAID-6 syndrome functions
+ *
+ */
+
+#ifdef CONFIG_AS_AVX2
+
+#include <linux/raid/pq.h>
+#include "x86.h"
+
+static const struct raid6_avx2_constants {
+ u64 x1d[4];
+} raid6_avx2_constants __aligned(32) = {
+ { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
+ 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,},
+};
+
+static int raid6_have_avx2(void)
+{
+ return boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX);
+}
+
+/*
+ * Plain AVX2 implementation
+ */
+static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ int d, z, z0;
+
+ z0 = disks - 3; /* Highest data disk */
+ p = dptr[z0+1]; /* XOR parity */
+ q = dptr[z0+2]; /* RS syndrome */
+
+ kernel_fpu_begin();
+
+ asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
+ asm volatile("vpxor %ymm3,%ymm3,%ymm3"); /* Zero temp */
+
+ for (d = 0; d < bytes; d += 32) {
+ asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
+ asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */
+ asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
+ asm volatile("vmovdqa %ymm2,%ymm4");/* Q[0] */
+ asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z0-1][d]));
+ for (z = z0-2; z >= 0; z--) {
+ asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
+ asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5");
+ asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
+ asm volatile("vpand %ymm0,%ymm5,%ymm5");
+ asm volatile("vpxor %ymm5,%ymm4,%ymm4");
+ asm volatile("vpxor %ymm6,%ymm2,%ymm2");
+ asm volatile("vpxor %ymm6,%ymm4,%ymm4");
+ asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z][d]));
+ }
+ asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5");
+ asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
+ asm volatile("vpand %ymm0,%ymm5,%ymm5");
+ asm volatile("vpxor %ymm5,%ymm4,%ymm4");
+ asm volatile("vpxor %ymm6,%ymm2,%ymm2");
+ asm volatile("vpxor %ymm6,%ymm4,%ymm4");
+
+ asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
+ asm volatile("vpxor %ymm2,%ymm2,%ymm2");
+ asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
+ asm volatile("vpxor %ymm4,%ymm4,%ymm4");
+ }
+
+ asm volatile("sfence" : : : "memory");
+ kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_avx2x1 = {
+ raid6_avx21_gen_syndrome,
+ raid6_have_avx2,
+ "avx2x1",
+ 1 /* Has cache hints */
+};
+
+/*
+ * Unrolled-by-2 AVX2 implementation
+ */
+static void raid6_avx22_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ int d, z, z0;
+
+ z0 = disks - 3; /* Highest data disk */
+ p = dptr[z0+1]; /* XOR parity */
+ q = dptr[z0+2]; /* RS syndrome */
+
+ kernel_fpu_begin();
+
+ asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
+ asm volatile("vpxor %ymm1,%ymm1,%ymm1"); /* Zero temp */
+
+ /* We uniformly assume a single prefetch covers at least 32 bytes */
+ for (d = 0; d < bytes; d += 64) {
+ asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
+ asm volatile("prefetchnta %0" : : "m" (dptr[z0][d+32]));
+ asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */
+ asm volatile("vmovdqa %0,%%ymm3" : : "m" (dptr[z0][d+32]));/* P[1] */
+ asm volatile("vmovdqa %ymm2,%ymm4"); /* Q[0] */
+ asm volatile("vmovdqa %ymm3,%ymm6"); /* Q[1] */
+ for (z = z0-1; z >= 0; z--) {
+ asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
+ asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32]));
+ asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5");
+ asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7");
+ asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
+ asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
+ asm volatile("vpand %ymm0,%ymm5,%ymm5");
+ asm volatile("vpand %ymm0,%ymm7,%ymm7");
+ asm volatile("vpxor %ymm5,%ymm4,%ymm4");
+ asm volatile("vpxor %ymm7,%ymm6,%ymm6");
+ asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d]));
+ asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32]));
+ asm volatile("vpxor %ymm5,%ymm2,%ymm2");
+ asm volatile("vpxor %ymm7,%ymm3,%ymm3");
+ asm volatile("vpxor %ymm5,%ymm4,%ymm4");
+ asm volatile("vpxor %ymm7,%ymm6,%ymm6");
+ }
+ asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
+ asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
+ asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
+ asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
+ }
+
+ asm volatile("sfence" : : : "memory");
+ kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_avx2x2 = {
+ raid6_avx22_gen_syndrome,
+ raid6_have_avx2,
+ "avx2x2",
+ 1 /* Has cache hints */
+};
+
+#ifdef CONFIG_X86_64
+
+/*
+ * Unrolled-by-4 AVX2 implementation
+ */
+static void raid6_avx24_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ int d, z, z0;
+
+ z0 = disks - 3; /* Highest data disk */
+ p = dptr[z0+1]; /* XOR parity */
+ q = dptr[z0+2]; /* RS syndrome */
+
+ kernel_fpu_begin();
+
+ asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
+ asm volatile("vpxor %ymm1,%ymm1,%ymm1"); /* Zero temp */
+ asm volatile("vpxor %ymm2,%ymm2,%ymm2"); /* P[0] */
+ asm volatile("vpxor %ymm3,%ymm3,%ymm3"); /* P[1] */
+ asm volatile("vpxor %ymm4,%ymm4,%ymm4"); /* Q[0] */
+ asm volatile("vpxor %ymm6,%ymm6,%ymm6"); /* Q[1] */
+ asm volatile("vpxor %ymm10,%ymm10,%ymm10"); /* P[2] */
+ asm volatile("vpxor %ymm11,%ymm11,%ymm11"); /* P[3] */
+ asm volatile("vpxor %ymm12,%ymm12,%ymm12"); /* Q[2] */
+ asm volatile("vpxor %ymm14,%ymm14,%ymm14"); /* Q[3] */
+
+ for (d = 0; d < bytes; d += 128) {
+ for (z = z0; z >= 0; z--) {
+ asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
+ asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32]));
+ asm volatile("prefetchnta %0" : : "m" (dptr[z][d+64]));
+ asm volatile("prefetchnta %0" : : "m" (dptr[z][d+96]));
+ asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5");
+ asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7");
+ asm volatile("vpcmpgtb %ymm12,%ymm1,%ymm13");
+ asm volatile("vpcmpgtb %ymm14,%ymm1,%ymm15");
+ asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
+ asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
+ asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
+ asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
+ asm volatile("vpand %ymm0,%ymm5,%ymm5");
+ asm volatile("vpand %ymm0,%ymm7,%ymm7");
+ asm volatile("vpand %ymm0,%ymm13,%ymm13");
+ asm volatile("vpand %ymm0,%ymm15,%ymm15");
+ asm volatile("vpxor %ymm5,%ymm4,%ymm4");
+ asm volatile("vpxor %ymm7,%ymm6,%ymm6");
+ asm volatile("vpxor %ymm13,%ymm12,%ymm12");
+ asm volatile("vpxor %ymm15,%ymm14,%ymm14");
+ asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d]));
+ asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32]));
+ asm volatile("vmovdqa %0,%%ymm13" : : "m" (dptr[z][d+64]));
+ asm volatile("vmovdqa %0,%%ymm15" : : "m" (dptr[z][d+96]));
+ asm volatile("vpxor %ymm5,%ymm2,%ymm2");
+ asm volatile("vpxor %ymm7,%ymm3,%ymm3");
+ asm volatile("vpxor %ymm13,%ymm10,%ymm10");
+ asm volatile("vpxor %ymm15,%ymm11,%ymm11");
+ asm volatile("vpxor %ymm5,%ymm4,%ymm4");
+ asm volatile("vpxor %ymm7,%ymm6,%ymm6");
+ asm volatile("vpxor %ymm13,%ymm12,%ymm12");
+ asm volatile("vpxor %ymm15,%ymm14,%ymm14");
+ }
+ asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
+ asm volatile("vpxor %ymm2,%ymm2,%ymm2");
+ asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
+ asm volatile("vpxor %ymm3,%ymm3,%ymm3");
+ asm volatile("vmovntdq %%ymm10,%0" : "=m" (p[d+64]));
+ asm volatile("vpxor %ymm10,%ymm10,%ymm10");
+ asm volatile("vmovntdq %%ymm11,%0" : "=m" (p[d+96]));
+ asm volatile("vpxor %ymm11,%ymm11,%ymm11");
+ asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
+ asm volatile("vpxor %ymm4,%ymm4,%ymm4");
+ asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
+ asm volatile("vpxor %ymm6,%ymm6,%ymm6");
+ asm volatile("vmovntdq %%ymm12,%0" : "=m" (q[d+64]));
+ asm volatile("vpxor %ymm12,%ymm12,%ymm12");
+ asm volatile("vmovntdq %%ymm14,%0" : "=m" (q[d+96]));
+ asm volatile("vpxor %ymm14,%ymm14,%ymm14");
+ }
+
+ asm volatile("sfence" : : : "memory");
+ kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_avx2x4 = {
+ raid6_avx24_gen_syndrome,
+ raid6_have_avx2,
+ "avx2x4",
+ 1 /* Has cache hints */
+};
+#endif
+
+#endif /* CONFIG_AS_AVX2 */
diff --git a/lib/raid6/mmx.c b/lib/raid6/mmx.c
index 279347f..590c71c 100644
--- a/lib/raid6/mmx.c
+++ b/lib/raid6/mmx.c
@@ -16,7 +16,7 @@
* MMX implementation of RAID-6 syndrome functions
*/
-#if defined(__i386__) && !defined(__arch_um__)
+#ifdef CONFIG_X86_32
#include <linux/raid/pq.h>
#include "x86.h"
diff --git a/lib/raid6/recov_avx2.c b/lib/raid6/recov_avx2.c
new file mode 100644
index 0000000..e1eea43
--- /dev/null
+++ b/lib/raid6/recov_avx2.c
@@ -0,0 +1,323 @@
+/*
+ * Copyright (C) 2012 Intel Corporation
+ * Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#if CONFIG_AS_AVX2
+
+#include <linux/raid/pq.h>
+#include "x86.h"
+
+static int raid6_has_avx2(void)
+{
+ return boot_cpu_has(X86_FEATURE_AVX2) &&
+ boot_cpu_has(X86_FEATURE_AVX);
+}
+
+static void raid6_2data_recov_avx2(int disks, size_t bytes, int faila,
+ int failb, void **ptrs)
+{
+ u8 *p, *q, *dp, *dq;
+ const u8 *pbmul; /* P multiplier table for B data */
+ const u8 *qmul; /* Q multiplier table (for both) */
+ const u8 x0f = 0x0f;
+
+ p = (u8 *)ptrs[disks-2];
+ q = (u8 *)ptrs[disks-1];
+
+ /* Compute syndrome with zero for the missing data pages
+ Use the dead data pages as temporary storage for
+ delta p and delta q */
+ dp = (u8 *)ptrs[faila];
+ ptrs[faila] = (void *)raid6_empty_zero_page;
+ ptrs[disks-2] = dp;
+ dq = (u8 *)ptrs[failb];
+ ptrs[failb] = (void *)raid6_empty_zero_page;
+ ptrs[disks-1] = dq;
+
+ raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+ /* Restore pointer table */
+ ptrs[faila] = dp;
+ ptrs[failb] = dq;
+ ptrs[disks-2] = p;
+ ptrs[disks-1] = q;
+
+ /* Now, pick the proper data tables */
+ pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
+ qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
+ raid6_gfexp[failb]]];
+
+ kernel_fpu_begin();
+
+ /* ymm0 = x0f[16] */
+ asm volatile("vpbroadcastb %0, %%ymm7" : : "m" (x0f));
+
+ while (bytes) {
+#ifdef CONFIG_X86_64
+ asm volatile("vmovdqa %0, %%ymm1" : : "m" (q[0]));
+ asm volatile("vmovdqa %0, %%ymm9" : : "m" (q[32]));
+ asm volatile("vmovdqa %0, %%ymm0" : : "m" (p[0]));
+ asm volatile("vmovdqa %0, %%ymm8" : : "m" (p[32]));
+ asm volatile("vpxor %0, %%ymm1, %%ymm1" : : "m" (dq[0]));
+ asm volatile("vpxor %0, %%ymm9, %%ymm9" : : "m" (dq[32]));
+ asm volatile("vpxor %0, %%ymm0, %%ymm0" : : "m" (dp[0]));
+ asm volatile("vpxor %0, %%ymm8, %%ymm8" : : "m" (dp[32]));
+
+ /*
+ * 1 = dq[0] ^ q[0]
+ * 9 = dq[32] ^ q[32]
+ * 0 = dp[0] ^ p[0]
+ * 8 = dp[32] ^ p[32]
+ */
+
+ asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (qmul[0]));
+ asm volatile("vbroadcasti128 %0, %%ymm5" : : "m" (qmul[16]));
+
+ asm volatile("vpsraw $4, %ymm1, %ymm3");
+ asm volatile("vpsraw $4, %ymm9, %ymm12");
+ asm volatile("vpand %ymm7, %ymm1, %ymm1");
+ asm volatile("vpand %ymm7, %ymm9, %ymm9");
+ asm volatile("vpand %ymm7, %ymm3, %ymm3");
+ asm volatile("vpand %ymm7, %ymm12, %ymm12");
+ asm volatile("vpshufb %ymm9, %ymm4, %ymm14");
+ asm volatile("vpshufb %ymm1, %ymm4, %ymm4");
+ asm volatile("vpshufb %ymm12, %ymm5, %ymm15");
+ asm volatile("vpshufb %ymm3, %ymm5, %ymm5");
+ asm volatile("vpxor %ymm14, %ymm15, %ymm15");
+ asm volatile("vpxor %ymm4, %ymm5, %ymm5");
+
+ /*
+ * 5 = qx[0]
+ * 15 = qx[32]
+ */
+
+ asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (pbmul[0]));
+ asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (pbmul[16]));
+ asm volatile("vpsraw $4, %ymm0, %ymm2");
+ asm volatile("vpsraw $4, %ymm8, %ymm6");
+ asm volatile("vpand %ymm7, %ymm0, %ymm3");
+ asm volatile("vpand %ymm7, %ymm8, %ymm14");
+ asm volatile("vpand %ymm7, %ymm2, %ymm2");
+ asm volatile("vpand %ymm7, %ymm6, %ymm6");
+ asm volatile("vpshufb %ymm14, %ymm4, %ymm12");
+ asm volatile("vpshufb %ymm3, %ymm4, %ymm4");
+ asm volatile("vpshufb %ymm6, %ymm1, %ymm13");
+ asm volatile("vpshufb %ymm2, %ymm1, %ymm1");
+ asm volatile("vpxor %ymm4, %ymm1, %ymm1");
+ asm volatile("vpxor %ymm12, %ymm13, %ymm13");
+
+ /*
+ * 1 = pbmul[px[0]]
+ * 13 = pbmul[px[32]]
+ */
+ asm volatile("vpxor %ymm5, %ymm1, %ymm1");
+ asm volatile("vpxor %ymm15, %ymm13, %ymm13");
+
+ /*
+ * 1 = db = DQ
+ * 13 = db[32] = DQ[32]
+ */
+ asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
+ asm volatile("vmovdqa %%ymm13,%0" : "=m" (dq[32]));
+ asm volatile("vpxor %ymm1, %ymm0, %ymm0");
+ asm volatile("vpxor %ymm13, %ymm8, %ymm8");
+
+ asm volatile("vmovdqa %%ymm0, %0" : "=m" (dp[0]));
+ asm volatile("vmovdqa %%ymm8, %0" : "=m" (dp[32]));
+
+ bytes -= 64;
+ p += 64;
+ q += 64;
+ dp += 64;
+ dq += 64;
+#else
+ asm volatile("vmovdqa %0, %%ymm1" : : "m" (*q));
+ asm volatile("vmovdqa %0, %%ymm0" : : "m" (*p));
+ asm volatile("vpxor %0, %%ymm1, %%ymm1" : : "m" (*dq));
+ asm volatile("vpxor %0, %%ymm0, %%ymm0" : : "m" (*dp));
+
+ /* 1 = dq ^ q; 0 = dp ^ p */
+
+ asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (qmul[0]));
+ asm volatile("vbroadcasti128 %0, %%ymm5" : : "m" (qmul[16]));
+
+ /*
+ * 1 = dq ^ q
+ * 3 = dq ^ p >> 4
+ */
+ asm volatile("vpsraw $4, %ymm1, %ymm3");
+ asm volatile("vpand %ymm7, %ymm1, %ymm1");
+ asm volatile("vpand %ymm7, %ymm3, %ymm3");
+ asm volatile("vpshufb %ymm1, %ymm4, %ymm4");
+ asm volatile("vpshufb %ymm3, %ymm5, %ymm5");
+ asm volatile("vpxor %ymm4, %ymm5, %ymm5");
+
+ /* 5 = qx */
+
+ asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (pbmul[0]));
+ asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (pbmul[16]));
+
+ asm volatile("vpsraw $4, %ymm0, %ymm2");
+ asm volatile("vpand %ymm7, %ymm0, %ymm3");
+ asm volatile("vpand %ymm7, %ymm2, %ymm2");
+ asm volatile("vpshufb %ymm3, %ymm4, %ymm4");
+ asm volatile("vpshufb %ymm2, %ymm1, %ymm1");
+ asm volatile("vpxor %ymm4, %ymm1, %ymm1");
+
+ /* 1 = pbmul[px] */
+ asm volatile("vpxor %ymm5, %ymm1, %ymm1");
+ /* 1 = db = DQ */
+ asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
+
+ asm volatile("vpxor %ymm1, %ymm0, %ymm0");
+ asm volatile("vmovdqa %%ymm0, %0" : "=m" (dp[0]));
+
+ bytes -= 32;
+ p += 32;
+ q += 32;
+ dp += 32;
+ dq += 32;
+#endif
+ }
+
+ kernel_fpu_end();
+}
+
+static void raid6_datap_recov_avx2(int disks, size_t bytes, int faila,
+ void **ptrs)
+{
+ u8 *p, *q, *dq;
+ const u8 *qmul; /* Q multiplier table */
+ const u8 x0f = 0x0f;
+
+ p = (u8 *)ptrs[disks-2];
+ q = (u8 *)ptrs[disks-1];
+
+ /* Compute syndrome with zero for the missing data page
+ Use the dead data page as temporary storage for delta q */
+ dq = (u8 *)ptrs[faila];
+ ptrs[faila] = (void *)raid6_empty_zero_page;
+ ptrs[disks-1] = dq;
+
+ raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+ /* Restore pointer table */
+ ptrs[faila] = dq;
+ ptrs[disks-1] = q;
+
+ /* Now, pick the proper data tables */
+ qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
+
+ kernel_fpu_begin();
+
+ asm volatile("vpbroadcastb %0, %%ymm7" : : "m" (x0f));
+
+ while (bytes) {
+#ifdef CONFIG_X86_64
+ asm volatile("vmovdqa %0, %%ymm3" : : "m" (dq[0]));
+ asm volatile("vmovdqa %0, %%ymm8" : : "m" (dq[32]));
+ asm volatile("vpxor %0, %%ymm3, %%ymm3" : : "m" (q[0]));
+ asm volatile("vpxor %0, %%ymm8, %%ymm8" : : "m" (q[32]));
+
+ /*
+ * 3 = q[0] ^ dq[0]
+ * 8 = q[32] ^ dq[32]
+ */
+ asm volatile("vbroadcasti128 %0, %%ymm0" : : "m" (qmul[0]));
+ asm volatile("vmovapd %ymm0, %ymm13");
+ asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (qmul[16]));
+ asm volatile("vmovapd %ymm1, %ymm14");
+
+ asm volatile("vpsraw $4, %ymm3, %ymm6");
+ asm volatile("vpsraw $4, %ymm8, %ymm12");
+ asm volatile("vpand %ymm7, %ymm3, %ymm3");
+ asm volatile("vpand %ymm7, %ymm8, %ymm8");
+ asm volatile("vpand %ymm7, %ymm6, %ymm6");
+ asm volatile("vpand %ymm7, %ymm12, %ymm12");
+ asm volatile("vpshufb %ymm3, %ymm0, %ymm0");
+ asm volatile("vpshufb %ymm8, %ymm13, %ymm13");
+ asm volatile("vpshufb %ymm6, %ymm1, %ymm1");
+ asm volatile("vpshufb %ymm12, %ymm14, %ymm14");
+ asm volatile("vpxor %ymm0, %ymm1, %ymm1");
+ asm volatile("vpxor %ymm13, %ymm14, %ymm14");
+
+ /*
+ * 1 = qmul[q[0] ^ dq[0]]
+ * 14 = qmul[q[32] ^ dq[32]]
+ */
+ asm volatile("vmovdqa %0, %%ymm2" : : "m" (p[0]));
+ asm volatile("vmovdqa %0, %%ymm12" : : "m" (p[32]));
+ asm volatile("vpxor %ymm1, %ymm2, %ymm2");
+ asm volatile("vpxor %ymm14, %ymm12, %ymm12");
+
+ /*
+ * 2 = p[0] ^ qmul[q[0] ^ dq[0]]
+ * 12 = p[32] ^ qmul[q[32] ^ dq[32]]
+ */
+
+ asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
+ asm volatile("vmovdqa %%ymm14, %0" : "=m" (dq[32]));
+ asm volatile("vmovdqa %%ymm2, %0" : "=m" (p[0]));
+ asm volatile("vmovdqa %%ymm12,%0" : "=m" (p[32]));
+
+ bytes -= 64;
+ p += 64;
+ q += 64;
+ dq += 64;
+#else
+ asm volatile("vmovdqa %0, %%ymm3" : : "m" (dq[0]));
+ asm volatile("vpxor %0, %%ymm3, %%ymm3" : : "m" (q[0]));
+
+ /* 3 = q ^ dq */
+
+ asm volatile("vbroadcasti128 %0, %%ymm0" : : "m" (qmul[0]));
+ asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (qmul[16]));
+
+ asm volatile("vpsraw $4, %ymm3, %ymm6");
+ asm volatile("vpand %ymm7, %ymm3, %ymm3");
+ asm volatile("vpand %ymm7, %ymm6, %ymm6");
+ asm volatile("vpshufb %ymm3, %ymm0, %ymm0");
+ asm volatile("vpshufb %ymm6, %ymm1, %ymm1");
+ asm volatile("vpxor %ymm0, %ymm1, %ymm1");
+
+ /* 1 = qmul[q ^ dq] */
+
+ asm volatile("vmovdqa %0, %%ymm2" : : "m" (p[0]));
+ asm volatile("vpxor %ymm1, %ymm2, %ymm2");
+
+ /* 2 = p ^ qmul[q ^ dq] */
+
+ asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
+ asm volatile("vmovdqa %%ymm2, %0" : "=m" (p[0]));
+
+ bytes -= 32;
+ p += 32;
+ q += 32;
+ dq += 32;
+#endif
+ }
+
+ kernel_fpu_end();
+}
+
+const struct raid6_recov_calls raid6_recov_avx2 = {
+ .data2 = raid6_2data_recov_avx2,
+ .datap = raid6_datap_recov_avx2,
+ .valid = raid6_has_avx2,
+#ifdef CONFIG_X86_64
+ .name = "avx2x2",
+#else
+ .name = "avx2x1",
+#endif
+ .priority = 2,
+};
+
+#else
+#warning "your version of binutils lacks AVX2 support"
+#endif
diff --git a/lib/raid6/recov_ssse3.c b/lib/raid6/recov_ssse3.c
index ecb710c..a916832 100644
--- a/lib/raid6/recov_ssse3.c
+++ b/lib/raid6/recov_ssse3.c
@@ -7,8 +7,6 @@
* of the License.
*/
-#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
-
#include <linux/raid/pq.h>
#include "x86.h"
@@ -332,5 +330,3 @@ const struct raid6_recov_calls raid6_recov_ssse3 = {
#endif
.priority = 1,
};
-
-#endif
diff --git a/lib/raid6/sse1.c b/lib/raid6/sse1.c
index 10dd9194..f762971 100644
--- a/lib/raid6/sse1.c
+++ b/lib/raid6/sse1.c
@@ -21,7 +21,7 @@
* worthwhile as a separate implementation.
*/
-#if defined(__i386__) && !defined(__arch_um__)
+#ifdef CONFIG_X86_32
#include <linux/raid/pq.h>
#include "x86.h"
diff --git a/lib/raid6/sse2.c b/lib/raid6/sse2.c
index bc2d57d..85b82c8 100644
--- a/lib/raid6/sse2.c
+++ b/lib/raid6/sse2.c
@@ -17,8 +17,6 @@
*
*/
-#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
-
#include <linux/raid/pq.h>
#include "x86.h"
@@ -159,9 +157,7 @@ const struct raid6_calls raid6_sse2x2 = {
1 /* Has cache hints */
};
-#endif
-
-#if defined(__x86_64__) && !defined(__arch_um__)
+#ifdef CONFIG_X86_64
/*
* Unrolled-by-4 SSE2 implementation
@@ -259,4 +255,4 @@ const struct raid6_calls raid6_sse2x4 = {
1 /* Has cache hints */
};
-#endif
+#endif /* CONFIG_X86_64 */
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index c76151d..087332d 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -10,6 +10,31 @@ LD = ld
AWK = awk -f
AR = ar
RANLIB = ranlib
+OBJS = int1.o int2.o int4.o int8.o int16.o int32.o recov.o algos.o tables.o
+
+ARCH := $(shell uname -m 2>/dev/null | sed -e /s/i.86/i386/)
+ifeq ($(ARCH),i386)
+ CFLAGS += -DCONFIG_X86_32
+ IS_X86 = yes
+endif
+ifeq ($(ARCH),x86_64)
+ CFLAGS += -DCONFIG_X86_64
+ IS_X86 = yes
+endif
+
+ifeq ($(IS_X86),yes)
+ OBJS += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o
+ CFLAGS += $(shell echo "vpbroadcastb %xmm0, %ymm1" | \
+ gcc -c -x assembler - >&/dev/null && \
+ rm ./-.o && echo -DCONFIG_AS_AVX2=1)
+else
+ HAS_ALTIVEC := $(shell echo -e '\#include <altivec.h>\nvector int a;' |\
+ gcc -c -x c - >&/dev/null && \
+ rm ./-.o && echo yes)
+ ifeq ($(HAS_ALTIVEC),yes)
+ OBJS += altivec1.o altivec2.o altivec4.o altivec8.o
+ endif
+endif
.c.o:
$(CC) $(CFLAGS) -c -o $@ $<
@@ -22,9 +47,7 @@ RANLIB = ranlib
all: raid6.a raid6test
-raid6.a: int1.o int2.o int4.o int8.o int16.o int32.o mmx.o sse1.o sse2.o \
- altivec1.o altivec2.o altivec4.o altivec8.o recov.o recov_ssse3.o algos.o \
- tables.o
+raid6.a: $(OBJS)
rm -f $@
$(AR) cq $@ $^
$(RANLIB) $@
diff --git a/lib/raid6/x86.h b/lib/raid6/x86.h
index d55d632..b759548 100644
--- a/lib/raid6/x86.h
+++ b/lib/raid6/x86.h
@@ -45,19 +45,23 @@ static inline void kernel_fpu_end(void)
#define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */
#define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */
#define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */
+#define X86_FEATURE_AVX2 (9*32+ 5) /* AVX2 instructions */
#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */
/* Should work well enough on modern CPUs for testing */
static inline int boot_cpu_has(int flag)
{
- u32 eax = (flag & 0x20) ? 0x80000001 : 1;
- u32 ecx, edx;
+ u32 eax, ebx, ecx, edx;
+
+ eax = (flag & 0x100) ? 7 :
+ (flag & 0x20) ? 0x80000001 : 1;
+ ecx = 0;
asm volatile("cpuid"
- : "+a" (eax), "=d" (edx), "=c" (ecx)
- : : "ebx");
+ : "+a" (eax), "=b" (ebx), "=d" (edx), "+c" (ecx));
- return ((flag & 0x80 ? ecx : edx) >> (flag & 31)) & 1;
+ return ((flag & 0x100 ? ebx :
+ (flag & 0x80) ? ecx : edx) >> (flag & 31)) & 1;
}
#endif /* ndef __KERNEL__ */
diff --git a/lib/rbtree.c b/lib/rbtree.c
index 4f56a11..c0e31fe 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -194,8 +194,12 @@ __rb_insert(struct rb_node *node, struct rb_root *root,
}
}
-__always_inline void
-__rb_erase_color(struct rb_node *parent, struct rb_root *root,
+/*
+ * Inline version for rb_erase() use - we want to be able to inline
+ * and eliminate the dummy_rotate callback there
+ */
+static __always_inline void
+____rb_erase_color(struct rb_node *parent, struct rb_root *root,
void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
{
struct rb_node *node = NULL, *sibling, *tmp1, *tmp2;
@@ -355,6 +359,13 @@ __rb_erase_color(struct rb_node *parent, struct rb_root *root,
}
}
}
+
+/* Non-inline version for rb_erase_augmented() use */
+void __rb_erase_color(struct rb_node *parent, struct rb_root *root,
+ void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
+{
+ ____rb_erase_color(parent, root, augment_rotate);
+}
EXPORT_SYMBOL(__rb_erase_color);
/*
@@ -380,7 +391,10 @@ EXPORT_SYMBOL(rb_insert_color);
void rb_erase(struct rb_node *node, struct rb_root *root)
{
- rb_erase_augmented(node, root, &dummy_callbacks);
+ struct rb_node *rebalance;
+ rebalance = __rb_erase_augmented(node, root, &dummy_callbacks);
+ if (rebalance)
+ ____rb_erase_color(rebalance, root, dummy_rotate);
}
EXPORT_SYMBOL(rb_erase);