From f7d71f2052555ae57b47322f2c2f6c29ff2438ae Mon Sep 17 00:00:00 2001
From: Waiman Long <Waiman.Long@hp.com>
Date: Fri, 19 Jun 2015 11:50:00 -0400
Subject: locking/qrwlock: Rename functions to queued_*()

To sync up with the naming convention used in qspinlock, all the
qrwlock functions were renamed to started with "queued" instead of
"queue".

Signed-off-by: Waiman Long <Waiman.Long@hp.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Douglas Hatch <doug.hatch@hp.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Scott J Norton <scott.norton@hp.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Link: http://lkml.kernel.org/r/1434729002-57724-2-git-send-email-Waiman.Long@hp.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/arch/x86/include/asm/qrwlock.h b/arch/x86/include/asm/qrwlock.h
index ae0e241..a8810bf 100644
--- a/arch/x86/include/asm/qrwlock.h
+++ b/arch/x86/include/asm/qrwlock.h
@@ -4,8 +4,8 @@
 #include <asm-generic/qrwlock_types.h>
 
 #ifndef CONFIG_X86_PPRO_FENCE
-#define queue_write_unlock queue_write_unlock
-static inline void queue_write_unlock(struct qrwlock *lock)
+#define queued_write_unlock queued_write_unlock
+static inline void queued_write_unlock(struct qrwlock *lock)
 {
         barrier();
         ACCESS_ONCE(*(u8 *)&lock->cnts) = 0;
diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h
index 6383d54..55e3ee1 100644
--- a/include/asm-generic/qrwlock.h
+++ b/include/asm-generic/qrwlock.h
@@ -36,33 +36,33 @@
 /*
  * External function declarations
  */
-extern void queue_read_lock_slowpath(struct qrwlock *lock);
-extern void queue_write_lock_slowpath(struct qrwlock *lock);
+extern void queued_read_lock_slowpath(struct qrwlock *lock);
+extern void queued_write_lock_slowpath(struct qrwlock *lock);
 
 /**
- * queue_read_can_lock- would read_trylock() succeed?
+ * queued_read_can_lock- would read_trylock() succeed?
  * @lock: Pointer to queue rwlock structure
  */
-static inline int queue_read_can_lock(struct qrwlock *lock)
+static inline int queued_read_can_lock(struct qrwlock *lock)
 {
 	return !(atomic_read(&lock->cnts) & _QW_WMASK);
 }
 
 /**
- * queue_write_can_lock- would write_trylock() succeed?
+ * queued_write_can_lock- would write_trylock() succeed?
  * @lock: Pointer to queue rwlock structure
  */
-static inline int queue_write_can_lock(struct qrwlock *lock)
+static inline int queued_write_can_lock(struct qrwlock *lock)
 {
 	return !atomic_read(&lock->cnts);
 }
 
 /**
- * queue_read_trylock - try to acquire read lock of a queue rwlock
+ * queued_read_trylock - try to acquire read lock of a queue rwlock
  * @lock : Pointer to queue rwlock structure
  * Return: 1 if lock acquired, 0 if failed
  */
-static inline int queue_read_trylock(struct qrwlock *lock)
+static inline int queued_read_trylock(struct qrwlock *lock)
 {
 	u32 cnts;
 
@@ -77,11 +77,11 @@ static inline int queue_read_trylock(struct qrwlock *lock)
 }
 
 /**
- * queue_write_trylock - try to acquire write lock of a queue rwlock
+ * queued_write_trylock - try to acquire write lock of a queue rwlock
  * @lock : Pointer to queue rwlock structure
  * Return: 1 if lock acquired, 0 if failed
  */
-static inline int queue_write_trylock(struct qrwlock *lock)
+static inline int queued_write_trylock(struct qrwlock *lock)
 {
 	u32 cnts;
 
@@ -93,10 +93,10 @@ static inline int queue_write_trylock(struct qrwlock *lock)
 				     cnts, cnts | _QW_LOCKED) == cnts);
 }
 /**
- * queue_read_lock - acquire read lock of a queue rwlock
+ * queued_read_lock - acquire read lock of a queue rwlock
  * @lock: Pointer to queue rwlock structure
  */
-static inline void queue_read_lock(struct qrwlock *lock)
+static inline void queued_read_lock(struct qrwlock *lock)
 {
 	u32 cnts;
 
@@ -105,27 +105,27 @@ static inline void queue_read_lock(struct qrwlock *lock)
 		return;
 
 	/* The slowpath will decrement the reader count, if necessary. */
-	queue_read_lock_slowpath(lock);
+	queued_read_lock_slowpath(lock);
 }
 
 /**
- * queue_write_lock - acquire write lock of a queue rwlock
+ * queued_write_lock - acquire write lock of a queue rwlock
  * @lock : Pointer to queue rwlock structure
  */
-static inline void queue_write_lock(struct qrwlock *lock)
+static inline void queued_write_lock(struct qrwlock *lock)
 {
 	/* Optimize for the unfair lock case where the fair flag is 0. */
 	if (atomic_cmpxchg(&lock->cnts, 0, _QW_LOCKED) == 0)
 		return;
 
-	queue_write_lock_slowpath(lock);
+	queued_write_lock_slowpath(lock);
 }
 
 /**
- * queue_read_unlock - release read lock of a queue rwlock
+ * queued_read_unlock - release read lock of a queue rwlock
  * @lock : Pointer to queue rwlock structure
  */
-static inline void queue_read_unlock(struct qrwlock *lock)
+static inline void queued_read_unlock(struct qrwlock *lock)
 {
 	/*
 	 * Atomically decrement the reader count
@@ -134,12 +134,12 @@ static inline void queue_read_unlock(struct qrwlock *lock)
 	atomic_sub(_QR_BIAS, &lock->cnts);
 }
 
-#ifndef queue_write_unlock
+#ifndef queued_write_unlock
 /**
- * queue_write_unlock - release write lock of a queue rwlock
+ * queued_write_unlock - release write lock of a queue rwlock
  * @lock : Pointer to queue rwlock structure
  */
-static inline void queue_write_unlock(struct qrwlock *lock)
+static inline void queued_write_unlock(struct qrwlock *lock)
 {
 	/*
 	 * If the writer field is atomic, it can be cleared directly.
@@ -154,13 +154,13 @@ static inline void queue_write_unlock(struct qrwlock *lock)
  * Remapping rwlock architecture specific functions to the corresponding
  * queue rwlock functions.
  */
-#define arch_read_can_lock(l)	queue_read_can_lock(l)
-#define arch_write_can_lock(l)	queue_write_can_lock(l)
-#define arch_read_lock(l)	queue_read_lock(l)
-#define arch_write_lock(l)	queue_write_lock(l)
-#define arch_read_trylock(l)	queue_read_trylock(l)
-#define arch_write_trylock(l)	queue_write_trylock(l)
-#define arch_read_unlock(l)	queue_read_unlock(l)
-#define arch_write_unlock(l)	queue_write_unlock(l)
+#define arch_read_can_lock(l)	queued_read_can_lock(l)
+#define arch_write_can_lock(l)	queued_write_can_lock(l)
+#define arch_read_lock(l)	queued_read_lock(l)
+#define arch_write_lock(l)	queued_write_lock(l)
+#define arch_read_trylock(l)	queued_read_trylock(l)
+#define arch_write_trylock(l)	queued_write_trylock(l)
+#define arch_read_unlock(l)	queued_read_unlock(l)
+#define arch_write_unlock(l)	queued_write_unlock(l)
 
 #endif /* __ASM_GENERIC_QRWLOCK_H */
diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c
index 6c5da483..49057d4 100644
--- a/kernel/locking/qrwlock.c
+++ b/kernel/locking/qrwlock.c
@@ -60,10 +60,10 @@ rspin_until_writer_unlock(struct qrwlock *lock, u32 cnts)
 }
 
 /**
- * queue_read_lock_slowpath - acquire read lock of a queue rwlock
+ * queued_read_lock_slowpath - acquire read lock of a queue rwlock
  * @lock: Pointer to queue rwlock structure
  */
-void queue_read_lock_slowpath(struct qrwlock *lock)
+void queued_read_lock_slowpath(struct qrwlock *lock)
 {
 	u32 cnts;
 
@@ -104,13 +104,13 @@ void queue_read_lock_slowpath(struct qrwlock *lock)
 	 */
 	arch_spin_unlock(&lock->lock);
 }
-EXPORT_SYMBOL(queue_read_lock_slowpath);
+EXPORT_SYMBOL(queued_read_lock_slowpath);
 
 /**
- * queue_write_lock_slowpath - acquire write lock of a queue rwlock
+ * queued_write_lock_slowpath - acquire write lock of a queue rwlock
  * @lock : Pointer to queue rwlock structure
  */
-void queue_write_lock_slowpath(struct qrwlock *lock)
+void queued_write_lock_slowpath(struct qrwlock *lock)
 {
 	u32 cnts;
 
@@ -149,4 +149,4 @@ void queue_write_lock_slowpath(struct qrwlock *lock)
 unlock:
 	arch_spin_unlock(&lock->lock);
 }
-EXPORT_SYMBOL(queue_write_lock_slowpath);
+EXPORT_SYMBOL(queued_write_lock_slowpath);
-- 
cgit v0.10.2


From 0e06e5be70d392aa842c1455ec2d0baf62aeed48 Mon Sep 17 00:00:00 2001
From: Waiman Long <Waiman.Long@hp.com>
Date: Fri, 19 Jun 2015 11:50:01 -0400
Subject: locking/qrwlock: Better optimization for interrupt context readers

The qrwlock is fair in the process context, but becoming unfair when
in the interrupt context to support use cases like the tasklist_lock.

The current code isn't that well-documented on what happens when
in the interrupt context. The rspin_until_writer_unlock() will only
spin if the writer has gotten the lock. If the writer is still in the
waiting state, the increment in the reader count will cause the writer
to remain in the waiting state and the new interrupt context reader
will get the lock and return immediately. The current code, however,
does an additional read of the lock value which is not necessary as
the information has already been there in the fast path. This may
sometime cause an additional cacheline transfer when the lock is
highly contended.

This patch passes the lock value information gotten in the fast path
to the slow path to eliminate the additional read. It also documents
the action for the interrupt context readers more clearly.

Signed-off-by: Waiman Long <Waiman.Long@hp.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Douglas Hatch <doug.hatch@hp.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Scott J Norton <scott.norton@hp.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1434729002-57724-3-git-send-email-Waiman.Long@hp.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h
index 55e3ee1..deb9e8b 100644
--- a/include/asm-generic/qrwlock.h
+++ b/include/asm-generic/qrwlock.h
@@ -36,7 +36,7 @@
 /*
  * External function declarations
  */
-extern void queued_read_lock_slowpath(struct qrwlock *lock);
+extern void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts);
 extern void queued_write_lock_slowpath(struct qrwlock *lock);
 
 /**
@@ -105,7 +105,7 @@ static inline void queued_read_lock(struct qrwlock *lock)
 		return;
 
 	/* The slowpath will decrement the reader count, if necessary. */
-	queued_read_lock_slowpath(lock);
+	queued_read_lock_slowpath(lock, cnts);
 }
 
 /**
diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c
index 49057d4..d9c36c5 100644
--- a/kernel/locking/qrwlock.c
+++ b/kernel/locking/qrwlock.c
@@ -62,20 +62,21 @@ rspin_until_writer_unlock(struct qrwlock *lock, u32 cnts)
 /**
  * queued_read_lock_slowpath - acquire read lock of a queue rwlock
  * @lock: Pointer to queue rwlock structure
+ * @cnts: Current qrwlock lock value
  */
-void queued_read_lock_slowpath(struct qrwlock *lock)
+void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts)
 {
-	u32 cnts;
-
 	/*
 	 * Readers come here when they cannot get the lock without waiting
 	 */
 	if (unlikely(in_interrupt())) {
 		/*
-		 * Readers in interrupt context will spin until the lock is
-		 * available without waiting in the queue.
+		 * Readers in interrupt context will get the lock immediately
+		 * if the writer is just waiting (not holding the lock yet).
+		 * The rspin_until_writer_unlock() function returns immediately
+		 * in this case. Otherwise, they will spin until the lock
+		 * is available without waiting in the queue.
 		 */
-		cnts = smp_load_acquire((u32 *)&lock->cnts);
 		rspin_until_writer_unlock(lock, cnts);
 		return;
 	}
-- 
cgit v0.10.2


From 767f509ca11269c2bcd92e3972a93096f2173ac0 Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Mon, 29 Jun 2015 23:26:01 -0700
Subject: futex: Enhance comments in futex_lock_pi() for blocking paths

... serves a bit better to clarify between blocking
and non-blocking code paths.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Darren Hart <darren@dvhart.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Link: http://lkml.kernel.org/r/1435645562-975-2-git-send-email-dave@stgolabs.net
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/futex.c b/kernel/futex.c
index c4a182f..153eb22 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2268,8 +2268,11 @@ static long futex_wait_restart(struct restart_block *restart)
 /*
  * Userspace tried a 0 -> TID atomic transition of the futex value
  * and failed. The kernel side here does the whole locking operation:
- * if there are waiters then it will block, it does PI, etc. (Due to
- * races the kernel might see a 0 value of the futex too.)
+ * if there are waiters then it will block as a consequence of relying
+ * on rt-mutexes, it does PI, etc. (Due to races the kernel might see
+ * a 0 value of the futex too.).
+ *
+ * Also serves as futex trylock_pi()'ing, and due semantics.
  */
 static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
 			 ktime_t *time, int trylock)
@@ -2300,6 +2303,10 @@ retry_private:
 
 	ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0);
 	if (unlikely(ret)) {
+		/*
+		 * Atomic work succeeded and we got the lock,
+		 * or failed. Either way, we do _not_ block.
+		 */
 		switch (ret) {
 		case 1:
 			/* We got the lock. */
-- 
cgit v0.10.2


From ab51fbab39d864f3223e44a2600fd951df261f0b Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Mon, 29 Jun 2015 23:26:02 -0700
Subject: futex: Fault/error injection capabilities

Although futexes are well known for being a royal pita,
we really have very little debugging capabilities - except
for relying on tglx's eye half the time.

By simply making use of the existing fault-injection machinery,
we can improve this situation, allowing generating artificial
uaddress faults and deadlock scenarios. Of course, when this is
disabled in production systems, the overhead for failure checks
is practically zero -- so this is very cheap at the same time.
Future work would be nice to now enhance trinity to make use of
this.

There is a special tunable 'ignore-private', which can filter
out private futexes. Given the tsk->make_it_fail filter and
this option, pi futexes can be narrowed down pretty closely.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Darren Hart <darren@dvhart.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Link: http://lkml.kernel.org/r/1435645562-975-3-git-send-email-dave@stgolabs.net
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/Documentation/fault-injection/fault-injection.txt b/Documentation/fault-injection/fault-injection.txt
index 4cf1a2a..415484f 100644
--- a/Documentation/fault-injection/fault-injection.txt
+++ b/Documentation/fault-injection/fault-injection.txt
@@ -15,6 +15,10 @@ o fail_page_alloc
 
   injects page allocation failures. (alloc_pages(), get_free_pages(), ...)
 
+o fail_futex
+
+  injects futex deadlock and uaddr fault errors.
+
 o fail_make_request
 
   injects disk IO errors on devices permitted by setting
@@ -113,6 +117,12 @@ configuration of fault-injection capabilities.
 	specifies the minimum page allocation order to be injected
 	failures.
 
+- /sys/kernel/debug/fail_futex/ignore-private:
+
+	Format: { 'Y' | 'N' }
+	default is 'N', setting it to 'Y' will disable failure injections
+	when dealing with private (address space) futexes.
+
 o Boot option
 
 In order to inject faults while debugfs is not available (early boot time),
@@ -121,6 +131,7 @@ use the boot option:
 	failslab=
 	fail_page_alloc=
 	fail_make_request=
+	fail_futex=
 	mmc_core.fail_request=<interval>,<probability>,<space>,<times>
 
 How to add new fault injection capability
diff --git a/kernel/futex.c b/kernel/futex.c
index 153eb22..6ea31bb 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -64,6 +64,7 @@
 #include <linux/hugetlb.h>
 #include <linux/freezer.h>
 #include <linux/bootmem.h>
+#include <linux/fault-inject.h>
 
 #include <asm/futex.h>
 
@@ -258,6 +259,66 @@ static unsigned long __read_mostly futex_hashsize;
 
 static struct futex_hash_bucket *futex_queues;
 
+/*
+ * Fault injections for futexes.
+ */
+#ifdef CONFIG_FAIL_FUTEX
+
+static struct {
+	struct fault_attr attr;
+
+	u32 ignore_private;
+} fail_futex = {
+	.attr = FAULT_ATTR_INITIALIZER,
+	.ignore_private = 0,
+};
+
+static int __init setup_fail_futex(char *str)
+{
+	return setup_fault_attr(&fail_futex.attr, str);
+}
+__setup("fail_futex=", setup_fail_futex);
+
+bool should_fail_futex(bool fshared)
+{
+	if (fail_futex.ignore_private && !fshared)
+		return false;
+
+	return should_fail(&fail_futex.attr, 1);
+}
+
+#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
+
+static int __init fail_futex_debugfs(void)
+{
+	umode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
+	struct dentry *dir;
+
+	dir = fault_create_debugfs_attr("fail_futex", NULL,
+					&fail_futex.attr);
+	if (IS_ERR(dir))
+		return PTR_ERR(dir);
+
+	if (!debugfs_create_bool("ignore-private", mode, dir,
+				 &fail_futex.ignore_private)) {
+		debugfs_remove_recursive(dir);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+late_initcall(fail_futex_debugfs);
+
+#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
+
+#else
+static inline bool should_fail_futex(bool fshared)
+{
+	return false;
+}
+#endif /* CONFIG_FAIL_FUTEX */
+
 static inline void futex_get_mm(union futex_key *key)
 {
 	atomic_inc(&key->private.mm->mm_count);
@@ -413,6 +474,9 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
 	if (unlikely(!access_ok(rw, uaddr, sizeof(u32))))
 		return -EFAULT;
 
+	if (unlikely(should_fail_futex(fshared)))
+		return -EFAULT;
+
 	/*
 	 * PROCESS_PRIVATE futexes are fast.
 	 * As the mm cannot disappear under us and the 'key' only needs
@@ -428,6 +492,10 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
 	}
 
 again:
+	/* Ignore any VERIFY_READ mapping (futex common case) */
+	if (unlikely(should_fail_futex(fshared)))
+		return -EFAULT;
+
 	err = get_user_pages_fast(address, 1, 1, &page);
 	/*
 	 * If write access is not required (eg. FUTEX_WAIT), try
@@ -516,7 +584,7 @@ again:
 		 * A RO anonymous page will never change and thus doesn't make
 		 * sense for futex operations.
 		 */
-		if (ro) {
+		if (unlikely(should_fail_futex(fshared)) || ro) {
 			err = -EFAULT;
 			goto out;
 		}
@@ -974,6 +1042,9 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
 {
 	u32 uninitialized_var(curval);
 
+	if (unlikely(should_fail_futex(true)))
+		return -EFAULT;
+
 	if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)))
 		return -EFAULT;
 
@@ -1015,12 +1086,18 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
 	if (get_futex_value_locked(&uval, uaddr))
 		return -EFAULT;
 
+	if (unlikely(should_fail_futex(true)))
+		return -EFAULT;
+
 	/*
 	 * Detect deadlocks.
 	 */
 	if ((unlikely((uval & FUTEX_TID_MASK) == vpid)))
 		return -EDEADLK;
 
+	if ((unlikely(should_fail_futex(true))))
+		return -EDEADLK;
+
 	/*
 	 * Lookup existing state first. If it exists, try to attach to
 	 * its pi_state.
@@ -1155,6 +1232,9 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
 	 */
 	newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
 
+	if (unlikely(should_fail_futex(true)))
+		ret = -EFAULT;
+
 	if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
 		ret = -EFAULT;
 	else if (curval != uval)
@@ -1457,6 +1537,9 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
 	if (get_futex_value_locked(&curval, pifutex))
 		return -EFAULT;
 
+	if (unlikely(should_fail_futex(true)))
+		return -EFAULT;
+
 	/*
 	 * Find the top_waiter and determine if there are additional waiters.
 	 * If the caller intends to requeue more than 1 waiter to pifutex,
@@ -2537,7 +2620,7 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
  * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2
  * @uaddr:	the futex we initially wait on (non-pi)
  * @flags:	futex flags (FLAGS_SHARED, FLAGS_CLOCKRT, etc.), they must be
- * 		the same type, no requeueing from private to shared, etc.
+ *		the same type, no requeueing from private to shared, etc.
  * @val:	the expected value of uaddr
  * @abs_time:	absolute timeout
  * @bitset:	32 bit wakeup bitset set by userspace, defaults to all
@@ -3012,6 +3095,8 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
 	if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
 		      cmd == FUTEX_WAIT_BITSET ||
 		      cmd == FUTEX_WAIT_REQUEUE_PI)) {
+		if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG))))
+			return -EFAULT;
 		if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
 			return -EFAULT;
 		if (!timespec_valid(&ts))
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index e2894b2..22554d6 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1542,6 +1542,13 @@ config FAIL_MMC_REQUEST
 	  and to test how the mmc host driver handles retries from
 	  the block device.
 
+config FAIL_FUTEX
+	bool "Fault-injection capability for futexes"
+	select DEBUG_FS
+	depends on FAULT_INJECTION && FUTEX
+	help
+	  Provide fault-injection capability for futexes.
+
 config FAULT_INJECTION_DEBUG_FS
 	bool "Debugfs entries for fault-injection capabilities"
 	depends on FAULT_INJECTION && SYSFS && DEBUG_FS
-- 
cgit v0.10.2


From 1b0b7c1762679a2f8bc359da95649249dfcf4195 Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Wed, 1 Jul 2015 13:29:48 -0700
Subject: rtmutex: Delete scriptable tester

No one uses this anymore, and this is not the first time the
idea of replacing it with a (now possible) userspace side.
Lock stealing logic was removed long ago in when the lock
was granted to the highest prio.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Darren Hart <dvhart@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Mike Galbraith <umgwanakikbuti@gmail.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1435782588-4177-2-git-send-email-dave@stgolabs.net
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/parisc/configs/c8000_defconfig b/arch/parisc/configs/c8000_defconfig
index 269c23d..1a8f6f95 100644
--- a/arch/parisc/configs/c8000_defconfig
+++ b/arch/parisc/configs/c8000_defconfig
@@ -242,7 +242,6 @@ CONFIG_LOCKUP_DETECTOR=y
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
 CONFIG_PANIC_ON_OOPS=y
 CONFIG_DEBUG_RT_MUTEXES=y
-CONFIG_RT_MUTEX_TESTER=y
 CONFIG_PROVE_RCU_DELAY=y
 CONFIG_DEBUG_BLOCK_EXT_DEVT=y
 CONFIG_LATENCYTOP=y
diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig
index 33b148f..0ffb08f 100644
--- a/arch/parisc/configs/generic-32bit_defconfig
+++ b/arch/parisc/configs/generic-32bit_defconfig
@@ -295,7 +295,6 @@ CONFIG_DEBUG_SHIRQ=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_TIMER_STATS=y
 CONFIG_DEBUG_RT_MUTEXES=y
-CONFIG_RT_MUTEX_TESTER=y
 CONFIG_DEBUG_SPINLOCK=y
 CONFIG_DEBUG_MUTEXES=y
 CONFIG_RCU_CPU_STALL_INFO=y
diff --git a/arch/xtensa/configs/iss_defconfig b/arch/xtensa/configs/iss_defconfig
index e4d193e..f3dfe0d 100644
--- a/arch/xtensa/configs/iss_defconfig
+++ b/arch/xtensa/configs/iss_defconfig
@@ -616,7 +616,6 @@ CONFIG_SCHED_DEBUG=y
 # CONFIG_SLUB_DEBUG_ON is not set
 # CONFIG_SLUB_STATS is not set
 # CONFIG_DEBUG_RT_MUTEXES is not set
-# CONFIG_RT_MUTEX_TESTER is not set
 # CONFIG_DEBUG_SPINLOCK is not set
 # CONFIG_DEBUG_MUTEXES is not set
 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index 7dd5c99..3694204 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -20,7 +20,6 @@ obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
 obj-$(CONFIG_QUEUED_SPINLOCKS) += qspinlock.o
 obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
 obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
-obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
 obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
 obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
 obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
diff --git a/kernel/locking/rtmutex-tester.c b/kernel/locking/rtmutex-tester.c
deleted file mode 100644
index 1d96dd0..0000000
--- a/kernel/locking/rtmutex-tester.c
+++ /dev/null
@@ -1,420 +0,0 @@
-/*
- * RT-Mutex-tester: scriptable tester for rt mutexes
- *
- * started by Thomas Gleixner:
- *
- *  Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
- *
- */
-#include <linux/device.h>
-#include <linux/kthread.h>
-#include <linux/export.h>
-#include <linux/sched.h>
-#include <linux/sched/rt.h>
-#include <linux/spinlock.h>
-#include <linux/timer.h>
-#include <linux/freezer.h>
-#include <linux/stat.h>
-
-#include "rtmutex.h"
-
-#define MAX_RT_TEST_THREADS	8
-#define MAX_RT_TEST_MUTEXES	8
-
-static spinlock_t rttest_lock;
-static atomic_t rttest_event;
-
-struct test_thread_data {
-	int			opcode;
-	int			opdata;
-	int			mutexes[MAX_RT_TEST_MUTEXES];
-	int			event;
-	struct device		dev;
-};
-
-static struct test_thread_data thread_data[MAX_RT_TEST_THREADS];
-static struct task_struct *threads[MAX_RT_TEST_THREADS];
-static struct rt_mutex mutexes[MAX_RT_TEST_MUTEXES];
-
-enum test_opcodes {
-	RTTEST_NOP = 0,
-	RTTEST_SCHEDOT,		/* 1 Sched other, data = nice */
-	RTTEST_SCHEDRT,		/* 2 Sched fifo, data = prio */
-	RTTEST_LOCK,		/* 3 Lock uninterruptible, data = lockindex */
-	RTTEST_LOCKNOWAIT,	/* 4 Lock uninterruptible no wait in wakeup, data = lockindex */
-	RTTEST_LOCKINT,		/* 5 Lock interruptible, data = lockindex */
-	RTTEST_LOCKINTNOWAIT,	/* 6 Lock interruptible no wait in wakeup, data = lockindex */
-	RTTEST_LOCKCONT,	/* 7 Continue locking after the wakeup delay */
-	RTTEST_UNLOCK,		/* 8 Unlock, data = lockindex */
-	/* 9, 10 - reserved for BKL commemoration */
-	RTTEST_SIGNAL = 11,	/* 11 Signal other test thread, data = thread id */
-	RTTEST_RESETEVENT = 98,	/* 98 Reset event counter */
-	RTTEST_RESET = 99,	/* 99 Reset all pending operations */
-};
-
-static int handle_op(struct test_thread_data *td, int lockwakeup)
-{
-	int i, id, ret = -EINVAL;
-
-	switch(td->opcode) {
-
-	case RTTEST_NOP:
-		return 0;
-
-	case RTTEST_LOCKCONT:
-		td->mutexes[td->opdata] = 1;
-		td->event = atomic_add_return(1, &rttest_event);
-		return 0;
-
-	case RTTEST_RESET:
-		for (i = 0; i < MAX_RT_TEST_MUTEXES; i++) {
-			if (td->mutexes[i] == 4) {
-				rt_mutex_unlock(&mutexes[i]);
-				td->mutexes[i] = 0;
-			}
-		}
-		return 0;
-
-	case RTTEST_RESETEVENT:
-		atomic_set(&rttest_event, 0);
-		return 0;
-
-	default:
-		if (lockwakeup)
-			return ret;
-	}
-
-	switch(td->opcode) {
-
-	case RTTEST_LOCK:
-	case RTTEST_LOCKNOWAIT:
-		id = td->opdata;
-		if (id < 0 || id >= MAX_RT_TEST_MUTEXES)
-			return ret;
-
-		td->mutexes[id] = 1;
-		td->event = atomic_add_return(1, &rttest_event);
-		rt_mutex_lock(&mutexes[id]);
-		td->event = atomic_add_return(1, &rttest_event);
-		td->mutexes[id] = 4;
-		return 0;
-
-	case RTTEST_LOCKINT:
-	case RTTEST_LOCKINTNOWAIT:
-		id = td->opdata;
-		if (id < 0 || id >= MAX_RT_TEST_MUTEXES)
-			return ret;
-
-		td->mutexes[id] = 1;
-		td->event = atomic_add_return(1, &rttest_event);
-		ret = rt_mutex_lock_interruptible(&mutexes[id], 0);
-		td->event = atomic_add_return(1, &rttest_event);
-		td->mutexes[id] = ret ? 0 : 4;
-		return ret ? -EINTR : 0;
-
-	case RTTEST_UNLOCK:
-		id = td->opdata;
-		if (id < 0 || id >= MAX_RT_TEST_MUTEXES || td->mutexes[id] != 4)
-			return ret;
-
-		td->event = atomic_add_return(1, &rttest_event);
-		rt_mutex_unlock(&mutexes[id]);
-		td->event = atomic_add_return(1, &rttest_event);
-		td->mutexes[id] = 0;
-		return 0;
-
-	default:
-		break;
-	}
-	return ret;
-}
-
-/*
- * Schedule replacement for rtsem_down(). Only called for threads with
- * PF_MUTEX_TESTER set.
- *
- * This allows us to have finegrained control over the event flow.
- *
- */
-void schedule_rt_mutex_test(struct rt_mutex *mutex)
-{
-	int tid, op, dat;
-	struct test_thread_data *td;
-
-	/* We have to lookup the task */
-	for (tid = 0; tid < MAX_RT_TEST_THREADS; tid++) {
-		if (threads[tid] == current)
-			break;
-	}
-
-	BUG_ON(tid == MAX_RT_TEST_THREADS);
-
-	td = &thread_data[tid];
-
-	op = td->opcode;
-	dat = td->opdata;
-
-	switch (op) {
-	case RTTEST_LOCK:
-	case RTTEST_LOCKINT:
-	case RTTEST_LOCKNOWAIT:
-	case RTTEST_LOCKINTNOWAIT:
-		if (mutex != &mutexes[dat])
-			break;
-
-		if (td->mutexes[dat] != 1)
-			break;
-
-		td->mutexes[dat] = 2;
-		td->event = atomic_add_return(1, &rttest_event);
-		break;
-
-	default:
-		break;
-	}
-
-	schedule();
-
-
-	switch (op) {
-	case RTTEST_LOCK:
-	case RTTEST_LOCKINT:
-		if (mutex != &mutexes[dat])
-			return;
-
-		if (td->mutexes[dat] != 2)
-			return;
-
-		td->mutexes[dat] = 3;
-		td->event = atomic_add_return(1, &rttest_event);
-		break;
-
-	case RTTEST_LOCKNOWAIT:
-	case RTTEST_LOCKINTNOWAIT:
-		if (mutex != &mutexes[dat])
-			return;
-
-		if (td->mutexes[dat] != 2)
-			return;
-
-		td->mutexes[dat] = 1;
-		td->event = atomic_add_return(1, &rttest_event);
-		return;
-
-	default:
-		return;
-	}
-
-	td->opcode = 0;
-
-	for (;;) {
-		set_current_state(TASK_INTERRUPTIBLE);
-
-		if (td->opcode > 0) {
-			int ret;
-
-			set_current_state(TASK_RUNNING);
-			ret = handle_op(td, 1);
-			set_current_state(TASK_INTERRUPTIBLE);
-			if (td->opcode == RTTEST_LOCKCONT)
-				break;
-			td->opcode = ret;
-		}
-
-		/* Wait for the next command to be executed */
-		schedule();
-	}
-
-	/* Restore previous command and data */
-	td->opcode = op;
-	td->opdata = dat;
-}
-
-static int test_func(void *data)
-{
-	struct test_thread_data *td = data;
-	int ret;
-
-	current->flags |= PF_MUTEX_TESTER;
-	set_freezable();
-	allow_signal(SIGHUP);
-
-	for(;;) {
-
-		set_current_state(TASK_INTERRUPTIBLE);
-
-		if (td->opcode > 0) {
-			set_current_state(TASK_RUNNING);
-			ret = handle_op(td, 0);
-			set_current_state(TASK_INTERRUPTIBLE);
-			td->opcode = ret;
-		}
-
-		/* Wait for the next command to be executed */
-		schedule();
-		try_to_freeze();
-
-		if (signal_pending(current))
-			flush_signals(current);
-
-		if(kthread_should_stop())
-			break;
-	}
-	return 0;
-}
-
-/**
- * sysfs_test_command - interface for test commands
- * @dev:	thread reference
- * @buf:	command for actual step
- * @count:	length of buffer
- *
- * command syntax:
- *
- * opcode:data
- */
-static ssize_t sysfs_test_command(struct device *dev, struct device_attribute *attr,
-				  const char *buf, size_t count)
-{
-	struct sched_param schedpar;
-	struct test_thread_data *td;
-	char cmdbuf[32];
-	int op, dat, tid, ret;
-
-	td = container_of(dev, struct test_thread_data, dev);
-	tid = td->dev.id;
-
-	/* strings from sysfs write are not 0 terminated! */
-	if (count >= sizeof(cmdbuf))
-		return -EINVAL;
-
-	/* strip of \n: */
-	if (buf[count-1] == '\n')
-		count--;
-	if (count < 1)
-		return -EINVAL;
-
-	memcpy(cmdbuf, buf, count);
-	cmdbuf[count] = 0;
-
-	if (sscanf(cmdbuf, "%d:%d", &op, &dat) != 2)
-		return -EINVAL;
-
-	switch (op) {
-	case RTTEST_SCHEDOT:
-		schedpar.sched_priority = 0;
-		ret = sched_setscheduler(threads[tid], SCHED_NORMAL, &schedpar);
-		if (ret)
-			return ret;
-		set_user_nice(current, 0);
-		break;
-
-	case RTTEST_SCHEDRT:
-		schedpar.sched_priority = dat;
-		ret = sched_setscheduler(threads[tid], SCHED_FIFO, &schedpar);
-		if (ret)
-			return ret;
-		break;
-
-	case RTTEST_SIGNAL:
-		send_sig(SIGHUP, threads[tid], 0);
-		break;
-
-	default:
-		if (td->opcode > 0)
-			return -EBUSY;
-		td->opdata = dat;
-		td->opcode = op;
-		wake_up_process(threads[tid]);
-	}
-
-	return count;
-}
-
-/**
- * sysfs_test_status - sysfs interface for rt tester
- * @dev:	thread to query
- * @buf:	char buffer to be filled with thread status info
- */
-static ssize_t sysfs_test_status(struct device *dev, struct device_attribute *attr,
-				 char *buf)
-{
-	struct test_thread_data *td;
-	struct task_struct *tsk;
-	char *curr = buf;
-	int i;
-
-	td = container_of(dev, struct test_thread_data, dev);
-	tsk = threads[td->dev.id];
-
-	spin_lock(&rttest_lock);
-
-	curr += sprintf(curr,
-		"O: %4d, E:%8d, S: 0x%08lx, P: %4d, N: %4d, B: %p, M:",
-		td->opcode, td->event, tsk->state,
-			(MAX_RT_PRIO - 1) - tsk->prio,
-			(MAX_RT_PRIO - 1) - tsk->normal_prio,
-		tsk->pi_blocked_on);
-
-	for (i = MAX_RT_TEST_MUTEXES - 1; i >=0 ; i--)
-		curr += sprintf(curr, "%d", td->mutexes[i]);
-
-	spin_unlock(&rttest_lock);
-
-	curr += sprintf(curr, ", T: %p, R: %p\n", tsk,
-			mutexes[td->dev.id].owner);
-
-	return curr - buf;
-}
-
-static DEVICE_ATTR(status, S_IRUSR, sysfs_test_status, NULL);
-static DEVICE_ATTR(command, S_IWUSR, NULL, sysfs_test_command);
-
-static struct bus_type rttest_subsys = {
-	.name = "rttest",
-	.dev_name = "rttest",
-};
-
-static int init_test_thread(int id)
-{
-	thread_data[id].dev.bus = &rttest_subsys;
-	thread_data[id].dev.id = id;
-
-	threads[id] = kthread_run(test_func, &thread_data[id], "rt-test-%d", id);
-	if (IS_ERR(threads[id]))
-		return PTR_ERR(threads[id]);
-
-	return device_register(&thread_data[id].dev);
-}
-
-static int init_rttest(void)
-{
-	int ret, i;
-
-	spin_lock_init(&rttest_lock);
-
-	for (i = 0; i < MAX_RT_TEST_MUTEXES; i++)
-		rt_mutex_init(&mutexes[i]);
-
-	ret = subsys_system_register(&rttest_subsys, NULL);
-	if (ret)
-		return ret;
-
-	for (i = 0; i < MAX_RT_TEST_THREADS; i++) {
-		ret = init_test_thread(i);
-		if (ret)
-			break;
-		ret = device_create_file(&thread_data[i].dev, &dev_attr_status);
-		if (ret)
-			break;
-		ret = device_create_file(&thread_data[i].dev, &dev_attr_command);
-		if (ret)
-			break;
-	}
-
-	printk("Initializing RT-Tester: %s\n", ret ? "Failed" : "OK" );
-
-	return ret;
-}
-
-device_initcall(init_rttest);
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 5674b07..7781d80 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1120,7 +1120,7 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
 
 		debug_rt_mutex_print_deadlock(waiter);
 
-		schedule_rt_mutex(lock);
+		schedule();
 
 		raw_spin_lock(&lock->wait_lock);
 		set_current_state(state);
diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
index 7844f8f..4f5f83c 100644
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
@@ -15,28 +15,6 @@
 #include <linux/rtmutex.h>
 
 /*
- * The rtmutex in kernel tester is independent of rtmutex debugging. We
- * call schedule_rt_mutex_test() instead of schedule() for the tasks which
- * belong to the tester. That way we can delay the wakeup path of those
- * threads to provoke lock stealing and testing of  complex boosting scenarios.
- */
-#ifdef CONFIG_RT_MUTEX_TESTER
-
-extern void schedule_rt_mutex_test(struct rt_mutex *lock);
-
-#define schedule_rt_mutex(_lock)				\
-  do {								\
-	if (!(current->flags & PF_MUTEX_TESTER))		\
-		schedule();					\
-	else							\
-		schedule_rt_mutex_test(_lock);			\
-  } while (0)
-
-#else
-# define schedule_rt_mutex(_lock)			schedule()
-#endif
-
-/*
  * This is the control structure for tasks blocked on a rt_mutex,
  * which is allocated on the kernel stack on of the blocked task.
  *
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 22554d6..e7b5b65 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -916,12 +916,6 @@ config DEBUG_RT_MUTEXES
 	 This allows rt mutex semantics violations and rt mutex related
 	 deadlocks (lockups) to be detected and reported automatically.
 
-config RT_MUTEX_TESTER
-	bool "Built-in scriptable tester for rt-mutexes"
-	depends on DEBUG_KERNEL && RT_MUTEXES && BROKEN
-	help
-	  This option enables a rt-mutex tester.
-
 config DEBUG_SPINLOCK
 	bool "Spinlock and rw-lock debugging: basic checks"
 	depends on DEBUG_KERNEL
diff --git a/scripts/rt-tester/check-all.sh b/scripts/rt-tester/check-all.sh
deleted file mode 100644
index 6b5c83b..0000000
--- a/scripts/rt-tester/check-all.sh
+++ /dev/null
@@ -1,21 +0,0 @@
-
-
-function testit ()
-{
- printf "%-30s: " $1
- ./rt-tester.py $1 | grep Pass
-}
-
-testit t2-l1-2rt-sameprio.tst
-testit t2-l1-pi.tst
-testit t2-l1-signal.tst
-#testit t2-l2-2rt-deadlock.tst
-testit t3-l1-pi-1rt.tst
-testit t3-l1-pi-2rt.tst
-testit t3-l1-pi-3rt.tst
-testit t3-l1-pi-signal.tst
-testit t3-l1-pi-steal.tst
-testit t3-l2-pi.tst
-testit t4-l2-pi-deboost.tst
-testit t5-l4-pi-boost-deboost.tst
-testit t5-l4-pi-boost-deboost-setsched.tst
diff --git a/scripts/rt-tester/rt-tester.py b/scripts/rt-tester/rt-tester.py
deleted file mode 100755
index 6d916c2..0000000
--- a/scripts/rt-tester/rt-tester.py
+++ /dev/null
@@ -1,218 +0,0 @@
-#!/usr/bin/python
-#
-# rt-mutex tester
-#
-# (C) 2006 Thomas Gleixner <tglx@linutronix.de>
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-#
-import os
-import sys
-import getopt
-import shutil
-import string
-
-# Globals
-quiet = 0
-test = 0
-comments = 0
-
-sysfsprefix = "/sys/devices/system/rttest/rttest"
-statusfile = "/status"
-commandfile = "/command"
-
-# Command opcodes
-cmd_opcodes = {
-    "schedother"    : "1",
-    "schedfifo"     : "2",
-    "lock"          : "3",
-    "locknowait"    : "4",
-    "lockint"       : "5",
-    "lockintnowait" : "6",
-    "lockcont"      : "7",
-    "unlock"        : "8",
-    "signal"        : "11",
-    "resetevent"    : "98",
-    "reset"         : "99",
-    }
-
-test_opcodes = {
-    "prioeq"        : ["P" , "eq" , None],
-    "priolt"        : ["P" , "lt" , None],
-    "priogt"        : ["P" , "gt" , None],
-    "nprioeq"       : ["N" , "eq" , None],
-    "npriolt"       : ["N" , "lt" , None],
-    "npriogt"       : ["N" , "gt" , None],
-    "unlocked"      : ["M" , "eq" , 0],
-    "trylock"       : ["M" , "eq" , 1],
-    "blocked"       : ["M" , "eq" , 2],
-    "blockedwake"   : ["M" , "eq" , 3],
-    "locked"        : ["M" , "eq" , 4],
-    "opcodeeq"      : ["O" , "eq" , None],
-    "opcodelt"      : ["O" , "lt" , None],
-    "opcodegt"      : ["O" , "gt" , None],
-    "eventeq"       : ["E" , "eq" , None],
-    "eventlt"       : ["E" , "lt" , None],
-    "eventgt"       : ["E" , "gt" , None],
-    }
-
-# Print usage information
-def usage():
-    print "rt-tester.py <-c -h -q -t> <testfile>"
-    print " -c    display comments after first command"
-    print " -h    help"
-    print " -q    quiet mode"
-    print " -t    test mode (syntax check)"
-    print " testfile: read test specification from testfile"
-    print " otherwise from stdin"
-    return
-
-# Print progress when not in quiet mode
-def progress(str):
-    if not quiet:
-        print str
-
-# Analyse a status value
-def analyse(val, top, arg):
-
-    intval = int(val)
-
-    if top[0] == "M":
-        intval = intval / (10 ** int(arg))
-	intval = intval % 10
-        argval = top[2]
-    elif top[0] == "O":
-        argval = int(cmd_opcodes.get(arg, arg))
-    else:
-        argval = int(arg)
-
-    # progress("%d %s %d" %(intval, top[1], argval))
-
-    if top[1] == "eq" and intval == argval:
-	return 1
-    if top[1] == "lt" and intval < argval:
-        return 1
-    if top[1] == "gt" and intval > argval:
-	return 1
-    return 0
-
-# Parse the commandline
-try:
-    (options, arguments) = getopt.getopt(sys.argv[1:],'chqt')
-except getopt.GetoptError, ex:
-    usage()
-    sys.exit(1)
-
-# Parse commandline options
-for option, value in options:
-    if option == "-c":
-        comments = 1
-    elif option == "-q":
-        quiet = 1
-    elif option == "-t":
-        test = 1
-    elif option == '-h':
-        usage()
-        sys.exit(0)
-
-# Select the input source
-if arguments:
-    try:
-        fd = open(arguments[0])
-    except Exception,ex:
-        sys.stderr.write("File not found %s\n" %(arguments[0]))
-        sys.exit(1)
-else:
-    fd = sys.stdin
-
-linenr = 0
-
-# Read the test patterns
-while 1:
-
-    linenr = linenr + 1
-    line = fd.readline()
-    if not len(line):
-        break
-
-    line = line.strip()
-    parts = line.split(":")
-
-    if not parts or len(parts) < 1:
-        continue
-
-    if len(parts[0]) == 0:
-        continue
-
-    if parts[0].startswith("#"):
-	if comments > 1:
-	    progress(line)
-	continue
-
-    if comments == 1:
-	comments = 2
-
-    progress(line)
-
-    cmd = parts[0].strip().lower()
-    opc = parts[1].strip().lower()
-    tid = parts[2].strip()
-    dat = parts[3].strip()
-
-    try:
-        # Test or wait for a status value
-        if cmd == "t" or cmd == "w":
-            testop = test_opcodes[opc]
-
-            fname = "%s%s%s" %(sysfsprefix, tid, statusfile)
-            if test:
-		print fname
-                continue
-
-            while 1:
-                query = 1
-                fsta = open(fname, 'r')
-                status = fsta.readline().strip()
-                fsta.close()
-                stat = status.split(",")
-                for s in stat:
-		    s = s.strip()
-                    if s.startswith(testop[0]):
-                        # Separate status value
-                        val = s[2:].strip()
-                        query = analyse(val, testop, dat)
-                        break
-                if query or cmd == "t":
-                    break
-
-            progress("   " + status)
-
-            if not query:
-                sys.stderr.write("Test failed in line %d\n" %(linenr))
-		sys.exit(1)
-
-        # Issue a command to the tester
-        elif cmd == "c":
-            cmdnr = cmd_opcodes[opc]
-            # Build command string and sys filename
-            cmdstr = "%s:%s" %(cmdnr, dat)
-            fname = "%s%s%s" %(sysfsprefix, tid, commandfile)
-            if test:
-		print fname
-                continue
-            fcmd = open(fname, 'w')
-            fcmd.write(cmdstr)
-            fcmd.close()
-
-    except Exception,ex:
-    	sys.stderr.write(str(ex))
-        sys.stderr.write("\nSyntax error in line %d\n" %(linenr))
-        if not test:
-            fd.close()
-            sys.exit(1)
-
-# Normal exit pass
-print "Pass"
-sys.exit(0)
diff --git a/scripts/rt-tester/t2-l1-2rt-sameprio.tst b/scripts/rt-tester/t2-l1-2rt-sameprio.tst
deleted file mode 100644
index 3710c8b..0000000
--- a/scripts/rt-tester/t2-l1-2rt-sameprio.tst
+++ /dev/null
@@ -1,94 +0,0 @@
-#
-# RT-Mutex test
-#
-# Op: C(ommand)/T(est)/W(ait)
-# |  opcode
-# |  |     threadid: 0-7
-# |  |     |  opcode argument
-# |  |     |  |
-# C: lock: 0: 0
-#
-# Commands
-#
-# opcode	opcode argument
-# schedother	nice value
-# schedfifo	priority
-# lock		lock nr (0-7)
-# locknowait	lock nr (0-7)
-# lockint	lock nr (0-7)
-# lockintnowait	lock nr (0-7)
-# lockcont	lock nr (0-7)
-# unlock	lock nr (0-7)
-# signal	0
-# reset		0
-# resetevent	0
-#
-# Tests / Wait
-#
-# opcode	opcode argument
-#
-# prioeq	priority
-# priolt	priority
-# priogt	priority
-# nprioeq	normal priority
-# npriolt	normal priority
-# npriogt	normal priority
-# locked	lock nr (0-7)
-# blocked	lock nr (0-7)
-# blockedwake	lock nr (0-7)
-# unlocked	lock nr (0-7)
-# opcodeeq	command opcode or number
-# opcodelt	number
-# opcodegt	number
-# eventeq	number
-# eventgt	number
-# eventlt	number
-
-#
-# 2 threads 1 lock
-#
-C: resetevent:		0: 	0
-W: opcodeeq:		0: 	0
-
-# Set schedulers
-C: schedfifo:		0: 	80
-C: schedfifo:		1: 	80
-
-# T0 lock L0
-C: locknowait:		0: 	0
-C: locknowait:		1:	0
-W: locked:		0: 	0
-W: blocked:		1: 	0
-T: prioeq:		0: 	80
-
-# T0 unlock L0
-C: unlock:		0: 	0
-W: locked:		1: 	0
-
-# Verify T0
-W: unlocked:		0: 	0
-T: prioeq:		0: 	80
-
-# Unlock
-C: unlock:		1: 	0
-W: unlocked:		1: 	0
-
-# T1,T0 lock L0
-C: locknowait:		1: 	0
-C: locknowait:		0:	0
-W: locked:		1: 	0
-W: blocked:		0: 	0
-T: prioeq:		1: 	80
-
-# T1 unlock L0
-C: unlock:		1: 	0
-W: locked:		0: 	0
-
-# Verify T1
-W: unlocked:		1: 	0
-T: prioeq:		1: 	80
-
-# Unlock and exit
-C: unlock:		0: 	0
-W: unlocked:		0: 	0
-
diff --git a/scripts/rt-tester/t2-l1-pi.tst b/scripts/rt-tester/t2-l1-pi.tst
deleted file mode 100644
index b4cc959..0000000
--- a/scripts/rt-tester/t2-l1-pi.tst
+++ /dev/null
@@ -1,77 +0,0 @@
-#
-# RT-Mutex test
-#
-# Op: C(ommand)/T(est)/W(ait)
-# |  opcode
-# |  |     threadid: 0-7
-# |  |     |  opcode argument
-# |  |     |  |
-# C: lock: 0: 0
-#
-# Commands
-#
-# opcode	opcode argument
-# schedother	nice value
-# schedfifo	priority
-# lock		lock nr (0-7)
-# locknowait	lock nr (0-7)
-# lockint	lock nr (0-7)
-# lockintnowait	lock nr (0-7)
-# lockcont	lock nr (0-7)
-# unlock	lock nr (0-7)
-# signal	0
-# reset		0
-# resetevent	0
-#
-# Tests / Wait
-#
-# opcode	opcode argument
-#
-# prioeq	priority
-# priolt	priority
-# priogt	priority
-# nprioeq	normal priority
-# npriolt	normal priority
-# npriogt	normal priority
-# locked	lock nr (0-7)
-# blocked	lock nr (0-7)
-# blockedwake	lock nr (0-7)
-# unlocked	lock nr (0-7)
-# opcodeeq	command opcode or number
-# opcodelt	number
-# opcodegt	number
-# eventeq	number
-# eventgt	number
-# eventlt	number
-
-#
-# 2 threads 1 lock with priority inversion
-#
-C: resetevent:		0: 	0
-W: opcodeeq:		0: 	0
-
-# Set schedulers
-C: schedother:		0: 	0
-C: schedfifo:		1: 	80
-
-# T0 lock L0
-C: locknowait:		0: 	0
-W: locked:		0: 	0
-
-# T1 lock L0
-C: locknowait:		1: 	0
-W: blocked:		1: 	0
-T: prioeq:		0: 	80
-
-# T0 unlock L0
-C: unlock:		0: 	0
-W: locked:		1: 	0
-
-# Verify T1
-W: unlocked:		0: 	0
-T: priolt:		0: 	1
-
-# Unlock and exit
-C: unlock:		1: 	0
-W: unlocked:		1: 	0
-
diff --git a/scripts/rt-tester/t2-l1-signal.tst b/scripts/rt-tester/t2-l1-signal.tst
deleted file mode 100644
index 1b57376..0000000
--- a/scripts/rt-tester/t2-l1-signal.tst
+++ /dev/null
@@ -1,72 +0,0 @@
-#
-# RT-Mutex test
-#
-# Op: C(ommand)/T(est)/W(ait)
-# |  opcode
-# |  |     threadid: 0-7
-# |  |     |  opcode argument
-# |  |     |  |
-# C: lock: 0: 0
-#
-# Commands
-#
-# opcode	opcode argument
-# schedother	nice value
-# schedfifo	priority
-# lock		lock nr (0-7)
-# locknowait	lock nr (0-7)
-# lockint	lock nr (0-7)
-# lockintnowait	lock nr (0-7)
-# lockcont	lock nr (0-7)
-# unlock	lock nr (0-7)
-# signal	0
-# reset		0
-# resetevent	0
-#
-# Tests / Wait
-#
-# opcode	opcode argument
-#
-# prioeq	priority
-# priolt	priority
-# priogt	priority
-# nprioeq	normal priority
-# npriolt	normal priority
-# npriogt	normal priority
-# locked	lock nr (0-7)
-# blocked	lock nr (0-7)
-# blockedwake	lock nr (0-7)
-# unlocked	lock nr (0-7)
-# opcodeeq	command opcode or number
-# opcodelt	number
-# opcodegt	number
-# eventeq	number
-# eventgt	number
-# eventlt	number
-
-#
-# 2 threads 1 lock with priority inversion
-#
-C: resetevent:		0: 	0
-W: opcodeeq:		0: 	0
-
-# Set schedulers
-C: schedother:		0: 	0
-C: schedother:		1: 	0
-
-# T0 lock L0
-C: locknowait:		0: 	0
-W: locked:		0: 	0
-
-# T1 lock L0
-C: lockintnowait:	1: 	0
-W: blocked:		1: 	0
-
-# Interrupt T1
-C: signal:		1:	0
-W: unlocked:		1: 	0
-T: opcodeeq:		1:	-4
-
-# Unlock and exit
-C: unlock:		0: 	0
-W: unlocked:		0: 	0
diff --git a/scripts/rt-tester/t2-l2-2rt-deadlock.tst b/scripts/rt-tester/t2-l2-2rt-deadlock.tst
deleted file mode 100644
index 68b1062..0000000
--- a/scripts/rt-tester/t2-l2-2rt-deadlock.tst
+++ /dev/null
@@ -1,84 +0,0 @@
-#
-# RT-Mutex test
-#
-# Op: C(ommand)/T(est)/W(ait)
-# |  opcode
-# |  |     threadid: 0-7
-# |  |     |  opcode argument
-# |  |     |  |
-# C: lock: 0: 0
-#
-# Commands
-#
-# opcode	opcode argument
-# schedother	nice value
-# schedfifo	priority
-# lock		lock nr (0-7)
-# locknowait	lock nr (0-7)
-# lockint	lock nr (0-7)
-# lockintnowait	lock nr (0-7)
-# lockcont	lock nr (0-7)
-# unlock	lock nr (0-7)
-# signal	0
-# reset		0
-# resetevent	0
-#
-# Tests / Wait
-#
-# opcode	opcode argument
-#
-# prioeq	priority
-# priolt	priority
-# priogt	priority
-# nprioeq	normal priority
-# npriolt	normal priority
-# npriogt	normal priority
-# locked	lock nr (0-7)
-# blocked	lock nr (0-7)
-# blockedwake	lock nr (0-7)
-# unlocked	lock nr (0-7)
-# opcodeeq	command opcode or number
-# opcodelt	number
-# opcodegt	number
-# eventeq	number
-# eventgt	number
-# eventlt	number
-
-#
-# 2 threads 2 lock
-#
-C: resetevent:		0: 	0
-W: opcodeeq:		0: 	0
-
-# Set schedulers
-C: schedfifo:		0: 	80
-C: schedfifo:		1: 	80
-
-# T0 lock L0
-C: locknowait:		0: 	0
-W: locked:		0: 	0
-
-# T1 lock L1
-C: locknowait:		1:	1
-W: locked:		1: 	1
-
-# T0 lock L1
-C: lockintnowait:	0: 	1
-W: blocked:		0: 	1
-
-# T1 lock L0
-C: lockintnowait:	1: 	0
-W: blocked:		1: 	0
-
-# Make deadlock go away
-C: signal:		1:	0
-W: unlocked:		1:	0
-C: signal:		0:	0
-W: unlocked:		0:	1
-
-# Unlock and exit
-C: unlock:		0: 	0
-W: unlocked:		0: 	0
-C: unlock:		1: 	1
-W: unlocked:		1: 	1
-
diff --git a/scripts/rt-tester/t3-l1-pi-1rt.tst b/scripts/rt-tester/t3-l1-pi-1rt.tst
deleted file mode 100644
index 8e6c8b1..0000000
--- a/scripts/rt-tester/t3-l1-pi-1rt.tst
+++ /dev/null
@@ -1,87 +0,0 @@
-#
-# rt-mutex test
-#
-# Op: C(ommand)/T(est)/W(ait)
-# |  opcode
-# |  |     threadid: 0-7
-# |  |     |  opcode argument
-# |  |     |  |
-# C: lock: 0: 0
-#
-# Commands
-#
-# opcode	opcode argument
-# schedother	nice value
-# schedfifo	priority
-# lock		lock nr (0-7)
-# locknowait	lock nr (0-7)
-# lockint	lock nr (0-7)
-# lockintnowait	lock nr (0-7)
-# lockcont	lock nr (0-7)
-# unlock	lock nr (0-7)
-# signal	thread to signal (0-7)
-# reset		0
-# resetevent	0
-#
-# Tests / Wait
-#
-# opcode	opcode argument
-#
-# prioeq	priority
-# priolt	priority
-# priogt	priority
-# nprioeq	normal priority
-# npriolt	normal priority
-# npriogt	normal priority
-# locked	lock nr (0-7)
-# blocked	lock nr (0-7)
-# blockedwake	lock nr (0-7)
-# unlocked	lock nr (0-7)
-# opcodeeq	command opcode or number
-# opcodelt	number
-# opcodegt	number
-# eventeq	number
-# eventgt	number
-# eventlt	number
-
-#
-# 3 threads 1 lock PI
-#
-C: resetevent:		0: 	0
-W: opcodeeq:		0: 	0
-
-# Set schedulers
-C: schedother:		0: 	0
-C: schedother:		1: 	0
-C: schedfifo:		2: 	82
-
-# T0 lock L0
-C: locknowait:		0: 	0
-W: locked:		0: 	0
-
-# T1 lock L0
-C: locknowait:		1: 	0
-W: blocked:		1: 	0
-T: priolt:		0: 	1
-
-# T2 lock L0
-C: locknowait:		2: 	0
-W: blocked:		2: 	0
-T: prioeq:		0: 	82
-
-# T0 unlock L0
-C: unlock:		0: 	0
-
-# Wait until T2 got the lock
-W: locked:		2: 	0
-W: unlocked:		0:	0
-T: priolt:		0:	1
-
-# T2 unlock L0
-C: unlock:		2: 	0
-
-W: unlocked:		2: 	0
-W: locked:		1: 	0
-
-C: unlock:		1: 	0
-W: unlocked:		1: 	0
diff --git a/scripts/rt-tester/t3-l1-pi-2rt.tst b/scripts/rt-tester/t3-l1-pi-2rt.tst
deleted file mode 100644
index 69c2212..0000000
--- a/scripts/rt-tester/t3-l1-pi-2rt.tst
+++ /dev/null
@@ -1,88 +0,0 @@
-#
-# rt-mutex test
-#
-# Op: C(ommand)/T(est)/W(ait)
-# |  opcode
-# |  |     threadid: 0-7
-# |  |     |  opcode argument
-# |  |     |  |
-# C: lock: 0: 0
-#
-# Commands
-#
-# opcode	opcode argument
-# schedother	nice value
-# schedfifo	priority
-# lock		lock nr (0-7)
-# locknowait	lock nr (0-7)
-# lockint	lock nr (0-7)
-# lockintnowait	lock nr (0-7)
-# lockcont	lock nr (0-7)
-# unlock	lock nr (0-7)
-# signal	thread to signal (0-7)
-# reset		0
-# resetevent	0
-#
-# Tests / Wait
-#
-# opcode	opcode argument
-#
-# prioeq	priority
-# priolt	priority
-# priogt	priority
-# nprioeq	normal priority
-# npriolt	normal priority
-# npriogt	normal priority
-# locked	lock nr (0-7)
-# blocked	lock nr (0-7)
-# blockedwake	lock nr (0-7)
-# unlocked	lock nr (0-7)
-# opcodeeq	command opcode or number
-# opcodelt	number
-# opcodegt	number
-# eventeq	number
-# eventgt	number
-# eventlt	number
-
-#
-# 3 threads 1 lock PI
-#
-C: resetevent:		0: 	0
-W: opcodeeq:		0: 	0
-
-# Set schedulers
-C: schedother:		0: 	0
-C: schedfifo:		1: 	81
-C: schedfifo:		2: 	82
-
-# T0 lock L0
-C: locknowait:		0: 	0
-W: locked:		0: 	0
-
-# T1 lock L0
-C: locknowait:		1: 	0
-W: blocked:		1: 	0
-T: prioeq:		0: 	81
-
-# T2 lock L0
-C: locknowait:		2: 	0
-W: blocked:		2: 	0
-T: prioeq:		0: 	82
-T: prioeq:		1:	81
-
-# T0 unlock L0
-C: unlock:		0: 	0
-
-# Wait until T2 got the lock
-W: locked:		2: 	0
-W: unlocked:		0:	0
-T: priolt:		0:	1
-
-# T2 unlock L0
-C: unlock:		2: 	0
-
-W: unlocked:		2: 	0
-W: locked:		1: 	0
-
-C: unlock:		1: 	0
-W: unlocked:		1: 	0
diff --git a/scripts/rt-tester/t3-l1-pi-3rt.tst b/scripts/rt-tester/t3-l1-pi-3rt.tst
deleted file mode 100644
index 9b0f1eb..0000000
--- a/scripts/rt-tester/t3-l1-pi-3rt.tst
+++ /dev/null
@@ -1,87 +0,0 @@
-#
-# rt-mutex test
-#
-# Op: C(ommand)/T(est)/W(ait)
-# |  opcode
-# |  |     threadid: 0-7
-# |  |     |  opcode argument
-# |  |     |  |
-# C: lock: 0: 0
-#
-# Commands
-#
-# opcode	opcode argument
-# schedother	nice value
-# schedfifo	priority
-# lock		lock nr (0-7)
-# locknowait	lock nr (0-7)
-# lockint	lock nr (0-7)
-# lockintnowait	lock nr (0-7)
-# lockcont	lock nr (0-7)
-# unlock	lock nr (0-7)
-# signal	thread to signal (0-7)
-# reset		0
-# resetevent	0
-#
-# Tests / Wait
-#
-# opcode	opcode argument
-#
-# prioeq	priority
-# priolt	priority
-# priogt	priority
-# nprioeq	normal priority
-# npriolt	normal priority
-# npriogt	normal priority
-# locked	lock nr (0-7)
-# blocked	lock nr (0-7)
-# blockedwake	lock nr (0-7)
-# unlocked	lock nr (0-7)
-# opcodeeq	command opcode or number
-# opcodelt	number
-# opcodegt	number
-# eventeq	number
-# eventgt	number
-# eventlt	number
-
-#
-# 3 threads 1 lock PI
-#
-C: resetevent:		0: 	0
-W: opcodeeq:		0: 	0
-
-# Set schedulers
-C: schedfifo:		0: 	80
-C: schedfifo:		1: 	81
-C: schedfifo:		2: 	82
-
-# T0 lock L0
-C: locknowait:		0: 	0
-W: locked:		0: 	0
-
-# T1 lock L0
-C: locknowait:		1: 	0
-W: blocked:		1: 	0
-T: prioeq:		0: 	81
-
-# T2 lock L0
-C: locknowait:		2: 	0
-W: blocked:		2: 	0
-T: prioeq:		0: 	82
-
-# T0 unlock L0
-C: unlock:		0: 	0
-
-# Wait until T2 got the lock
-W: locked:		2: 	0
-W: unlocked:		0:	0
-T: prioeq:		0:	80
-
-# T2 unlock L0
-C: unlock:		2: 	0
-
-W: locked:		1: 	0
-W: unlocked:		2: 	0
-
-C: unlock:		1: 	0
-W: unlocked:		1: 	0
diff --git a/scripts/rt-tester/t3-l1-pi-signal.tst b/scripts/rt-tester/t3-l1-pi-signal.tst
deleted file mode 100644
index 39ec74a..0000000
--- a/scripts/rt-tester/t3-l1-pi-signal.tst
+++ /dev/null
@@ -1,93 +0,0 @@
-#
-# rt-mutex test
-#
-# Op: C(ommand)/T(est)/W(ait)
-# |  opcode
-# |  |     threadid: 0-7
-# |  |     |  opcode argument
-# |  |     |  |
-# C: lock: 0: 0
-#
-# Commands
-#
-# opcode	opcode argument
-# schedother	nice value
-# schedfifo	priority
-# lock		lock nr (0-7)
-# locknowait	lock nr (0-7)
-# lockint	lock nr (0-7)
-# lockintnowait	lock nr (0-7)
-# lockcont	lock nr (0-7)
-# unlock	lock nr (0-7)
-# signal	thread to signal (0-7)
-# reset		0
-# resetevent	0
-#
-# Tests / Wait
-#
-# opcode	opcode argument
-#
-# prioeq	priority
-# priolt	priority
-# priogt	priority
-# nprioeq	normal priority
-# npriolt	normal priority
-# npriogt	normal priority
-# locked	lock nr (0-7)
-# blocked	lock nr (0-7)
-# blockedwake	lock nr (0-7)
-# unlocked	lock nr (0-7)
-# opcodeeq	command opcode or number
-# opcodelt	number
-# opcodegt	number
-# eventeq	number
-# eventgt	number
-# eventlt	number
-
-# Reset event counter
-C: resetevent:		0: 	0
-W: opcodeeq:		0: 	0
-
-# Set priorities
-C: schedother:		0: 	0
-C: schedfifo:		1: 	80
-C: schedfifo:		2: 	81
-
-# T0 lock L0
-C: lock:		0:	0
-W: locked:		0: 	0
-
-# T1 lock L0, no wait in the wakeup path
-C: locknowait:		1: 	0
-W: blocked:		1: 	0
-T: prioeq:		0:	80
-T: prioeq:		1:	80
-
-# T2 lock L0 interruptible, no wait in the wakeup path
-C: lockintnowait:	2:	0
-W: blocked:		2: 	0
-T: prioeq:		0:	81
-T: prioeq:		1:	80
-
-# Interrupt T2
-C: signal:		2:	2
-W: unlocked:		2:	0
-T: prioeq:		1:	80
-T: prioeq:		0:	80
-
-T: locked:		0:	0
-T: blocked:		1:	0
-
-# T0 unlock L0
-C: unlock:		0: 	0
-
-# Wait until T1 has locked L0 and exit
-W: locked:		1:	0
-W: unlocked:		0: 	0
-T: priolt:		0:	1
-
-C: unlock:		1: 	0
-W: unlocked:		1: 	0
-
-
-
diff --git a/scripts/rt-tester/t3-l1-pi-steal.tst b/scripts/rt-tester/t3-l1-pi-steal.tst
deleted file mode 100644
index e03db7e..0000000
--- a/scripts/rt-tester/t3-l1-pi-steal.tst
+++ /dev/null
@@ -1,91 +0,0 @@
-#
-# rt-mutex test
-#
-# Op: C(ommand)/T(est)/W(ait)
-# |  opcode
-# |  |     threadid: 0-7
-# |  |     |  opcode argument
-# |  |     |  |
-# C: lock: 0: 0
-#
-# Commands
-#
-# opcode	opcode argument
-# schedother	nice value
-# schedfifo	priority
-# lock		lock nr (0-7)
-# locknowait	lock nr (0-7)
-# lockint	lock nr (0-7)
-# lockintnowait	lock nr (0-7)
-# lockcont	lock nr (0-7)
-# unlock	lock nr (0-7)
-# signal	thread to signal (0-7)
-# reset		0
-# resetevent	0
-#
-# Tests / Wait
-#
-# opcode	opcode argument
-#
-# prioeq	priority
-# priolt	priority
-# priogt	priority
-# nprioeq	normal priority
-# npriolt	normal priority
-# npriogt	normal priority
-# locked	lock nr (0-7)
-# blocked	lock nr (0-7)
-# blockedwake	lock nr (0-7)
-# unlocked	lock nr (0-7)
-# opcodeeq	command opcode or number
-# opcodelt	number
-# opcodegt	number
-# eventeq	number
-# eventgt	number
-# eventlt	number
-
-#
-# 3 threads 1 lock PI steal pending ownership
-#
-C: resetevent:		0: 	0
-W: opcodeeq:		0: 	0
-
-# Set schedulers
-C: schedother:		0: 	0
-C: schedfifo:		1: 	80
-C: schedfifo:		2: 	81
-
-# T0 lock L0
-C: lock:		0: 	0
-W: locked:		0: 	0
-
-# T1 lock L0
-C: lock:		1: 	0
-W: blocked:		1: 	0
-T: prioeq:		0: 	80
-
-# T0 unlock L0
-C: unlock:		0: 	0
-
-# Wait until T1 is in the wakeup loop
-W: blockedwake:		1: 	0
-T: priolt:		0: 	1
-
-# T2 lock L0
-C: lock:		2: 	0
-# T1 leave wakeup loop
-C: lockcont:		1: 	0
-
-# T2 must have the lock and T1 must be blocked
-W: locked:		2: 	0
-W: blocked:		1: 	0
-
-# T2 unlock L0
-C: unlock:		2: 	0
-
-# Wait until T1 is in the wakeup loop and let it run
-W: blockedwake:		1: 	0
-C: lockcont:		1: 	0
-W: locked:		1: 	0
-C: unlock:		1: 	0
-W: unlocked:		1: 	0
diff --git a/scripts/rt-tester/t3-l2-pi.tst b/scripts/rt-tester/t3-l2-pi.tst
deleted file mode 100644
index 7b59100..0000000
--- a/scripts/rt-tester/t3-l2-pi.tst
+++ /dev/null
@@ -1,87 +0,0 @@
-#
-# rt-mutex test
-#
-# Op: C(ommand)/T(est)/W(ait)
-# |  opcode
-# |  |     threadid: 0-7
-# |  |     |  opcode argument
-# |  |     |  |
-# C: lock: 0: 0
-#
-# Commands
-#
-# opcode	opcode argument
-# schedother	nice value
-# schedfifo	priority
-# lock		lock nr (0-7)
-# locknowait	lock nr (0-7)
-# lockint	lock nr (0-7)
-# lockintnowait	lock nr (0-7)
-# lockcont	lock nr (0-7)
-# unlock	lock nr (0-7)
-# signal	thread to signal (0-7)
-# reset		0
-# resetevent	0
-#
-# Tests / Wait
-#
-# opcode	opcode argument
-#
-# prioeq	priority
-# priolt	priority
-# priogt	priority
-# nprioeq	normal priority
-# npriolt	normal priority
-# npriogt	normal priority
-# locked	lock nr (0-7)
-# blocked	lock nr (0-7)
-# blockedwake	lock nr (0-7)
-# unlocked	lock nr (0-7)
-# opcodeeq	command opcode or number
-# opcodelt	number
-# opcodegt	number
-# eventeq	number
-# eventgt	number
-# eventlt	number
-
-#
-# 3 threads 2 lock PI
-#
-C: resetevent:		0: 	0
-W: opcodeeq:		0: 	0
-
-# Set schedulers
-C: schedother:		0: 	0
-C: schedother:		1: 	0
-C: schedfifo:		2: 	82
-
-# T0 lock L0
-C: locknowait:		0: 	0
-W: locked:		0: 	0
-
-# T1 lock L0
-C: locknowait:		1: 	0
-W: blocked:		1: 	0
-T: priolt:		0: 	1
-
-# T2 lock L0
-C: locknowait:		2: 	0
-W: blocked:		2: 	0
-T: prioeq:		0: 	82
-
-# T0 unlock L0
-C: unlock:		0: 	0
-
-# Wait until T2 got the lock
-W: locked:		2: 	0
-W: unlocked:		0:	0
-T: priolt:		0:	1
-
-# T2 unlock L0
-C: unlock:		2: 	0
-
-W: unlocked:		2: 	0
-W: locked:		1: 	0
-
-C: unlock:		1: 	0
-W: unlocked:		1: 	0
diff --git a/scripts/rt-tester/t4-l2-pi-deboost.tst b/scripts/rt-tester/t4-l2-pi-deboost.tst
deleted file mode 100644
index 2f0e049..0000000
--- a/scripts/rt-tester/t4-l2-pi-deboost.tst
+++ /dev/null
@@ -1,118 +0,0 @@
-#
-# rt-mutex test
-#
-# Op: C(ommand)/T(est)/W(ait)
-# |  opcode
-# |  |     threadid: 0-7
-# |  |     |  opcode argument
-# |  |     |  |
-# C: lock: 0: 0
-#
-# Commands
-#
-# opcode	opcode argument
-# schedother	nice value
-# schedfifo	priority
-# lock		lock nr (0-7)
-# locknowait	lock nr (0-7)
-# lockint	lock nr (0-7)
-# lockintnowait	lock nr (0-7)
-# lockcont	lock nr (0-7)
-# unlock	lock nr (0-7)
-# signal	thread to signal (0-7)
-# reset		0
-# resetevent	0
-#
-# Tests / Wait
-#
-# opcode	opcode argument
-#
-# prioeq	priority
-# priolt	priority
-# priogt	priority
-# nprioeq	normal priority
-# npriolt	normal priority
-# npriogt	normal priority
-# locked	lock nr (0-7)
-# blocked	lock nr (0-7)
-# blockedwake	lock nr (0-7)
-# unlocked	lock nr (0-7)
-# opcodeeq	command opcode or number
-# opcodelt	number
-# opcodegt	number
-# eventeq	number
-# eventgt	number
-# eventlt	number
-
-#
-# 4 threads 2 lock PI
-#
-C: resetevent:		0: 	0
-W: opcodeeq:		0: 	0
-
-# Set schedulers
-C: schedother:		0: 	0
-C: schedother:		1: 	0
-C: schedfifo:		2: 	82
-C: schedfifo:		3: 	83
-
-# T0 lock L0
-C: locknowait:		0: 	0
-W: locked:		0: 	0
-
-# T1 lock L1
-C: locknowait:		1: 	1
-W: locked:		1: 	1
-
-# T3 lock L0
-C: lockintnowait:	3: 	0
-W: blocked:		3: 	0
-T: prioeq:		0: 	83
-
-# T0 lock L1
-C: lock:		0: 	1
-W: blocked:		0: 	1
-T: prioeq:		1: 	83
-
-# T1 unlock L1
-C: unlock:		1:	1
-
-# Wait until T0 is in the wakeup code
-W: blockedwake:		0:	1
-
-# Verify that T1 is unboosted
-W: unlocked:		1: 	1
-T: priolt:		1: 	1
-
-# T2 lock L1 (T0 is boosted and pending owner !)
-C: locknowait:		2:	1
-W: blocked:		2: 	1
-T: prioeq:		0: 	83
-
-# Interrupt T3 and wait until T3 returned
-C: signal:		3:	0
-W: unlocked:		3:	0
-
-# Verify prio of T0 (still pending owner,
-# but T2 is enqueued due to the previous boost by T3
-T: prioeq:		0:	82
-
-# Let T0 continue
-C: lockcont:		0:	1
-W: locked:		0:	1
-
-# Unlock L1 and let T2 get L1
-C: unlock:		0:	1
-W: locked:		2:	1
-
-# Verify that T0 is unboosted
-W: unlocked:		0:	1
-T: priolt:		0:	1
-
-# Unlock everything and exit
-C: unlock:		2:	1
-W: unlocked:		2:	1
-
-C: unlock:		0:	0
-W: unlocked:		0:	0
-
diff --git a/scripts/rt-tester/t5-l4-pi-boost-deboost-setsched.tst b/scripts/rt-tester/t5-l4-pi-boost-deboost-setsched.tst
deleted file mode 100644
index 04f4034..0000000
--- a/scripts/rt-tester/t5-l4-pi-boost-deboost-setsched.tst
+++ /dev/null
@@ -1,178 +0,0 @@
-#
-# rt-mutex test
-#
-# Op: C(ommand)/T(est)/W(ait)
-# |  opcode
-# |  |     threadid: 0-7
-# |  |     |  opcode argument
-# |  |     |  |
-# C: lock: 0: 0
-#
-# Commands
-#
-# opcode	opcode argument
-# schedother	nice value
-# schedfifo	priority
-# lock		lock nr (0-7)
-# locknowait	lock nr (0-7)
-# lockint	lock nr (0-7)
-# lockintnowait	lock nr (0-7)
-# lockcont	lock nr (0-7)
-# unlock	lock nr (0-7)
-# signal	thread to signal (0-7)
-# reset		0
-# resetevent	0
-#
-# Tests / Wait
-#
-# opcode	opcode argument
-#
-# prioeq	priority
-# priolt	priority
-# priogt	priority
-# nprioeq	normal priority
-# npriolt	normal priority
-# npriogt	normal priority
-# locked	lock nr (0-7)
-# blocked	lock nr (0-7)
-# blockedwake	lock nr (0-7)
-# unlocked	lock nr (0-7)
-# opcodeeq	command opcode or number
-# opcodelt	number
-# opcodegt	number
-# eventeq	number
-# eventgt	number
-# eventlt	number
-
-#
-# 5 threads 4 lock PI - modify priority of blocked threads
-#
-C: resetevent:		0: 	0
-W: opcodeeq:		0: 	0
-
-# Set schedulers
-C: schedother:		0: 	0
-C: schedfifo:		1: 	81
-C: schedfifo:		2: 	82
-C: schedfifo:		3: 	83
-C: schedfifo:		4: 	84
-
-# T0 lock L0
-C: locknowait:		0: 	0
-W: locked:		0: 	0
-
-# T1 lock L1
-C: locknowait:		1: 	1
-W: locked:		1: 	1
-
-# T1 lock L0
-C: lockintnowait:	1: 	0
-W: blocked:		1: 	0
-T: prioeq:		0: 	81
-
-# T2 lock L2
-C: locknowait:		2: 	2
-W: locked:		2: 	2
-
-# T2 lock L1
-C: lockintnowait:	2: 	1
-W: blocked:		2: 	1
-T: prioeq:		0: 	82
-T: prioeq:		1:	82
-
-# T3 lock L3
-C: locknowait:		3: 	3
-W: locked:		3: 	3
-
-# T3 lock L2
-C: lockintnowait:	3: 	2
-W: blocked:		3: 	2
-T: prioeq:		0: 	83
-T: prioeq:		1:	83
-T: prioeq:		2:	83
-
-# T4 lock L3
-C: lockintnowait:	4:	3
-W: blocked:		4: 	3
-T: prioeq:		0: 	84
-T: prioeq:		1:	84
-T: prioeq:		2:	84
-T: prioeq:		3:	84
-
-# Reduce prio of T4
-C: schedfifo:		4: 	80
-T: prioeq:		0: 	83
-T: prioeq:		1:	83
-T: prioeq:		2:	83
-T: prioeq:		3:	83
-T: prioeq:		4:	80
-
-# Increase prio of T4
-C: schedfifo:		4: 	84
-T: prioeq:		0: 	84
-T: prioeq:		1:	84
-T: prioeq:		2:	84
-T: prioeq:		3:	84
-T: prioeq:		4:	84
-
-# Reduce prio of T3
-C: schedfifo:		3: 	80
-T: prioeq:		0: 	84
-T: prioeq:		1:	84
-T: prioeq:		2:	84
-T: prioeq:		3:	84
-T: prioeq:		4:	84
-
-# Increase prio of T3
-C: schedfifo:		3: 	85
-T: prioeq:		0: 	85
-T: prioeq:		1:	85
-T: prioeq:		2:	85
-T: prioeq:		3:	85
-T: prioeq:		4:	84
-
-# Reduce prio of T3
-C: schedfifo:		3: 	83
-T: prioeq:		0: 	84
-T: prioeq:		1:	84
-T: prioeq:		2:	84
-T: prioeq:		3:	84
-T: prioeq:		4:	84
-
-# Signal T4
-C: signal:		4: 	0
-W: unlocked:		4: 	3
-T: prioeq:		0: 	83
-T: prioeq:		1:	83
-T: prioeq:		2:	83
-T: prioeq:		3:	83
-
-# Signal T3
-C: signal:		3: 	0
-W: unlocked:		3: 	2
-T: prioeq:		0: 	82
-T: prioeq:		1:	82
-T: prioeq:		2:	82
-
-# Signal T2
-C: signal:		2: 	0
-W: unlocked:		2: 	1
-T: prioeq:		0: 	81
-T: prioeq:		1:	81
-
-# Signal T1
-C: signal:		1: 	0
-W: unlocked:		1: 	0
-T: priolt:		0: 	1
-
-# Unlock and exit
-C: unlock:		3:	3
-C: unlock:		2:	2
-C: unlock:		1:	1
-C: unlock:		0:	0
-
-W: unlocked:		3:	3
-W: unlocked:		2:	2
-W: unlocked:		1:	1
-W: unlocked:		0:	0
-
diff --git a/scripts/rt-tester/t5-l4-pi-boost-deboost.tst b/scripts/rt-tester/t5-l4-pi-boost-deboost.tst
deleted file mode 100644
index a48a6ee..0000000
--- a/scripts/rt-tester/t5-l4-pi-boost-deboost.tst
+++ /dev/null
@@ -1,138 +0,0 @@
-#
-# rt-mutex test
-#
-# Op: C(ommand)/T(est)/W(ait)
-# |  opcode
-# |  |     threadid: 0-7
-# |  |     |  opcode argument
-# |  |     |  |
-# C: lock: 0: 0
-#
-# Commands
-#
-# opcode	opcode argument
-# schedother	nice value
-# schedfifo	priority
-# lock		lock nr (0-7)
-# locknowait	lock nr (0-7)
-# lockint	lock nr (0-7)
-# lockintnowait	lock nr (0-7)
-# lockcont	lock nr (0-7)
-# unlock	lock nr (0-7)
-# signal	thread to signal (0-7)
-# reset		0
-# resetevent	0
-#
-# Tests / Wait
-#
-# opcode	opcode argument
-#
-# prioeq	priority
-# priolt	priority
-# priogt	priority
-# nprioeq	normal priority
-# npriolt	normal priority
-# npriogt	normal priority
-# locked	lock nr (0-7)
-# blocked	lock nr (0-7)
-# blockedwake	lock nr (0-7)
-# unlocked	lock nr (0-7)
-# opcodeeq	command opcode or number
-# opcodelt	number
-# opcodegt	number
-# eventeq	number
-# eventgt	number
-# eventlt	number
-
-#
-# 5 threads 4 lock PI
-#
-C: resetevent:		0: 	0
-W: opcodeeq:		0: 	0
-
-# Set schedulers
-C: schedother:		0: 	0
-C: schedfifo:		1: 	81
-C: schedfifo:		2: 	82
-C: schedfifo:		3: 	83
-C: schedfifo:		4: 	84
-
-# T0 lock L0
-C: locknowait:		0: 	0
-W: locked:		0: 	0
-
-# T1 lock L1
-C: locknowait:		1: 	1
-W: locked:		1: 	1
-
-# T1 lock L0
-C: lockintnowait:	1: 	0
-W: blocked:		1: 	0
-T: prioeq:		0: 	81
-
-# T2 lock L2
-C: locknowait:		2: 	2
-W: locked:		2: 	2
-
-# T2 lock L1
-C: lockintnowait:	2: 	1
-W: blocked:		2: 	1
-T: prioeq:		0: 	82
-T: prioeq:		1:	82
-
-# T3 lock L3
-C: locknowait:		3: 	3
-W: locked:		3: 	3
-
-# T3 lock L2
-C: lockintnowait:	3: 	2
-W: blocked:		3: 	2
-T: prioeq:		0: 	83
-T: prioeq:		1:	83
-T: prioeq:		2:	83
-
-# T4 lock L3
-C: lockintnowait:	4:	3
-W: blocked:		4: 	3
-T: prioeq:		0: 	84
-T: prioeq:		1:	84
-T: prioeq:		2:	84
-T: prioeq:		3:	84
-
-# Signal T4
-C: signal:		4: 	0
-W: unlocked:		4: 	3
-T: prioeq:		0: 	83
-T: prioeq:		1:	83
-T: prioeq:		2:	83
-T: prioeq:		3:	83
-
-# Signal T3
-C: signal:		3: 	0
-W: unlocked:		3: 	2
-T: prioeq:		0: 	82
-T: prioeq:		1:	82
-T: prioeq:		2:	82
-
-# Signal T2
-C: signal:		2: 	0
-W: unlocked:		2: 	1
-T: prioeq:		0: 	81
-T: prioeq:		1:	81
-
-# Signal T1
-C: signal:		1: 	0
-W: unlocked:		1: 	0
-T: priolt:		0: 	1
-
-# Unlock and exit
-C: unlock:		3:	3
-C: unlock:		2:	2
-C: unlock:		1:	1
-C: unlock:		0:	0
-
-W: unlocked:		3:	3
-W: unlocked:		2:	2
-W: unlocked:		1:	1
-W: unlocked:		0:	0
-
-- 
cgit v0.10.2


From 5d285a7f35b55c8fba346306ad3eb174c4f7eab1 Mon Sep 17 00:00:00 2001
From: kbuild test robot <fengguang.wu@intel.com>
Date: Tue, 21 Jul 2015 01:40:45 +0800
Subject: futex: Make should_fail_futex() static

Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
Cc: kbuild-all@01.org
Cc: tipbuild@zytor.com
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Darren Hart <darren@dvhart.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Brian Silverman <bsilver16384@gmail.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/futex.c b/kernel/futex.c
index 6ea31bb..6e443ef 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -279,7 +279,7 @@ static int __init setup_fail_futex(char *str)
 }
 __setup("fail_futex=", setup_fail_futex);
 
-bool should_fail_futex(bool fshared)
+static bool should_fail_futex(bool fshared)
 {
 	if (fail_futex.ignore_private && !fshared)
 		return false;
-- 
cgit v0.10.2


From 56d1defe0bbddaa97d6e74b51490904130fd4f1d Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 15 Jul 2015 15:47:25 +0200
Subject: atomic: Prepare generic atomic implementation for logic ops

Clean up the #ifdef guards a bit to prepare for architectures to
supply their own logic ops.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h
index 1973ad2..92947e0 100644
--- a/include/asm-generic/atomic.h
+++ b/include/asm-generic/atomic.h
@@ -98,14 +98,22 @@ ATOMIC_OP_RETURN(add, +)
 ATOMIC_OP_RETURN(sub, -)
 #endif
 
-#ifndef atomic_clear_mask
+#ifndef atomic_and
 ATOMIC_OP(and, &)
+#endif
+
+#ifndef atomic_clear_mask
 #define atomic_clear_mask(i, v) atomic_and(~(i), (v))
 #endif
 
-#ifndef atomic_set_mask
+#ifndef atomic_or
+#ifndef CONFIG_ARCH_HAS_ATOMIC_OR
 #define CONFIG_ARCH_HAS_ATOMIC_OR
+#endif
 ATOMIC_OP(or, |)
+#endif
+
+#ifndef atomic_set_mask
 #define atomic_set_mask(i, v)	atomic_or((i), (v))
 #endif
 
-- 
cgit v0.10.2


From 212d3be102d73dce70cc12f39dce4e0aed2c025b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 23 Apr 2014 20:07:47 +0200
Subject: alpha: Provide atomic_{or,xor,and}

Implement atomic logic ops -- atomic_{or,xor,and}.

These will replace the atomic_{set,clear}_mask functions that are
available on some archs.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index 8f8eafb..0eff853 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -29,13 +29,13 @@
  * branch back to restart the operation.
  */
 
-#define ATOMIC_OP(op)							\
+#define ATOMIC_OP(op, asm_op)						\
 static __inline__ void atomic_##op(int i, atomic_t * v)			\
 {									\
 	unsigned long temp;						\
 	__asm__ __volatile__(						\
 	"1:	ldl_l %0,%1\n"						\
-	"	" #op "l %0,%2,%0\n"					\
+	"	" #asm_op " %0,%2,%0\n"					\
 	"	stl_c %0,%1\n"						\
 	"	beq %0,2f\n"						\
 	".subsection 2\n"						\
@@ -45,15 +45,15 @@ static __inline__ void atomic_##op(int i, atomic_t * v)			\
 	:"Ir" (i), "m" (v->counter));					\
 }									\
 
-#define ATOMIC_OP_RETURN(op)						\
+#define ATOMIC_OP_RETURN(op, asm_op)					\
 static inline int atomic_##op##_return(int i, atomic_t *v)		\
 {									\
 	long temp, result;						\
 	smp_mb();							\
 	__asm__ __volatile__(						\
 	"1:	ldl_l %0,%1\n"						\
-	"	" #op "l %0,%3,%2\n"					\
-	"	" #op "l %0,%3,%0\n"					\
+	"	" #asm_op " %0,%3,%2\n"					\
+	"	" #asm_op " %0,%3,%0\n"					\
 	"	stl_c %0,%1\n"						\
 	"	beq %0,2f\n"						\
 	".subsection 2\n"						\
@@ -65,13 +65,13 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 	return result;							\
 }
 
-#define ATOMIC64_OP(op)							\
+#define ATOMIC64_OP(op, asm_op)						\
 static __inline__ void atomic64_##op(long i, atomic64_t * v)		\
 {									\
 	unsigned long temp;						\
 	__asm__ __volatile__(						\
 	"1:	ldq_l %0,%1\n"						\
-	"	" #op "q %0,%2,%0\n"					\
+	"	" #asm_op " %0,%2,%0\n"					\
 	"	stq_c %0,%1\n"						\
 	"	beq %0,2f\n"						\
 	".subsection 2\n"						\
@@ -81,15 +81,15 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v)		\
 	:"Ir" (i), "m" (v->counter));					\
 }									\
 
-#define ATOMIC64_OP_RETURN(op)						\
+#define ATOMIC64_OP_RETURN(op, asm_op)					\
 static __inline__ long atomic64_##op##_return(long i, atomic64_t * v)	\
 {									\
 	long temp, result;						\
 	smp_mb();							\
 	__asm__ __volatile__(						\
 	"1:	ldq_l %0,%1\n"						\
-	"	" #op "q %0,%3,%2\n"					\
-	"	" #op "q %0,%3,%0\n"					\
+	"	" #asm_op " %0,%3,%2\n"					\
+	"	" #asm_op " %0,%3,%0\n"					\
 	"	stq_c %0,%1\n"						\
 	"	beq %0,2f\n"						\
 	".subsection 2\n"						\
@@ -101,15 +101,28 @@ static __inline__ long atomic64_##op##_return(long i, atomic64_t * v)	\
 	return result;							\
 }
 
-#define ATOMIC_OPS(opg)							\
-	ATOMIC_OP(opg)							\
-	ATOMIC_OP_RETURN(opg)						\
-	ATOMIC64_OP(opg)						\
-	ATOMIC64_OP_RETURN(opg)
+#define ATOMIC_OPS(op)							\
+	ATOMIC_OP(op, op##l)						\
+	ATOMIC_OP_RETURN(op, op##l)					\
+	ATOMIC64_OP(op, op##q)						\
+	ATOMIC64_OP_RETURN(op, op##q)
 
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
 
+#define CONFIG_ARCH_HAS_ATOMIC_OR
+#define atomic_andnot atomic_andnot
+#define atomic64_andnot atomic64_andnot
+
+ATOMIC_OP(and, and)
+ATOMIC_OP(andnot, bic)
+ATOMIC_OP(or, bis)
+ATOMIC_OP(xor, xor)
+ATOMIC64_OP(and, and)
+ATOMIC64_OP(andnot, bic)
+ATOMIC64_OP(or, bis)
+ATOMIC64_OP(xor, xor)
+
 #undef ATOMIC_OPS
 #undef ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
-- 
cgit v0.10.2


From cda7e4137a4e39c7d877b6ed41da13a5b30dd2cf Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 23 Apr 2014 20:06:20 +0200
Subject: arc: Provide atomic_{or,xor,and}

Implement atomic logic ops -- atomic_{or,xor,and}.

These will replace the atomic_{set,clear}_mask functions that are
available on some archs.

Acked-by: Vineet Gupta <vgupta@synopsys.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 03484cb..e90b701 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -143,14 +143,29 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 
 ATOMIC_OPS(add, +=, add)
 ATOMIC_OPS(sub, -=, sub)
-ATOMIC_OP(and, &=, and)
 
-#define atomic_clear_mask(mask, v) atomic_and(~(mask), (v))
+#define CONFIG_ARCH_HAS_ATOMIC_OR
+#define atomic_andnot atomic_andnot
+
+ATOMIC_OP(and, &=, and)
+ATOMIC_OP(andnot, &= ~, bic)
+ATOMIC_OP(or, |=, or)
+ATOMIC_OP(xor, ^=, xor)
 
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
+static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
+{
+	atomic_and(~mask, v);
+}
+
+static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
+{
+	atomic_or(mask, v);
+}
+
 /**
  * __atomic_add_unless - add unless the number is a given value
  * @v: pointer of type atomic_t
-- 
cgit v0.10.2


From 125897908c718972351b589da89b7f990892d4df Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 23 Apr 2014 20:04:39 +0200
Subject: arm: Provide atomic_{or,xor,and}

Implement atomic logic ops -- atomic_{or,xor,and}.

These will replace the atomic_{set,clear}_mask functions that are
available on some archs.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index e22c119..ff214ba 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -194,6 +194,14 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 ATOMIC_OPS(add, +=, add)
 ATOMIC_OPS(sub, -=, sub)
 
+#define CONFIG_ARCH_HAS_ATOMIC_OR
+#define atomic_andnot atomic_andnot
+
+ATOMIC_OP(and, &=, and)
+ATOMIC_OP(andnot, &= ~, bic)
+ATOMIC_OP(or,  |=, orr)
+ATOMIC_OP(xor, ^=, eor)
+
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
@@ -321,6 +329,13 @@ static inline long long atomic64_##op##_return(long long i, atomic64_t *v) \
 ATOMIC64_OPS(add, adds, adc)
 ATOMIC64_OPS(sub, subs, sbc)
 
+#define atomic64_andnot atomic64_andnot
+
+ATOMIC64_OP(and, and, and)
+ATOMIC64_OP(andnot, bic, bic)
+ATOMIC64_OP(or,  orr, orr)
+ATOMIC64_OP(xor, eor, eor)
+
 #undef ATOMIC64_OPS
 #undef ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
-- 
cgit v0.10.2


From 22288b40e2c7ab484a08a03df32782a67903912c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 23 Apr 2014 20:03:13 +0200
Subject: arm64: Provide atomic_{or,xor,and}

Implement atomic logic ops -- atomic_{or,xor,and}.

These will replace the atomic_{set,clear}_mask functions that are
available on some archs.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index 7047051..2876173 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -85,6 +85,14 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 ATOMIC_OPS(add, add)
 ATOMIC_OPS(sub, sub)
 
+#define CONFIG_ARCH_HAS_ATOMIC_OR
+#define atomic_andnot atomic_andnot
+
+ATOMIC_OP(and, and)
+ATOMIC_OP(andnot, bic)
+ATOMIC_OP(or, orr)
+ATOMIC_OP(xor, eor)
+
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
@@ -183,6 +191,13 @@ static inline long atomic64_##op##_return(long i, atomic64_t *v)	\
 ATOMIC64_OPS(add, add)
 ATOMIC64_OPS(sub, sub)
 
+#define atomic64_andnot atomic64_andnot
+
+ATOMIC64_OP(and, and)
+ATOMIC64_OP(andnot, bic)
+ATOMIC64_OP(or, orr)
+ATOMIC64_OP(xor, eor)
+
 #undef ATOMIC64_OPS
 #undef ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
-- 
cgit v0.10.2


From f8a570e270bf62363cd498ac2ac8ea07a76ad4d6 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 23 Apr 2014 20:02:20 +0200
Subject: avr32: Provide atomic_{or,xor,and}

Implement atomic logic ops -- atomic_{or,xor,and}.

These will replace the atomic_{set,clear}_mask functions that are
available on some archs.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/avr32/include/asm/atomic.h b/arch/avr32/include/asm/atomic.h
index 2d07ce1..115d300 100644
--- a/arch/avr32/include/asm/atomic.h
+++ b/arch/avr32/include/asm/atomic.h
@@ -44,6 +44,20 @@ static inline int __atomic_##op##_return(int i, atomic_t *v)		\
 ATOMIC_OP_RETURN(sub, sub, rKs21)
 ATOMIC_OP_RETURN(add, add, r)
 
+#define ATOMIC_OP(op, asm_op)						\
+ATOMIC_OP_RETURN(op, asm_op, r)						\
+static inline void atomic_##op(int i, atomic_t *v)			\
+{									\
+	(void)__atomic_##op##_return(i, v);				\
+}
+
+#define CONFIG_ARCH_HAS_ATOMIC_OR
+
+ATOMIC_OP(and, and)
+ATOMIC_OP(or, or)
+ATOMIC_OP(xor, eor)
+
+#undef ATOMIC_OP
 #undef ATOMIC_OP_RETURN
 
 /*
-- 
cgit v0.10.2


From d835b6c4cc02507b3bf3f8ee6c86857cf0ee67ab Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 23 Apr 2015 21:44:42 +0200
Subject: blackfin: Provide atomic_{or,xor,and}

Implement atomic logic ops -- atomic_{or,xor,and}.

These will replace the atomic_{set,clear}_mask functions that are
available on some archs.

TODO: use inline asm or at least asm macros to collapse the lot.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/blackfin/include/asm/atomic.h b/arch/blackfin/include/asm/atomic.h
index a107a98..eafa55b 100644
--- a/arch/blackfin/include/asm/atomic.h
+++ b/arch/blackfin/include/asm/atomic.h
@@ -16,19 +16,33 @@
 #include <linux/types.h>
 
 asmlinkage int __raw_uncached_fetch_asm(const volatile int *ptr);
-asmlinkage int __raw_atomic_update_asm(volatile int *ptr, int value);
-asmlinkage int __raw_atomic_clear_asm(volatile int *ptr, int value);
-asmlinkage int __raw_atomic_set_asm(volatile int *ptr, int value);
+asmlinkage int __raw_atomic_add_asm(volatile int *ptr, int value);
+
+asmlinkage int __raw_atomic_and_asm(volatile int *ptr, int value);
+asmlinkage int __raw_atomic_or_asm(volatile int *ptr, int value);
 asmlinkage int __raw_atomic_xor_asm(volatile int *ptr, int value);
 asmlinkage int __raw_atomic_test_asm(const volatile int *ptr, int value);
 
 #define atomic_read(v) __raw_uncached_fetch_asm(&(v)->counter)
 
-#define atomic_add_return(i, v) __raw_atomic_update_asm(&(v)->counter, i)
-#define atomic_sub_return(i, v) __raw_atomic_update_asm(&(v)->counter, -(i))
+#define atomic_add_return(i, v) __raw_atomic_add_asm(&(v)->counter, i)
+#define atomic_sub_return(i, v) __raw_atomic_add_asm(&(v)->counter, -(i))
+
+#define CONFIG_ARCH_HAS_ATOMIC_OR
+
+#define atomic_or(i, v)  (void)__raw_atomic_or_asm(&(v)->counter, i)
+#define atomic_and(i, v) (void)__raw_atomic_and_asm(&(v)->counter, i)
+#define atomic_xor(i, v) (void)__raw_atomic_xor_asm(&(v)->counter, i)
+
+static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
+{
+	atomic_and(~mask, v);
+}
 
-#define atomic_clear_mask(m, v) __raw_atomic_clear_asm(&(v)->counter, m)
-#define atomic_set_mask(m, v)   __raw_atomic_set_asm(&(v)->counter, m)
+static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
+{
+	atomic_or(mask, v);
+}
 
 #endif
 
diff --git a/arch/blackfin/kernel/bfin_ksyms.c b/arch/blackfin/kernel/bfin_ksyms.c
index c446591..a401c27 100644
--- a/arch/blackfin/kernel/bfin_ksyms.c
+++ b/arch/blackfin/kernel/bfin_ksyms.c
@@ -83,11 +83,12 @@ EXPORT_SYMBOL(insl);
 EXPORT_SYMBOL(insl_16);
 
 #ifdef CONFIG_SMP
-EXPORT_SYMBOL(__raw_atomic_update_asm);
-EXPORT_SYMBOL(__raw_atomic_clear_asm);
-EXPORT_SYMBOL(__raw_atomic_set_asm);
+EXPORT_SYMBOL(__raw_atomic_add_asm);
+EXPORT_SYMBOL(__raw_atomic_and_asm);
+EXPORT_SYMBOL(__raw_atomic_or_asm);
 EXPORT_SYMBOL(__raw_atomic_xor_asm);
 EXPORT_SYMBOL(__raw_atomic_test_asm);
+
 EXPORT_SYMBOL(__raw_xchg_1_asm);
 EXPORT_SYMBOL(__raw_xchg_2_asm);
 EXPORT_SYMBOL(__raw_xchg_4_asm);
diff --git a/arch/blackfin/mach-bf561/atomic.S b/arch/blackfin/mach-bf561/atomic.S
index 2a08df8..26fccb5 100644
--- a/arch/blackfin/mach-bf561/atomic.S
+++ b/arch/blackfin/mach-bf561/atomic.S
@@ -587,10 +587,10 @@ ENDPROC(___raw_write_unlock_asm)
  * r0 = ptr
  * r1 = value
  *
- * Add a signed value to a 32bit word and return the new value atomically.
+ * ADD a signed value to a 32bit word and return the new value atomically.
  * Clobbers: r3:0, p1:0
  */
-ENTRY(___raw_atomic_update_asm)
+ENTRY(___raw_atomic_add_asm)
 	p1 = r0;
 	r3 = r1;
 	[--sp] = rets;
@@ -603,19 +603,19 @@ ENTRY(___raw_atomic_update_asm)
 	r0 = r3;
 	rets = [sp++];
 	rts;
-ENDPROC(___raw_atomic_update_asm)
+ENDPROC(___raw_atomic_add_asm)
 
 /*
  * r0 = ptr
  * r1 = mask
  *
- * Clear the mask bits from a 32bit word and return the old 32bit value
+ * AND the mask bits from a 32bit word and return the old 32bit value
  * atomically.
  * Clobbers: r3:0, p1:0
  */
-ENTRY(___raw_atomic_clear_asm)
+ENTRY(___raw_atomic_and_asm)
 	p1 = r0;
-	r3 = ~r1;
+	r3 = r1;
 	[--sp] = rets;
 	call _get_core_lock;
 	r2 = [p1];
@@ -627,17 +627,17 @@ ENTRY(___raw_atomic_clear_asm)
 	r0 = r3;
 	rets = [sp++];
 	rts;
-ENDPROC(___raw_atomic_clear_asm)
+ENDPROC(___raw_atomic_and_asm)
 
 /*
  * r0 = ptr
  * r1 = mask
  *
- * Set the mask bits into a 32bit word and return the old 32bit value
+ * OR the mask bits into a 32bit word and return the old 32bit value
  * atomically.
  * Clobbers: r3:0, p1:0
  */
-ENTRY(___raw_atomic_set_asm)
+ENTRY(___raw_atomic_or_asm)
 	p1 = r0;
 	r3 = r1;
 	[--sp] = rets;
@@ -651,7 +651,7 @@ ENTRY(___raw_atomic_set_asm)
 	r0 = r3;
 	rets = [sp++];
 	rts;
-ENDPROC(___raw_atomic_set_asm)
+ENDPROC(___raw_atomic_or_asm)
 
 /*
  * r0 = ptr
@@ -787,7 +787,7 @@ ENTRY(___raw_bit_set_asm)
 	r2 = r1;
 	r1 = 1;
 	r1 <<= r2;
-	jump ___raw_atomic_set_asm
+	jump ___raw_atomic_or_asm
 ENDPROC(___raw_bit_set_asm)
 
 /*
@@ -798,10 +798,10 @@ ENDPROC(___raw_bit_set_asm)
  * Clobbers: r3:0, p1:0
  */
 ENTRY(___raw_bit_clear_asm)
-	r2 = r1;
-	r1 = 1;
-	r1 <<= r2;
-	jump ___raw_atomic_clear_asm
+	r2 = 1;
+	r2 <<= r1;
+	r1 = ~r2;
+	jump ___raw_atomic_and_asm
 ENDPROC(___raw_bit_clear_asm)
 
 /*
-- 
cgit v0.10.2


From 610f7ba93b1e4cda7370f367a6042cbda61227bb Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 23 Apr 2014 20:00:54 +0200
Subject: hexagon: Provide atomic_{or,xor,and}

Implement atomic logic ops -- atomic_{or,xor,and}.

These will replace the atomic_{set,clear}_mask functions that are
available on some archs.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h
index 93d0702..4efe2c7 100644
--- a/arch/hexagon/include/asm/atomic.h
+++ b/arch/hexagon/include/asm/atomic.h
@@ -132,6 +132,12 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
 
+#define CONFIG_ARCH_HAS_ATOMIC_OR
+
+ATOMIC_OP(and)
+ATOMIC_OP(or)
+ATOMIC_OP(xor)
+
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
-- 
cgit v0.10.2


From 70ed47390d10a7351e44c3a07c5de376780cb6c1 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 23 Apr 2014 20:00:01 +0200
Subject: ia64: Provide atomic_{or,xor,and}

Implement atomic logic ops -- atomic_{or,xor,and}.

These will replace the atomic_{set,clear}_mask functions that are
available on some archs.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h
index 0bf0350..0809ef5 100644
--- a/arch/ia64/include/asm/atomic.h
+++ b/arch/ia64/include/asm/atomic.h
@@ -45,8 +45,6 @@ ia64_atomic_##op (int i, atomic_t *v)					\
 ATOMIC_OP(add, +)
 ATOMIC_OP(sub, -)
 
-#undef ATOMIC_OP
-
 #define atomic_add_return(i,v)						\
 ({									\
 	int __ia64_aar_i = (i);						\
@@ -71,6 +69,18 @@ ATOMIC_OP(sub, -)
 		: ia64_atomic_sub(__ia64_asr_i, v);			\
 })
 
+#define CONFIG_ARCH_HAS_ATOMIC_OR
+
+ATOMIC_OP(and, &)
+ATOMIC_OP(or, |)
+ATOMIC_OP(xor, ^)
+
+#define atomic_and(i,v)	(void)ia64_atomic_and(i,v)
+#define atomic_or(i,v)	(void)ia64_atomic_or(i,v)
+#define atomic_xor(i,v)	(void)ia64_atomic_xor(i,v)
+
+#undef ATOMIC_OP
+
 #define ATOMIC64_OP(op, c_op)						\
 static __inline__ long							\
 ia64_atomic64_##op (__s64 i, atomic64_t *v)				\
@@ -89,8 +99,6 @@ ia64_atomic64_##op (__s64 i, atomic64_t *v)				\
 ATOMIC64_OP(add, +)
 ATOMIC64_OP(sub, -)
 
-#undef ATOMIC64_OP
-
 #define atomic64_add_return(i,v)					\
 ({									\
 	long __ia64_aar_i = (i);					\
@@ -115,6 +123,16 @@ ATOMIC64_OP(sub, -)
 		: ia64_atomic64_sub(__ia64_asr_i, v);			\
 })
 
+ATOMIC64_OP(and, &)
+ATOMIC64_OP(or, |)
+ATOMIC64_OP(xor, ^)
+
+#define atomic64_and(i,v)	(void)ia64_atomic64_and(i,v)
+#define atomic64_or(i,v)	(void)ia64_atomic64_or(i,v)
+#define atomic64_xor(i,v)	(void)ia64_atomic64_xor(i,v)
+
+#undef ATOMIC64_OP
+
 #define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), old, new))
 #define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 
-- 
cgit v0.10.2


From c66e45edef51e1f54297ddaf202fc2dd00852734 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 23 Apr 2014 19:57:49 +0200
Subject: m32r: Provide atomic_{or,xor,and}

Implement atomic logic ops -- atomic_{or,xor,and}.

These will replace the atomic_{set,clear}_mask functions that are
available on some archs.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/m32r/include/asm/atomic.h b/arch/m32r/include/asm/atomic.h
index 31bb74a..7245463 100644
--- a/arch/m32r/include/asm/atomic.h
+++ b/arch/m32r/include/asm/atomic.h
@@ -94,6 +94,12 @@ static __inline__ int atomic_##op##_return(int i, atomic_t *v)		\
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
 
+#define CONFIG_ARCH_HAS_ATOMIC_OR
+
+ATOMIC_OP(and)
+ATOMIC_OP(or)
+ATOMIC_OP(xor)
+
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
@@ -240,44 +246,14 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
 }
 
 
-static __inline__ void atomic_clear_mask(unsigned long  mask, atomic_t *addr)
+static __inline__ __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
 {
-	unsigned long flags;
-	unsigned long tmp;
-
-	local_irq_save(flags);
-	__asm__ __volatile__ (
-		"# atomic_clear_mask		\n\t"
-		DCACHE_CLEAR("%0", "r5", "%1")
-		M32R_LOCK" %0, @%1;		\n\t"
-		"and	%0, %2;			\n\t"
-		M32R_UNLOCK" %0, @%1;		\n\t"
-		: "=&r" (tmp)
-		: "r" (addr), "r" (~mask)
-		: "memory"
-		__ATOMIC_CLOBBER
-	);
-	local_irq_restore(flags);
+	atomic_and(~mask, v);
 }
 
-static __inline__ void atomic_set_mask(unsigned long  mask, atomic_t *addr)
+static __inline__ __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
 {
-	unsigned long flags;
-	unsigned long tmp;
-
-	local_irq_save(flags);
-	__asm__ __volatile__ (
-		"# atomic_set_mask		\n\t"
-		DCACHE_CLEAR("%0", "r5", "%1")
-		M32R_LOCK" %0, @%1;		\n\t"
-		"or	%0, %2;			\n\t"
-		M32R_UNLOCK" %0, @%1;		\n\t"
-		: "=&r" (tmp)
-		: "r" (addr), "r" (mask)
-		: "memory"
-		__ATOMIC_CLOBBER
-	);
-	local_irq_restore(flags);
+	atomic_or(mask, v);
 }
 
 #endif	/* _ASM_M32R_ATOMIC_H */
-- 
cgit v0.10.2


From 74b1bc505b058efdfef33e775df68d8324bbf8b8 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 23 Apr 2014 19:56:20 +0200
Subject: m68k: Provide atomic_{or,xor,and}

Implement atomic logic ops -- atomic_{or,xor,and}.

These will replace the atomic_{set,clear}_mask functions that are
available on some archs.

Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h
index e85f047..c30e43e 100644
--- a/arch/m68k/include/asm/atomic.h
+++ b/arch/m68k/include/asm/atomic.h
@@ -77,6 +77,12 @@ static inline int atomic_##op##_return(int i, atomic_t * v)		\
 ATOMIC_OPS(add, +=, add)
 ATOMIC_OPS(sub, -=, sub)
 
+#define CONFIG_ARCH_HAS_ATOMIC_OR
+
+ATOMIC_OP(and, &=, and)
+ATOMIC_OP(or, |=, or)
+ATOMIC_OP(xor, ^=, eor)
+
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
@@ -170,14 +176,14 @@ static inline int atomic_add_negative(int i, atomic_t *v)
 	return c != 0;
 }
 
-static inline void atomic_clear_mask(unsigned long mask, unsigned long *v)
+static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
 {
-	__asm__ __volatile__("andl %1,%0" : "+m" (*v) : ASM_DI (~(mask)));
+	atomic_and(~mask, v);
 }
 
-static inline void atomic_set_mask(unsigned long mask, unsigned long *v)
+static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
 {
-	__asm__ __volatile__("orl %1,%0" : "+m" (*v) : ASM_DI (mask));
+	atomic_or(mask, v);
 }
 
 static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
-- 
cgit v0.10.2


From 5b4a2f0f91fd4a635f380f847254d23121972e7b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 23 Apr 2014 19:53:39 +0200
Subject: metag: Provide atomic_{or,xor,and}

Implement atomic logic ops -- atomic_{or,xor,and}.

These will replace the atomic_{set,clear}_mask functions that are
available on some archs.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/metag/include/asm/atomic_lnkget.h b/arch/metag/include/asm/atomic_lnkget.h
index 948d868..930c12c 100644
--- a/arch/metag/include/asm/atomic_lnkget.h
+++ b/arch/metag/include/asm/atomic_lnkget.h
@@ -74,42 +74,24 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
 
+#define CONFIG_ARCH_HAS_ATOMIC_OR
+
+ATOMIC_OP(and)
+ATOMIC_OP(or)
+ATOMIC_OP(xor)
+
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-static inline void atomic_clear_mask(unsigned int mask, atomic_t *v)
+static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
 {
-	int temp;
-
-	asm volatile (
-		"1:	LNKGETD %0, [%1]\n"
-		"	AND	%0, %0, %2\n"
-		"	LNKSETD	[%1] %0\n"
-		"	DEFR	%0, TXSTAT\n"
-		"	ANDT	%0, %0, #HI(0x3f000000)\n"
-		"	CMPT	%0, #HI(0x02000000)\n"
-		"	BNZ	1b\n"
-		: "=&d" (temp)
-		: "da" (&v->counter), "bd" (~mask)
-		: "cc");
+	atomic_and(~mask, v);
 }
 
-static inline void atomic_set_mask(unsigned int mask, atomic_t *v)
+static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
 {
-	int temp;
-
-	asm volatile (
-		"1:	LNKGETD %0, [%1]\n"
-		"	OR	%0, %0, %2\n"
-		"	LNKSETD	[%1], %0\n"
-		"	DEFR	%0, TXSTAT\n"
-		"	ANDT	%0, %0, #HI(0x3f000000)\n"
-		"	CMPT	%0, #HI(0x02000000)\n"
-		"	BNZ	1b\n"
-		: "=&d" (temp)
-		: "da" (&v->counter), "bd" (mask)
-		: "cc");
+	atomic_or(mask, v);
 }
 
 static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
diff --git a/arch/metag/include/asm/atomic_lock1.h b/arch/metag/include/asm/atomic_lock1.h
index f5d5898..7d88725 100644
--- a/arch/metag/include/asm/atomic_lock1.h
+++ b/arch/metag/include/asm/atomic_lock1.h
@@ -68,29 +68,22 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 
 ATOMIC_OPS(add, +=)
 ATOMIC_OPS(sub, -=)
+ATOMIC_OP(and, &=)
+ATOMIC_OP(or, |=)
+ATOMIC_OP(xor, ^=)
 
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-static inline void atomic_clear_mask(unsigned int mask, atomic_t *v)
+static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
 {
-	unsigned long flags;
-
-	__global_lock1(flags);
-	fence();
-	v->counter &= ~mask;
-	__global_unlock1(flags);
+	atomic_and(~mask, v);
 }
 
-static inline void atomic_set_mask(unsigned int mask, atomic_t *v)
+static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
 {
-	unsigned long flags;
-
-	__global_lock1(flags);
-	fence();
-	v->counter |= mask;
-	__global_unlock1(flags);
+	atomic_or(mask, v);
 }
 
 static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
-- 
cgit v0.10.2


From 27782f2752aca65a241f10fb2d4508c71bb2656b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 23 Apr 2014 19:51:36 +0200
Subject: mips: Provide atomic_{or,xor,and}

Implement atomic logic ops -- atomic_{or,xor,and}.

These will replace the atomic_{set,clear}_mask functions that are
available on some archs.

Acked-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index 26d4363..0430ba6 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -137,6 +137,12 @@ static __inline__ int atomic_##op##_return(int i, atomic_t * v)		      \
 ATOMIC_OPS(add, +=, addu)
 ATOMIC_OPS(sub, -=, subu)
 
+#define CONFIG_ARCH_HAS_ATOMIC_OR
+
+ATOMIC_OP(and, &=, and)
+ATOMIC_OP(or, |=, or)
+ATOMIC_OP(xor, ^=, xor)
+
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
@@ -416,6 +422,9 @@ static __inline__ long atomic64_##op##_return(long i, atomic64_t * v)	      \
 
 ATOMIC64_OPS(add, +=, daddu)
 ATOMIC64_OPS(sub, -=, dsubu)
+ATOMIC64_OP(and, &=, and)
+ATOMIC64_OP(or, |=, or)
+ATOMIC64_OP(xor, ^=, xor)
 
 #undef ATOMIC64_OPS
 #undef ATOMIC64_OP_RETURN
-- 
cgit v0.10.2


From ddb7573ff68964e7b3b72eeb9cde1384c4c6ba83 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 23 Apr 2014 19:50:20 +0200
Subject: mn10300: Provide atomic_{or,xor,and}

Implement atomic logic ops -- atomic_{or,xor,and}.

These will replace the atomic_{set,clear}_mask functions that are
available on some archs.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/mn10300/include/asm/atomic.h b/arch/mn10300/include/asm/atomic.h
index 5be655e..03eea81 100644
--- a/arch/mn10300/include/asm/atomic.h
+++ b/arch/mn10300/include/asm/atomic.h
@@ -89,6 +89,12 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
 
+#define CONFIG_ARCH_HAS_ATOMIC_OR
+
+ATOMIC_OP(and)
+ATOMIC_OP(or)
+ATOMIC_OP(xor)
+
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
@@ -134,31 +140,9 @@ static inline void atomic_dec(atomic_t *v)
  *
  * Atomically clears the bits set in mask from the memory word specified.
  */
-static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr)
+static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
 {
-#ifdef CONFIG_SMP
-	int status;
-
-	asm volatile(
-		"1:	mov	%3,(_AAR,%2)	\n"
-		"	mov	(_ADR,%2),%0	\n"
-		"	and	%4,%0		\n"
-		"	mov	%0,(_ADR,%2)	\n"
-		"	mov	(_ADR,%2),%0	\n"	/* flush */
-		"	mov	(_ASR,%2),%0	\n"
-		"	or	%0,%0		\n"
-		"	bne	1b		\n"
-		: "=&r"(status), "=m"(*addr)
-		: "a"(ATOMIC_OPS_BASE_ADDR), "r"(addr), "r"(~mask)
-		: "memory", "cc");
-#else
-	unsigned long flags;
-
-	mask = ~mask;
-	flags = arch_local_cli_save();
-	*addr &= mask;
-	arch_local_irq_restore(flags);
-#endif
+	atomic_and(~mask, v);
 }
 
 /**
@@ -168,30 +152,9 @@ static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr)
  *
  * Atomically sets the bits set in mask from the memory word specified.
  */
-static inline void atomic_set_mask(unsigned long mask, unsigned long *addr)
+static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
 {
-#ifdef CONFIG_SMP
-	int status;
-
-	asm volatile(
-		"1:	mov	%3,(_AAR,%2)	\n"
-		"	mov	(_ADR,%2),%0	\n"
-		"	or	%4,%0		\n"
-		"	mov	%0,(_ADR,%2)	\n"
-		"	mov	(_ADR,%2),%0	\n"	/* flush */
-		"	mov	(_ASR,%2),%0	\n"
-		"	or	%0,%0		\n"
-		"	bne	1b		\n"
-		: "=&r"(status), "=m"(*addr)
-		: "a"(ATOMIC_OPS_BASE_ADDR), "r"(addr), "r"(mask)
-		: "memory", "cc");
-#else
-	unsigned long flags;
-
-	flags = arch_local_cli_save();
-	*addr |= mask;
-	arch_local_irq_restore(flags);
-#endif
+	atomic_or(mask, v);
 }
 
 #endif /* __KERNEL__ */
-- 
cgit v0.10.2


From aebea9359623b58b54242e5daa24e09d6104977b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 23 Apr 2014 19:47:25 +0200
Subject: parisc: Provide atomic_{or,xor,and}

Implement atomic logic ops -- atomic_{or,xor,and}.

These will replace the atomic_{set,clear}_mask functions that are
available on some archs.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index 226f8ca9..be2c50d 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -126,6 +126,12 @@ static __inline__ int atomic_##op##_return(int i, atomic_t *v)		\
 ATOMIC_OPS(add, +=)
 ATOMIC_OPS(sub, -=)
 
+#define CONFIG_ARCH_HAS_ATOMIC_OR
+
+ATOMIC_OP(and, &=)
+ATOMIC_OP(or, |=)
+ATOMIC_OP(xor, ^=)
+
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
@@ -185,6 +191,9 @@ static __inline__ s64 atomic64_##op##_return(s64 i, atomic64_t *v)	\
 
 ATOMIC64_OPS(add, +=)
 ATOMIC64_OPS(sub, -=)
+ATOMIC64_OP(and, &=)
+ATOMIC64_OP(or, |=)
+ATOMIC64_OP(xor, ^=)
 
 #undef ATOMIC64_OPS
 #undef ATOMIC64_OP_RETURN
-- 
cgit v0.10.2


From d0b7eb6ff2985c9350fdb297fa5727c35324d4df Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 23 Apr 2014 19:46:23 +0200
Subject: powerpc: Provide atomic_{or,xor,and}

Implement atomic logic ops -- atomic_{or,xor,and}.

These will replace the atomic_{set,clear}_mask functions that are
available on some archs.

Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 512d278..6ca89e2 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -67,6 +67,12 @@ static __inline__ int atomic_##op##_return(int a, atomic_t *v)		\
 ATOMIC_OPS(add, add)
 ATOMIC_OPS(sub, subf)
 
+#define CONFIG_ARCH_HAS_ATOMIC_OR
+
+ATOMIC_OP(and, and)
+ATOMIC_OP(or, or)
+ATOMIC_OP(xor, xor)
+
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
@@ -304,6 +310,9 @@ static __inline__ long atomic64_##op##_return(long a, atomic64_t *v)	\
 
 ATOMIC64_OPS(add, add)
 ATOMIC64_OPS(sub, subf)
+ATOMIC64_OP(and, and)
+ATOMIC64_OP(or, or)
+ATOMIC64_OP(xor, xor)
 
 #undef ATOMIC64_OPS
 #undef ATOMIC64_OP_RETURN
-- 
cgit v0.10.2


From 658aa51459c2f5284183d35b6dd0beca0e0bfe2f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 23 Apr 2014 19:44:20 +0200
Subject: sh: Provide atomic_{or,xor,and}

Implement atomic logic ops -- atomic_{or,xor,and}.

These will replace the atomic_{set,clear}_mask functions that are
available on some archs.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/sh/include/asm/atomic-grb.h b/arch/sh/include/asm/atomic-grb.h
index 97a5fda..4b03830 100644
--- a/arch/sh/include/asm/atomic-grb.h
+++ b/arch/sh/include/asm/atomic-grb.h
@@ -48,47 +48,14 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
 
+#define CONFIG_ARCH_HAS_ATOMIC_OR
+
+ATOMIC_OP(and)
+ATOMIC_OP(or)
+ATOMIC_OP(xor)
+
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-static inline void atomic_clear_mask(unsigned int mask, atomic_t *v)
-{
-	int tmp;
-	unsigned int _mask = ~mask;
-
-	__asm__ __volatile__ (
-		"   .align 2              \n\t"
-		"   mova    1f,   r0      \n\t" /* r0 = end point */
-		"   mov    r15,   r1      \n\t" /* r1 = saved sp */
-		"   mov    #-6,   r15     \n\t" /* LOGIN: r15 = size */
-		"   mov.l  @%1,   %0      \n\t" /* load  old value */
-		"   and     %2,   %0      \n\t" /* add */
-		"   mov.l   %0,   @%1     \n\t" /* store new value */
-		"1: mov     r1,   r15     \n\t" /* LOGOUT */
-		: "=&r" (tmp),
-		  "+r"  (v)
-		: "r"   (_mask)
-		: "memory" , "r0", "r1");
-}
-
-static inline void atomic_set_mask(unsigned int mask, atomic_t *v)
-{
-	int tmp;
-
-	__asm__ __volatile__ (
-		"   .align 2              \n\t"
-		"   mova    1f,   r0      \n\t" /* r0 = end point */
-		"   mov    r15,   r1      \n\t" /* r1 = saved sp */
-		"   mov    #-6,   r15     \n\t" /* LOGIN: r15 = size */
-		"   mov.l  @%1,   %0      \n\t" /* load  old value */
-		"   or      %2,   %0      \n\t" /* or */
-		"   mov.l   %0,   @%1     \n\t" /* store new value */
-		"1: mov     r1,   r15     \n\t" /* LOGOUT */
-		: "=&r" (tmp),
-		  "+r"  (v)
-		: "r"   (mask)
-		: "memory" , "r0", "r1");
-}
-
 #endif /* __ASM_SH_ATOMIC_GRB_H */
diff --git a/arch/sh/include/asm/atomic-irq.h b/arch/sh/include/asm/atomic-irq.h
index 61d1075..23fcdad 100644
--- a/arch/sh/include/asm/atomic-irq.h
+++ b/arch/sh/include/asm/atomic-irq.h
@@ -37,27 +37,12 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 
 ATOMIC_OPS(add, +=)
 ATOMIC_OPS(sub, -=)
+ATOMIC_OP(and, &=)
+ATOMIC_OP(or, |=)
+ATOMIC_OP(xor, ^=)
 
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-static inline void atomic_clear_mask(unsigned int mask, atomic_t *v)
-{
-	unsigned long flags;
-
-	raw_local_irq_save(flags);
-	v->counter &= ~mask;
-	raw_local_irq_restore(flags);
-}
-
-static inline void atomic_set_mask(unsigned int mask, atomic_t *v)
-{
-	unsigned long flags;
-
-	raw_local_irq_save(flags);
-	v->counter |= mask;
-	raw_local_irq_restore(flags);
-}
-
 #endif /* __ASM_SH_ATOMIC_IRQ_H */
diff --git a/arch/sh/include/asm/atomic-llsc.h b/arch/sh/include/asm/atomic-llsc.h
index 8575dcc..33d34b1 100644
--- a/arch/sh/include/asm/atomic-llsc.h
+++ b/arch/sh/include/asm/atomic-llsc.h
@@ -52,37 +52,12 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
+ATOMIC_OP(and)
+ATOMIC_OP(or)
+ATOMIC_OP(xor)
 
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-static inline void atomic_clear_mask(unsigned int mask, atomic_t *v)
-{
-	unsigned long tmp;
-
-	__asm__ __volatile__ (
-"1:	movli.l @%2, %0		! atomic_clear_mask	\n"
-"	and	%1, %0					\n"
-"	movco.l	%0, @%2					\n"
-"	bf	1b					\n"
-	: "=&z" (tmp)
-	: "r" (~mask), "r" (&v->counter)
-	: "t");
-}
-
-static inline void atomic_set_mask(unsigned int mask, atomic_t *v)
-{
-	unsigned long tmp;
-
-	__asm__ __volatile__ (
-"1:	movli.l @%2, %0		! atomic_set_mask	\n"
-"	or	%1, %0					\n"
-"	movco.l	%0, @%2					\n"
-"	bf	1b					\n"
-	: "=&z" (tmp)
-	: "r" (mask), "r" (&v->counter)
-	: "t");
-}
-
 #endif /* __ASM_SH_ATOMIC_LLSC_H */
diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h
index 05b9f74..cee0245 100644
--- a/arch/sh/include/asm/atomic.h
+++ b/arch/sh/include/asm/atomic.h
@@ -25,6 +25,16 @@
 #include <asm/atomic-irq.h>
 #endif
 
+static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
+{
+	atomic_and(~mask, v);
+}
+
+static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
+{
+	atomic_or(mask, v);
+}
+
 #define atomic_add_negative(a, v)	(atomic_add_return((a), (v)) < 0)
 #define atomic_dec_return(v)		atomic_sub_return(1, (v))
 #define atomic_inc_return(v)		atomic_add_return(1, (v))
-- 
cgit v0.10.2


From 304a0d699a3c6103b61d5ea18d56820e7d8e3116 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 23 Apr 2014 19:40:25 +0200
Subject: sparc: Provide atomic_{or,xor,and}

Implement atomic logic ops -- atomic_{or,xor,and}.

These will replace the atomic_{set,clear}_mask functions that are
available on some archs.

Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h
index 0e69b7e..e19d888 100644
--- a/arch/sparc/include/asm/atomic_32.h
+++ b/arch/sparc/include/asm/atomic_32.h
@@ -17,10 +17,14 @@
 #include <asm/barrier.h>
 #include <asm-generic/atomic64.h>
 
+#define CONFIG_ARCH_HAS_ATOMIC_OR
 
 #define ATOMIC_INIT(i)  { (i) }
 
 int atomic_add_return(int, atomic_t *);
+void atomic_and(int, atomic_t *);
+void atomic_or(int, atomic_t *);
+void atomic_xor(int, atomic_t *);
 int atomic_cmpxchg(atomic_t *, int, int);
 int atomic_xchg(atomic_t *, int);
 int __atomic_add_unless(atomic_t *, int, int);
diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h
index 4082749..d6af27c 100644
--- a/arch/sparc/include/asm/atomic_64.h
+++ b/arch/sparc/include/asm/atomic_64.h
@@ -33,6 +33,12 @@ long atomic64_##op##_return(long, atomic64_t *);
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
 
+#define CONFIG_ARCH_HAS_ATOMIC_OR
+
+ATOMIC_OP(and)
+ATOMIC_OP(or)
+ATOMIC_OP(xor)
+
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c
index 71cd65a..b9d63c0 100644
--- a/arch/sparc/lib/atomic32.c
+++ b/arch/sparc/lib/atomic32.c
@@ -27,22 +27,38 @@ static DEFINE_SPINLOCK(dummy);
 
 #endif /* SMP */
 
-#define ATOMIC_OP(op, cop)						\
+#define ATOMIC_OP_RETURN(op, c_op)					\
 int atomic_##op##_return(int i, atomic_t *v)				\
 {									\
 	int ret;							\
 	unsigned long flags;						\
 	spin_lock_irqsave(ATOMIC_HASH(v), flags);			\
 									\
-	ret = (v->counter cop i);					\
+	ret = (v->counter c_op i);					\
 									\
 	spin_unlock_irqrestore(ATOMIC_HASH(v), flags);			\
 	return ret;							\
 }									\
 EXPORT_SYMBOL(atomic_##op##_return);
 
-ATOMIC_OP(add, +=)
+#define ATOMIC_OP(op, c_op)						\
+void atomic_##op(int i, atomic_t *v)					\
+{									\
+	unsigned long flags;						\
+	spin_lock_irqsave(ATOMIC_HASH(v), flags);			\
+									\
+	v->counter c_op i;						\
+									\
+	spin_unlock_irqrestore(ATOMIC_HASH(v), flags);			\
+}									\
+EXPORT_SYMBOL(atomic_##op);
+
+ATOMIC_OP_RETURN(add, +=)
+ATOMIC_OP(and, &=)
+ATOMIC_OP(or, |=)
+ATOMIC_OP(xor, ^=)
 
+#undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
 int atomic_xchg(atomic_t *v, int new)
diff --git a/arch/sparc/lib/atomic_64.S b/arch/sparc/lib/atomic_64.S
index 05dac43..d6b0363 100644
--- a/arch/sparc/lib/atomic_64.S
+++ b/arch/sparc/lib/atomic_64.S
@@ -47,6 +47,9 @@ ENDPROC(atomic_##op##_return);
 
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
+ATOMIC_OP(and)
+ATOMIC_OP(or)
+ATOMIC_OP(xor)
 
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
@@ -84,6 +87,9 @@ ENDPROC(atomic64_##op##_return);
 
 ATOMIC64_OPS(add)
 ATOMIC64_OPS(sub)
+ATOMIC64_OP(and)
+ATOMIC64_OP(or)
+ATOMIC64_OP(xor)
 
 #undef ATOMIC64_OPS
 #undef ATOMIC64_OP_RETURN
diff --git a/arch/sparc/lib/ksyms.c b/arch/sparc/lib/ksyms.c
index 1d649a9..bb60059 100644
--- a/arch/sparc/lib/ksyms.c
+++ b/arch/sparc/lib/ksyms.c
@@ -111,6 +111,9 @@ EXPORT_SYMBOL(atomic64_##op##_return);
 
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
+ATOMIC_OP(and)
+ATOMIC_OP(or)
+ATOMIC_OP(xor)
 
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
-- 
cgit v0.10.2


From 2a3ed90f428cab65567c5421ebc0f6c8d02c1216 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 23 Apr 2014 19:35:00 +0200
Subject: xtensa: Provide atomic_{or,xor,and}

Implement atomic logic ops -- atomic_{or,xor,and}.

These will replace the atomic_{set,clear}_mask functions that are
available on some archs.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h
index 00b7d46..4dd2450 100644
--- a/arch/xtensa/include/asm/atomic.h
+++ b/arch/xtensa/include/asm/atomic.h
@@ -145,10 +145,26 @@ static inline int atomic_##op##_return(int i, atomic_t * v)		\
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
 
+#define CONFIG_ARCH_HAS_ATOMIC_OR
+
+ATOMIC_OP(and)
+ATOMIC_OP(or)
+ATOMIC_OP(xor)
+
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
+static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
+{
+	atomic_or(mask, v);
+}
+
+static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
+{
+	atomic_and(~mask, v);
+}
+
 /**
  * atomic_sub_and_test - subtract value from variable and test result
  * @i: integer value to subtract
@@ -250,75 +266,6 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
 	return c;
 }
 
-
-static inline void atomic_clear_mask(unsigned int mask, atomic_t *v)
-{
-#if XCHAL_HAVE_S32C1I
-	unsigned long tmp;
-	int result;
-
-	__asm__ __volatile__(
-			"1:     l32i    %1, %3, 0\n"
-			"       wsr     %1, scompare1\n"
-			"       and     %0, %1, %2\n"
-			"       s32c1i  %0, %3, 0\n"
-			"       bne     %0, %1, 1b\n"
-			: "=&a" (result), "=&a" (tmp)
-			: "a" (~mask), "a" (v)
-			: "memory"
-			);
-#else
-	unsigned int all_f = -1;
-	unsigned int vval;
-
-	__asm__ __volatile__(
-			"       rsil    a15,"__stringify(LOCKLEVEL)"\n"
-			"       l32i    %0, %2, 0\n"
-			"       xor     %1, %4, %3\n"
-			"       and     %0, %0, %4\n"
-			"       s32i    %0, %2, 0\n"
-			"       wsr     a15, ps\n"
-			"       rsync\n"
-			: "=&a" (vval), "=a" (mask)
-			: "a" (v), "a" (all_f), "1" (mask)
-			: "a15", "memory"
-			);
-#endif
-}
-
-static inline void atomic_set_mask(unsigned int mask, atomic_t *v)
-{
-#if XCHAL_HAVE_S32C1I
-	unsigned long tmp;
-	int result;
-
-	__asm__ __volatile__(
-			"1:     l32i    %1, %3, 0\n"
-			"       wsr     %1, scompare1\n"
-			"       or      %0, %1, %2\n"
-			"       s32c1i  %0, %3, 0\n"
-			"       bne     %0, %1, 1b\n"
-			: "=&a" (result), "=&a" (tmp)
-			: "a" (mask), "a" (v)
-			: "memory"
-			);
-#else
-	unsigned int vval;
-
-	__asm__ __volatile__(
-			"       rsil    a15,"__stringify(LOCKLEVEL)"\n"
-			"       l32i    %0, %2, 0\n"
-			"       or      %0, %0, %1\n"
-			"       s32i    %0, %2, 0\n"
-			"       wsr     a15, ps\n"
-			"       rsync\n"
-			: "=&a" (vval)
-			: "a" (mask), "a" (v)
-			: "a15", "memory"
-			);
-#endif
-}
-
 #endif /* __KERNEL__ */
 
 #endif /* _XTENSA_ATOMIC_H */
-- 
cgit v0.10.2


From ae8c35c85be92b79c545c57c2a14c2f8136d3353 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 23 Apr 2014 20:15:48 +0200
Subject: s390: Provide atomic_{or,xor,and}

Implement atomic logic ops -- atomic_{or,xor,and}.

These will replace the atomic_{set,clear}_mask functions that are
available on some archs.

Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index adbe380..b3859d8 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -27,6 +27,7 @@
 #define __ATOMIC_OR	"lao"
 #define __ATOMIC_AND	"lan"
 #define __ATOMIC_ADD	"laa"
+#define __ATOMIC_XOR	"lax"
 #define __ATOMIC_BARRIER "bcr	14,0\n"
 
 #define __ATOMIC_LOOP(ptr, op_val, op_string, __barrier)		\
@@ -49,6 +50,7 @@
 #define __ATOMIC_OR	"or"
 #define __ATOMIC_AND	"nr"
 #define __ATOMIC_ADD	"ar"
+#define __ATOMIC_XOR	"xr"
 #define __ATOMIC_BARRIER "\n"
 
 #define __ATOMIC_LOOP(ptr, op_val, op_string, __barrier)		\
@@ -118,14 +120,26 @@ static inline void atomic_add(int i, atomic_t *v)
 #define atomic_dec_return(_v)		atomic_sub_return(1, _v)
 #define atomic_dec_and_test(_v)		(atomic_sub_return(1, _v) == 0)
 
-static inline void atomic_clear_mask(unsigned int mask, atomic_t *v)
+#define ATOMIC_OP(op, OP)						\
+static inline void atomic_##op(int i, atomic_t *v)			\
+{									\
+	__ATOMIC_LOOP(v, i, __ATOMIC_##OP, __ATOMIC_NO_BARRIER);	\
+}
+
+ATOMIC_OP(and, AND)
+ATOMIC_OP(or, OR)
+ATOMIC_OP(xor, XOR)
+
+#undef ATOMIC_OP
+
+static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
 {
-	__ATOMIC_LOOP(v, ~mask, __ATOMIC_AND, __ATOMIC_NO_BARRIER);
+	atomic_and(~mask, v);
 }
 
-static inline void atomic_set_mask(unsigned int mask, atomic_t *v)
+static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
 {
-	__ATOMIC_LOOP(v, mask, __ATOMIC_OR, __ATOMIC_NO_BARRIER);
+	atomic_or(mask, v);
 }
 
 #define atomic_xchg(v, new) (xchg(&((v)->counter), new))
@@ -167,6 +181,7 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 #define __ATOMIC64_OR	"laog"
 #define __ATOMIC64_AND	"lang"
 #define __ATOMIC64_ADD	"laag"
+#define __ATOMIC64_XOR	"laxg"
 #define __ATOMIC64_BARRIER "bcr	14,0\n"
 
 #define __ATOMIC64_LOOP(ptr, op_val, op_string, __barrier)		\
@@ -189,6 +204,7 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 #define __ATOMIC64_OR	"ogr"
 #define __ATOMIC64_AND	"ngr"
 #define __ATOMIC64_ADD	"agr"
+#define __ATOMIC64_XOR	"xgr"
 #define __ATOMIC64_BARRIER "\n"
 
 #define __ATOMIC64_LOOP(ptr, op_val, op_string, __barrier)		\
@@ -247,16 +263,6 @@ static inline void atomic64_add(long long i, atomic64_t *v)
 	__ATOMIC64_LOOP(v, i, __ATOMIC64_ADD, __ATOMIC64_NO_BARRIER);
 }
 
-static inline void atomic64_clear_mask(unsigned long mask, atomic64_t *v)
-{
-	__ATOMIC64_LOOP(v, ~mask, __ATOMIC64_AND, __ATOMIC64_NO_BARRIER);
-}
-
-static inline void atomic64_set_mask(unsigned long mask, atomic64_t *v)
-{
-	__ATOMIC64_LOOP(v, mask, __ATOMIC64_OR, __ATOMIC64_NO_BARRIER);
-}
-
 #define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
 
 static inline long long atomic64_cmpxchg(atomic64_t *v,
@@ -270,6 +276,19 @@ static inline long long atomic64_cmpxchg(atomic64_t *v,
 	return old;
 }
 
+#define ATOMIC64_OP(op, OP)						\
+static inline void atomic64_##op(long i, atomic64_t *v)			\
+{									\
+	__ATOMIC64_LOOP(v, i, __ATOMIC64_##OP, __ATOMIC64_NO_BARRIER);	\
+}
+
+#define CONFIG_ARCH_HAS_ATOMIC_OR
+
+ATOMIC64_OP(and, AND)
+ATOMIC64_OP(or, OR)
+ATOMIC64_OP(xor, XOR)
+
+#undef ATOMIC64_OP
 #undef __ATOMIC64_LOOP
 
 static inline int atomic64_add_unless(atomic64_t *v, long long i, long long u)
-- 
cgit v0.10.2


From 7fc1845dd45a825b3c2b760df342a94f61fb1113 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 23 Apr 2014 20:28:37 +0200
Subject: x86: Provide atomic_{or,xor,and}

Implement atomic logic ops -- atomic_{or,xor,and}.

These will replace the atomic_{set,clear}_mask functions that are
available on some archs.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index e916895..f3a3ec0 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -182,6 +182,23 @@ static inline int atomic_xchg(atomic_t *v, int new)
 	return xchg(&v->counter, new);
 }
 
+#define ATOMIC_OP(op)							\
+static inline void atomic_##op(int i, atomic_t *v)			\
+{									\
+	asm volatile(LOCK_PREFIX #op"l %1,%0"				\
+			: "+m" (v->counter)				\
+			: "ir" (i)					\
+			: "memory");					\
+}
+
+#define CONFIG_ARCH_HAS_ATOMIC_OR
+
+ATOMIC_OP(and)
+ATOMIC_OP(or)
+ATOMIC_OP(xor)
+
+#undef ATOMIC_OP
+
 /**
  * __atomic_add_unless - add unless the number is already a given value
  * @v: pointer of type atomic_t
@@ -219,15 +236,15 @@ static __always_inline short int atomic_inc_short(short int *v)
 	return *v;
 }
 
-/* These are x86-specific, used by some header files */
-#define atomic_clear_mask(mask, addr)				\
-	asm volatile(LOCK_PREFIX "andl %0,%1"			\
-		     : : "r" (~(mask)), "m" (*(addr)) : "memory")
+static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
+{
+	atomic_and(~mask, v);
+}
 
-#define atomic_set_mask(mask, addr)				\
-	asm volatile(LOCK_PREFIX "orl %0,%1"			\
-		     : : "r" ((unsigned)(mask)), "m" (*(addr))	\
-		     : "memory")
+static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
+{
+	atomic_or(mask, v);
+}
 
 #ifdef CONFIG_X86_32
 # include <asm/atomic64_32.h>
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index b154de7..a11c30b 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -313,4 +313,18 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v)
 #undef alternative_atomic64
 #undef __alternative_atomic64
 
+#define ATOMIC64_OP(op, c_op)						\
+static inline void atomic64_##op(long long i, atomic64_t *v)		\
+{									\
+	long long old, c = 0;						\
+	while ((old = atomic64_cmpxchg(v, c, c c_op i)) != c)		\
+		c = old;						\
+}
+
+ATOMIC64_OP(and, &)
+ATOMIC64_OP(or, |)
+ATOMIC64_OP(xor, ^)
+
+#undef ATOMIC64_OP
+
 #endif /* _ASM_X86_ATOMIC64_32_H */
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index b965f9e..50e33ef 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -220,4 +220,19 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
 	return dec;
 }
 
+#define ATOMIC64_OP(op)							\
+static inline void atomic64_##op(long i, atomic64_t *v)			\
+{									\
+	asm volatile(LOCK_PREFIX #op"q %1,%0"				\
+			: "+m" (v->counter)				\
+			: "er" (i)					\
+			: "memory");					\
+}
+
+ATOMIC64_OP(and)
+ATOMIC64_OP(or)
+ATOMIC64_OP(xor)
+
+#undef ATOMIC64_OP
+
 #endif /* _ASM_X86_ATOMIC64_64_H */
-- 
cgit v0.10.2


From b0d8003ef405c4148b703cdaab1171045c6c3bbd Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 24 Apr 2015 00:49:20 +0200
Subject: frv: Rewrite atomic implementation

Mostly complete rewrite of the FRV atomic implementation, instead of
using assembly files, use inline assembler.

The out-of-line CONFIG option makes a bit of a mess of things, but a
little CPP trickery gets that done too.

FRV already had the atomic logic ops but under a non standard name,
the reimplementation provides the generic names and provides the
intermediate form required for the bitops implementation.

The slightly inconsistent __atomic32_fetch_##op naming is because
__atomic_fetch_##op conlicts with GCC builtin functions.

The 64bit atomic ops use the inline assembly %Ln construct to access
the low word register (r+1), afaik this construct was not previously
used in the kernel and is completely undocumented, but I found it in
the FRV GCC code and it seems to work.

FRV had a non-standard definition of atomic_{clear,set}_mask() which
would work types other than atomic_t, the one user relying on that
(arch/frv/kernel/dma.c) got converted to use the new intermediate
form.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/frv/include/asm/atomic.h b/arch/frv/include/asm/atomic.h
index 102190a..74d2245 100644
--- a/arch/frv/include/asm/atomic.h
+++ b/arch/frv/include/asm/atomic.h
@@ -15,7 +15,6 @@
 #define _ASM_ATOMIC_H
 
 #include <linux/types.h>
-#include <asm/spr-regs.h>
 #include <asm/cmpxchg.h>
 #include <asm/barrier.h>
 
@@ -23,6 +22,8 @@
 #error not SMP safe
 #endif
 
+#include <asm/atomic_defs.h>
+
 /*
  * Atomic operations that C can't guarantee us.  Useful for
  * resource counting etc..
@@ -34,56 +35,26 @@
 #define atomic_read(v)		ACCESS_ONCE((v)->counter)
 #define atomic_set(v, i)	(((v)->counter) = (i))
 
-#ifndef CONFIG_FRV_OUTOFLINE_ATOMIC_OPS
-static inline int atomic_add_return(int i, atomic_t *v)
+static inline int atomic_inc_return(atomic_t *v)
 {
-	unsigned long val;
+	return __atomic_add_return(1, &v->counter);
+}
 
-	asm("0:						\n"
-	    "	orcc		gr0,gr0,gr0,icc3	\n"	/* set ICC3.Z */
-	    "	ckeq		icc3,cc7		\n"
-	    "	ld.p		%M0,%1			\n"	/* LD.P/ORCR must be atomic */
-	    "	orcr		cc7,cc7,cc3		\n"	/* set CC3 to true */
-	    "	add%I2		%1,%2,%1		\n"
-	    "	cst.p		%1,%M0		,cc3,#1	\n"
-	    "	corcc		gr29,gr29,gr0	,cc3,#1	\n"	/* clear ICC3.Z if store happens */
-	    "	beq		icc3,#0,0b		\n"
-	    : "+U"(v->counter), "=&r"(val)
-	    : "NPr"(i)
-	    : "memory", "cc7", "cc3", "icc3"
-	    );
+static inline int atomic_dec_return(atomic_t *v)
+{
+	return __atomic_sub_return(1, &v->counter);
+}
 
-	return val;
+static inline int atomic_add_return(int i, atomic_t *v)
+{
+	return __atomic_add_return(i, &v->counter);
 }
 
 static inline int atomic_sub_return(int i, atomic_t *v)
 {
-	unsigned long val;
-
-	asm("0:						\n"
-	    "	orcc		gr0,gr0,gr0,icc3	\n"	/* set ICC3.Z */
-	    "	ckeq		icc3,cc7		\n"
-	    "	ld.p		%M0,%1			\n"	/* LD.P/ORCR must be atomic */
-	    "	orcr		cc7,cc7,cc3		\n"	/* set CC3 to true */
-	    "	sub%I2		%1,%2,%1		\n"
-	    "	cst.p		%1,%M0		,cc3,#1	\n"
-	    "	corcc		gr29,gr29,gr0	,cc3,#1	\n"	/* clear ICC3.Z if store happens */
-	    "	beq		icc3,#0,0b		\n"
-	    : "+U"(v->counter), "=&r"(val)
-	    : "NPr"(i)
-	    : "memory", "cc7", "cc3", "icc3"
-	    );
-
-	return val;
+	return __atomic_sub_return(i, &v->counter);
 }
 
-#else
-
-extern int atomic_add_return(int i, atomic_t *v);
-extern int atomic_sub_return(int i, atomic_t *v);
-
-#endif
-
 static inline int atomic_add_negative(int i, atomic_t *v)
 {
 	return atomic_add_return(i, v) < 0;
@@ -101,17 +72,14 @@ static inline void atomic_sub(int i, atomic_t *v)
 
 static inline void atomic_inc(atomic_t *v)
 {
-	atomic_add_return(1, v);
+	atomic_inc_return(v);
 }
 
 static inline void atomic_dec(atomic_t *v)
 {
-	atomic_sub_return(1, v);
+	atomic_dec_return(v);
 }
 
-#define atomic_dec_return(v)		atomic_sub_return(1, (v))
-#define atomic_inc_return(v)		atomic_add_return(1, (v))
-
 #define atomic_sub_and_test(i,v)	(atomic_sub_return((i), (v)) == 0)
 #define atomic_dec_and_test(v)		(atomic_sub_return(1, (v)) == 0)
 #define atomic_inc_and_test(v)		(atomic_add_return(1, (v)) == 0)
@@ -120,18 +88,19 @@ static inline void atomic_dec(atomic_t *v)
  * 64-bit atomic ops
  */
 typedef struct {
-	volatile long long counter;
+	long long counter;
 } atomic64_t;
 
 #define ATOMIC64_INIT(i)	{ (i) }
 
-static inline long long atomic64_read(atomic64_t *v)
+static inline long long atomic64_read(const atomic64_t *v)
 {
 	long long counter;
 
 	asm("ldd%I1 %M1,%0"
 	    : "=e"(counter)
 	    : "m"(v->counter));
+
 	return counter;
 }
 
@@ -142,10 +111,25 @@ static inline void atomic64_set(atomic64_t *v, long long i)
 		     : "e"(i));
 }
 
-extern long long atomic64_inc_return(atomic64_t *v);
-extern long long atomic64_dec_return(atomic64_t *v);
-extern long long atomic64_add_return(long long i, atomic64_t *v);
-extern long long atomic64_sub_return(long long i, atomic64_t *v);
+static inline long long atomic64_inc_return(atomic64_t *v)
+{
+	return __atomic64_add_return(1, &v->counter);
+}
+
+static inline long long atomic64_dec_return(atomic64_t *v)
+{
+	return __atomic64_sub_return(1, &v->counter);
+}
+
+static inline long long atomic64_add_return(long long i, atomic64_t *v)
+{
+	return __atomic64_add_return(i, &v->counter);
+}
+
+static inline long long atomic64_sub_return(long long i, atomic64_t *v)
+{
+	return __atomic64_sub_return(i, &v->counter);
+}
 
 static inline long long atomic64_add_negative(long long i, atomic64_t *v)
 {
@@ -176,6 +160,7 @@ static inline void atomic64_dec(atomic64_t *v)
 #define atomic64_dec_and_test(v)	(atomic64_dec_return((v)) == 0)
 #define atomic64_inc_and_test(v)	(atomic64_inc_return((v)) == 0)
 
+
 #define atomic_cmpxchg(v, old, new)	(cmpxchg(&(v)->counter, old, new))
 #define atomic_xchg(v, new)		(xchg(&(v)->counter, new))
 #define atomic64_cmpxchg(v, old, new)	(__cmpxchg_64(old, new, &(v)->counter))
@@ -196,5 +181,33 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
 	return c;
 }
 
+#define ATOMIC_OP(op)							\
+static inline void atomic_##op(int i, atomic_t *v)			\
+{									\
+	(void)__atomic32_fetch_##op(i, &v->counter);			\
+}									\
+									\
+static inline void atomic64_##op(long long i, atomic64_t *v)		\
+{									\
+	(void)__atomic64_fetch_##op(i, &v->counter);			\
+}
+
+#define CONFIG_ARCH_HAS_ATOMIC_OR
+
+ATOMIC_OP(or)
+ATOMIC_OP(and)
+ATOMIC_OP(xor)
+
+#undef ATOMIC_OP
+
+static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
+{
+	atomic_and(~mask, v);
+}
+
+static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
+{
+	atomic_or(mask, v);
+}
 
 #endif /* _ASM_ATOMIC_H */
diff --git a/arch/frv/include/asm/atomic_defs.h b/arch/frv/include/asm/atomic_defs.h
new file mode 100644
index 0000000..36e126d
--- /dev/null
+++ b/arch/frv/include/asm/atomic_defs.h
@@ -0,0 +1,172 @@
+
+#include <asm/spr-regs.h>
+
+#ifdef __ATOMIC_LIB__
+
+#ifdef CONFIG_FRV_OUTOFLINE_ATOMIC_OPS
+
+#define ATOMIC_QUALS
+#define ATOMIC_EXPORT(x)	EXPORT_SYMBOL(x)
+
+#else /* !OUTOFLINE && LIB */
+
+#define ATOMIC_OP_RETURN(op)
+#define ATOMIC_FETCH_OP(op)
+
+#endif /* OUTOFLINE */
+
+#else /* !__ATOMIC_LIB__ */
+
+#define ATOMIC_EXPORT(x)
+
+#ifdef CONFIG_FRV_OUTOFLINE_ATOMIC_OPS
+
+#define ATOMIC_OP_RETURN(op)						\
+extern int __atomic_##op##_return(int i, int *v);			\
+extern long long __atomic64_##op##_return(long long i, long long *v);
+
+#define ATOMIC_FETCH_OP(op)						\
+extern int __atomic32_fetch_##op(int i, int *v);			\
+extern long long __atomic64_fetch_##op(long long i, long long *v);
+
+#else /* !OUTOFLINE && !LIB */
+
+#define ATOMIC_QUALS	static inline
+
+#endif /* OUTOFLINE */
+#endif /* __ATOMIC_LIB__ */
+
+
+/*
+ * Note on the 64 bit inline asm variants...
+ *
+ * CSTD is a conditional instruction and needs a constrained memory reference.
+ * Normally 'U' provides the correct constraints for conditional instructions
+ * and this is used for the 32 bit version, however 'U' does not appear to work
+ * for 64 bit values (gcc-4.9)
+ *
+ * The exact constraint is that conditional instructions cannot deal with an
+ * immediate displacement in the memory reference, so what we do is we read the
+ * address through a volatile cast into a local variable in order to insure we
+ * _have_ to compute the correct address without displacement. This allows us
+ * to use the regular 'm' for the memory address.
+ *
+ * Furthermore, the %Ln operand, which prints the low word register (r+1),
+ * really only works for registers, this means we cannot allow immediate values
+ * for the 64 bit versions -- like we do for the 32 bit ones.
+ *
+ */
+
+#ifndef ATOMIC_OP_RETURN
+#define ATOMIC_OP_RETURN(op)						\
+ATOMIC_QUALS int __atomic_##op##_return(int i, int *v)			\
+{									\
+	int val;							\
+									\
+	asm volatile(							\
+	    "0:						\n"		\
+	    "	orcc		gr0,gr0,gr0,icc3	\n"		\
+	    "	ckeq		icc3,cc7		\n"		\
+	    "	ld.p		%M0,%1			\n"		\
+	    "	orcr		cc7,cc7,cc3		\n"		\
+	    "   "#op"%I2	%1,%2,%1		\n"		\
+	    "	cst.p		%1,%M0		,cc3,#1	\n"		\
+	    "	corcc		gr29,gr29,gr0	,cc3,#1	\n"		\
+	    "	beq		icc3,#0,0b		\n"		\
+	    : "+U"(*v), "=&r"(val)					\
+	    : "NPr"(i)							\
+	    : "memory", "cc7", "cc3", "icc3"				\
+	    );								\
+									\
+	return val;							\
+}									\
+ATOMIC_EXPORT(__atomic_##op##_return);					\
+									\
+ATOMIC_QUALS long long __atomic64_##op##_return(long long i, long long *v)	\
+{									\
+	long long *__v = READ_ONCE(v);					\
+	long long val;							\
+									\
+	asm volatile(							\
+	    "0:						\n"		\
+	    "	orcc		gr0,gr0,gr0,icc3	\n"		\
+	    "	ckeq		icc3,cc7		\n"		\
+	    "	ldd.p		%M0,%1			\n"		\
+	    "	orcr		cc7,cc7,cc3		\n"		\
+	    "   "#op"cc		%L1,%L2,%L1,icc0	\n"		\
+	    "   "#op"x		%1,%2,%1,icc0		\n"		\
+	    "	cstd.p		%1,%M0		,cc3,#1	\n"		\
+	    "	corcc		gr29,gr29,gr0	,cc3,#1	\n"		\
+	    "	beq		icc3,#0,0b		\n"		\
+	    : "+m"(*__v), "=&e"(val)					\
+	    : "e"(i)							\
+	    : "memory", "cc7", "cc3", "icc0", "icc3"			\
+	    );								\
+									\
+	return val;							\
+}									\
+ATOMIC_EXPORT(__atomic64_##op##_return);
+#endif
+
+#ifndef ATOMIC_FETCH_OP
+#define ATOMIC_FETCH_OP(op)						\
+ATOMIC_QUALS int __atomic32_fetch_##op(int i, int *v)			\
+{									\
+	int old, tmp;							\
+									\
+	asm volatile(							\
+		"0:						\n"	\
+		"	orcc		gr0,gr0,gr0,icc3	\n"	\
+		"	ckeq		icc3,cc7		\n"	\
+		"	ld.p		%M0,%1			\n"	\
+		"	orcr		cc7,cc7,cc3		\n"	\
+		"	"#op"%I3	%1,%3,%2		\n"	\
+		"	cst.p		%2,%M0		,cc3,#1	\n"	\
+		"	corcc		gr29,gr29,gr0	,cc3,#1	\n"	\
+		"	beq		icc3,#0,0b		\n"	\
+		: "+U"(*v), "=&r"(old), "=r"(tmp)			\
+		: "NPr"(i)						\
+		: "memory", "cc7", "cc3", "icc3"			\
+		);							\
+									\
+	return old;							\
+}									\
+ATOMIC_EXPORT(__atomic32_fetch_##op);					\
+									\
+ATOMIC_QUALS long long __atomic64_fetch_##op(long long i, long long *v)	\
+{									\
+	long long *__v = READ_ONCE(v);					\
+	long long old, tmp;						\
+									\
+	asm volatile(							\
+		"0:						\n"	\
+		"	orcc		gr0,gr0,gr0,icc3	\n"	\
+		"	ckeq		icc3,cc7		\n"	\
+		"	ldd.p		%M0,%1			\n"	\
+		"	orcr		cc7,cc7,cc3		\n"	\
+		"	"#op"		%L1,%L3,%L2		\n"	\
+		"	"#op"		%1,%3,%2		\n"	\
+		"	cstd.p		%2,%M0		,cc3,#1	\n"	\
+		"	corcc		gr29,gr29,gr0	,cc3,#1	\n"	\
+		"	beq		icc3,#0,0b		\n"	\
+		: "+m"(*__v), "=&e"(old), "=e"(tmp)			\
+		: "e"(i)						\
+		: "memory", "cc7", "cc3", "icc3"			\
+		);							\
+									\
+	return old;							\
+}									\
+ATOMIC_EXPORT(__atomic64_fetch_##op);
+#endif
+
+ATOMIC_FETCH_OP(or)
+ATOMIC_FETCH_OP(and)
+ATOMIC_FETCH_OP(xor)
+
+ATOMIC_OP_RETURN(add)
+ATOMIC_OP_RETURN(sub)
+
+#undef ATOMIC_FETCH_OP
+#undef ATOMIC_OP_RETURN
+#undef ATOMIC_QUALS
+#undef ATOMIC_EXPORT
diff --git a/arch/frv/include/asm/bitops.h b/arch/frv/include/asm/bitops.h
index 96de220..0df8e95 100644
--- a/arch/frv/include/asm/bitops.h
+++ b/arch/frv/include/asm/bitops.h
@@ -25,109 +25,30 @@
 
 #include <asm-generic/bitops/ffz.h>
 
-#ifndef CONFIG_FRV_OUTOFLINE_ATOMIC_OPS
-static inline
-unsigned long atomic_test_and_ANDNOT_mask(unsigned long mask, volatile unsigned long *v)
-{
-	unsigned long old, tmp;
-
-	asm volatile(
-		"0:						\n"
-		"	orcc		gr0,gr0,gr0,icc3	\n"	/* set ICC3.Z */
-		"	ckeq		icc3,cc7		\n"
-		"	ld.p		%M0,%1			\n"	/* LD.P/ORCR are atomic */
-		"	orcr		cc7,cc7,cc3		\n"	/* set CC3 to true */
-		"	and%I3		%1,%3,%2		\n"
-		"	cst.p		%2,%M0		,cc3,#1	\n"	/* if store happens... */
-		"	corcc		gr29,gr29,gr0	,cc3,#1	\n"	/* ... clear ICC3.Z */
-		"	beq		icc3,#0,0b		\n"
-		: "+U"(*v), "=&r"(old), "=r"(tmp)
-		: "NPr"(~mask)
-		: "memory", "cc7", "cc3", "icc3"
-		);
-
-	return old;
-}
-
-static inline
-unsigned long atomic_test_and_OR_mask(unsigned long mask, volatile unsigned long *v)
-{
-	unsigned long old, tmp;
-
-	asm volatile(
-		"0:						\n"
-		"	orcc		gr0,gr0,gr0,icc3	\n"	/* set ICC3.Z */
-		"	ckeq		icc3,cc7		\n"
-		"	ld.p		%M0,%1			\n"	/* LD.P/ORCR are atomic */
-		"	orcr		cc7,cc7,cc3		\n"	/* set CC3 to true */
-		"	or%I3		%1,%3,%2		\n"
-		"	cst.p		%2,%M0		,cc3,#1	\n"	/* if store happens... */
-		"	corcc		gr29,gr29,gr0	,cc3,#1	\n"	/* ... clear ICC3.Z */
-		"	beq		icc3,#0,0b		\n"
-		: "+U"(*v), "=&r"(old), "=r"(tmp)
-		: "NPr"(mask)
-		: "memory", "cc7", "cc3", "icc3"
-		);
-
-	return old;
-}
-
-static inline
-unsigned long atomic_test_and_XOR_mask(unsigned long mask, volatile unsigned long *v)
-{
-	unsigned long old, tmp;
-
-	asm volatile(
-		"0:						\n"
-		"	orcc		gr0,gr0,gr0,icc3	\n"	/* set ICC3.Z */
-		"	ckeq		icc3,cc7		\n"
-		"	ld.p		%M0,%1			\n"	/* LD.P/ORCR are atomic */
-		"	orcr		cc7,cc7,cc3		\n"	/* set CC3 to true */
-		"	xor%I3		%1,%3,%2		\n"
-		"	cst.p		%2,%M0		,cc3,#1	\n"	/* if store happens... */
-		"	corcc		gr29,gr29,gr0	,cc3,#1	\n"	/* ... clear ICC3.Z */
-		"	beq		icc3,#0,0b		\n"
-		: "+U"(*v), "=&r"(old), "=r"(tmp)
-		: "NPr"(mask)
-		: "memory", "cc7", "cc3", "icc3"
-		);
-
-	return old;
-}
-
-#else
-
-extern unsigned long atomic_test_and_ANDNOT_mask(unsigned long mask, volatile unsigned long *v);
-extern unsigned long atomic_test_and_OR_mask(unsigned long mask, volatile unsigned long *v);
-extern unsigned long atomic_test_and_XOR_mask(unsigned long mask, volatile unsigned long *v);
-
-#endif
-
-#define atomic_clear_mask(mask, v)	atomic_test_and_ANDNOT_mask((mask), (v))
-#define atomic_set_mask(mask, v)	atomic_test_and_OR_mask((mask), (v))
+#include <asm/atomic.h>
 
 static inline int test_and_clear_bit(unsigned long nr, volatile void *addr)
 {
-	volatile unsigned long *ptr = addr;
-	unsigned long mask = 1UL << (nr & 31);
+	unsigned int *ptr = (void *)addr;
+	unsigned int mask = 1UL << (nr & 31);
 	ptr += nr >> 5;
-	return (atomic_test_and_ANDNOT_mask(mask, ptr) & mask) != 0;
+	return (__atomic32_fetch_and(~mask, ptr) & mask) != 0;
 }
 
 static inline int test_and_set_bit(unsigned long nr, volatile void *addr)
 {
-	volatile unsigned long *ptr = addr;
-	unsigned long mask = 1UL << (nr & 31);
+	unsigned int *ptr = (void *)addr;
+	unsigned int mask = 1UL << (nr & 31);
 	ptr += nr >> 5;
-	return (atomic_test_and_OR_mask(mask, ptr) & mask) != 0;
+	return (__atomic32_fetch_or(mask, ptr) & mask) != 0;
 }
 
 static inline int test_and_change_bit(unsigned long nr, volatile void *addr)
 {
-	volatile unsigned long *ptr = addr;
-	unsigned long mask = 1UL << (nr & 31);
+	unsigned int *ptr = (void *)addr;
+	unsigned int mask = 1UL << (nr & 31);
 	ptr += nr >> 5;
-	return (atomic_test_and_XOR_mask(mask, ptr) & mask) != 0;
+	return (__atomic32_fetch_xor(mask, ptr) & mask) != 0;
 }
 
 static inline void clear_bit(unsigned long nr, volatile void *addr)
diff --git a/arch/frv/kernel/dma.c b/arch/frv/kernel/dma.c
index 156184e..370dc9f 100644
--- a/arch/frv/kernel/dma.c
+++ b/arch/frv/kernel/dma.c
@@ -109,13 +109,13 @@ static struct frv_dma_channel frv_dma_channels[FRV_DMA_NCHANS] = {
 
 static DEFINE_RWLOCK(frv_dma_channels_lock);
 
-unsigned long frv_dma_inprogress;
+unsigned int frv_dma_inprogress;
 
 #define frv_clear_dma_inprogress(channel) \
-	atomic_clear_mask(1 << (channel), &frv_dma_inprogress);
+	(void)__atomic32_fetch_and(~(1 << (channel)), &frv_dma_inprogress);
 
 #define frv_set_dma_inprogress(channel) \
-	atomic_set_mask(1 << (channel), &frv_dma_inprogress);
+	(void)__atomic32_fetch_or(1 << (channel), &frv_dma_inprogress);
 
 /*****************************************************************************/
 /*
diff --git a/arch/frv/kernel/frv_ksyms.c b/arch/frv/kernel/frv_ksyms.c
index 86c516d..cdb4ce9 100644
--- a/arch/frv/kernel/frv_ksyms.c
+++ b/arch/frv/kernel/frv_ksyms.c
@@ -58,11 +58,6 @@ EXPORT_SYMBOL(__outsl_ns);
 EXPORT_SYMBOL(__insl_ns);
 
 #ifdef CONFIG_FRV_OUTOFLINE_ATOMIC_OPS
-EXPORT_SYMBOL(atomic_test_and_ANDNOT_mask);
-EXPORT_SYMBOL(atomic_test_and_OR_mask);
-EXPORT_SYMBOL(atomic_test_and_XOR_mask);
-EXPORT_SYMBOL(atomic_add_return);
-EXPORT_SYMBOL(atomic_sub_return);
 EXPORT_SYMBOL(__xchg_32);
 EXPORT_SYMBOL(__cmpxchg_32);
 #endif
diff --git a/arch/frv/lib/Makefile b/arch/frv/lib/Makefile
index 4ff2fb1..970e8b4 100644
--- a/arch/frv/lib/Makefile
+++ b/arch/frv/lib/Makefile
@@ -5,4 +5,4 @@
 lib-y := \
 	__ashldi3.o __lshrdi3.o __muldi3.o __ashrdi3.o __negdi2.o __ucmpdi2.o \
 	checksum.o memcpy.o memset.o atomic-ops.o atomic64-ops.o \
-	outsl_ns.o outsl_sw.o insl_ns.o insl_sw.o cache.o
+	outsl_ns.o outsl_sw.o insl_ns.o insl_sw.o cache.o atomic-lib.o
diff --git a/arch/frv/lib/atomic-lib.c b/arch/frv/lib/atomic-lib.c
new file mode 100644
index 0000000..4d1b887
--- /dev/null
+++ b/arch/frv/lib/atomic-lib.c
@@ -0,0 +1,7 @@
+
+#include <linux/export.h>
+#include <asm/atomic.h>
+
+#define __ATOMIC_LIB__
+
+#include <asm/atomic_defs.h>
diff --git a/arch/frv/lib/atomic-ops.S b/arch/frv/lib/atomic-ops.S
index 5e9e6ab..b7439a9 100644
--- a/arch/frv/lib/atomic-ops.S
+++ b/arch/frv/lib/atomic-ops.S
@@ -19,116 +19,6 @@
 
 ###############################################################################
 #
-# unsigned long atomic_test_and_ANDNOT_mask(unsigned long mask, volatile unsigned long *v);
-#
-###############################################################################
-	.globl		atomic_test_and_ANDNOT_mask
-        .type		atomic_test_and_ANDNOT_mask,@function
-atomic_test_and_ANDNOT_mask:
-	not.p		gr8,gr10
-0:
-	orcc		gr0,gr0,gr0,icc3		/* set ICC3.Z */
-	ckeq		icc3,cc7
-	ld.p		@(gr9,gr0),gr8			/* LD.P/ORCR must be atomic */
-	orcr		cc7,cc7,cc3			/* set CC3 to true */
-	and		gr8,gr10,gr11
-	cst.p		gr11,@(gr9,gr0)		,cc3,#1
-	corcc		gr29,gr29,gr0		,cc3,#1	/* clear ICC3.Z if store happens */
-	beq		icc3,#0,0b
-	bralr
-
-	.size		atomic_test_and_ANDNOT_mask, .-atomic_test_and_ANDNOT_mask
-
-###############################################################################
-#
-# unsigned long atomic_test_and_OR_mask(unsigned long mask, volatile unsigned long *v);
-#
-###############################################################################
-	.globl		atomic_test_and_OR_mask
-        .type		atomic_test_and_OR_mask,@function
-atomic_test_and_OR_mask:
-	or.p		gr8,gr8,gr10
-0:
-	orcc		gr0,gr0,gr0,icc3		/* set ICC3.Z */
-	ckeq		icc3,cc7
-	ld.p		@(gr9,gr0),gr8			/* LD.P/ORCR must be atomic */
-	orcr		cc7,cc7,cc3			/* set CC3 to true */
-	or		gr8,gr10,gr11
-	cst.p		gr11,@(gr9,gr0)		,cc3,#1
-	corcc		gr29,gr29,gr0		,cc3,#1	/* clear ICC3.Z if store happens */
-	beq		icc3,#0,0b
-	bralr
-
-	.size		atomic_test_and_OR_mask, .-atomic_test_and_OR_mask
-
-###############################################################################
-#
-# unsigned long atomic_test_and_XOR_mask(unsigned long mask, volatile unsigned long *v);
-#
-###############################################################################
-	.globl		atomic_test_and_XOR_mask
-        .type		atomic_test_and_XOR_mask,@function
-atomic_test_and_XOR_mask:
-	or.p		gr8,gr8,gr10
-0:
-	orcc		gr0,gr0,gr0,icc3		/* set ICC3.Z */
-	ckeq		icc3,cc7
-	ld.p		@(gr9,gr0),gr8			/* LD.P/ORCR must be atomic */
-	orcr		cc7,cc7,cc3			/* set CC3 to true */
-	xor		gr8,gr10,gr11
-	cst.p		gr11,@(gr9,gr0)		,cc3,#1
-	corcc		gr29,gr29,gr0		,cc3,#1	/* clear ICC3.Z if store happens */
-	beq		icc3,#0,0b
-	bralr
-
-	.size		atomic_test_and_XOR_mask, .-atomic_test_and_XOR_mask
-
-###############################################################################
-#
-# int atomic_add_return(int i, atomic_t *v)
-#
-###############################################################################
-	.globl		atomic_add_return
-        .type		atomic_add_return,@function
-atomic_add_return:
-	or.p		gr8,gr8,gr10
-0:
-	orcc		gr0,gr0,gr0,icc3		/* set ICC3.Z */
-	ckeq		icc3,cc7
-	ld.p		@(gr9,gr0),gr8			/* LD.P/ORCR must be atomic */
-	orcr		cc7,cc7,cc3			/* set CC3 to true */
-	add		gr8,gr10,gr8
-	cst.p		gr8,@(gr9,gr0)		,cc3,#1
-	corcc		gr29,gr29,gr0		,cc3,#1	/* clear ICC3.Z if store happens */
-	beq		icc3,#0,0b
-	bralr
-
-	.size		atomic_add_return, .-atomic_add_return
-
-###############################################################################
-#
-# int atomic_sub_return(int i, atomic_t *v)
-#
-###############################################################################
-	.globl		atomic_sub_return
-        .type		atomic_sub_return,@function
-atomic_sub_return:
-	or.p		gr8,gr8,gr10
-0:
-	orcc		gr0,gr0,gr0,icc3		/* set ICC3.Z */
-	ckeq		icc3,cc7
-	ld.p		@(gr9,gr0),gr8			/* LD.P/ORCR must be atomic */
-	orcr		cc7,cc7,cc3			/* set CC3 to true */
-	sub		gr8,gr10,gr8
-	cst.p		gr8,@(gr9,gr0)		,cc3,#1
-	corcc		gr29,gr29,gr0		,cc3,#1	/* clear ICC3.Z if store happens */
-	beq		icc3,#0,0b
-	bralr
-
-	.size		atomic_sub_return, .-atomic_sub_return
-
-###############################################################################
-#
 # uint32_t __xchg_32(uint32_t i, uint32_t *v)
 #
 ###############################################################################
diff --git a/arch/frv/lib/atomic64-ops.S b/arch/frv/lib/atomic64-ops.S
index b6194ee..c4c4723 100644
--- a/arch/frv/lib/atomic64-ops.S
+++ b/arch/frv/lib/atomic64-ops.S
@@ -20,100 +20,6 @@
 
 ###############################################################################
 #
-# long long atomic64_inc_return(atomic64_t *v)
-#
-###############################################################################
-	.globl		atomic64_inc_return
-        .type		atomic64_inc_return,@function
-atomic64_inc_return:
-	or.p		gr8,gr8,gr10
-0:
-	orcc		gr0,gr0,gr0,icc3		/* set ICC3.Z */
-	ckeq		icc3,cc7
-	ldd.p		@(gr10,gr0),gr8			/* LDD.P/ORCR must be atomic */
-	orcr		cc7,cc7,cc3			/* set CC3 to true */
-	addicc		gr9,#1,gr9,icc0
-	addxi		gr8,#0,gr8,icc0
-	cstd.p		gr8,@(gr10,gr0)		,cc3,#1
-	corcc		gr29,gr29,gr0		,cc3,#1	/* clear ICC3.Z if store happens */
-	beq		icc3,#0,0b
-	bralr
-
-	.size		atomic64_inc_return, .-atomic64_inc_return
-
-###############################################################################
-#
-# long long atomic64_dec_return(atomic64_t *v)
-#
-###############################################################################
-	.globl		atomic64_dec_return
-        .type		atomic64_dec_return,@function
-atomic64_dec_return:
-	or.p		gr8,gr8,gr10
-0:
-	orcc		gr0,gr0,gr0,icc3		/* set ICC3.Z */
-	ckeq		icc3,cc7
-	ldd.p		@(gr10,gr0),gr8			/* LDD.P/ORCR must be atomic */
-	orcr		cc7,cc7,cc3			/* set CC3 to true */
-	subicc		gr9,#1,gr9,icc0
-	subxi		gr8,#0,gr8,icc0
-	cstd.p		gr8,@(gr10,gr0)		,cc3,#1
-	corcc		gr29,gr29,gr0		,cc3,#1	/* clear ICC3.Z if store happens */
-	beq		icc3,#0,0b
-	bralr
-
-	.size		atomic64_dec_return, .-atomic64_dec_return
-
-###############################################################################
-#
-# long long atomic64_add_return(long long i, atomic64_t *v)
-#
-###############################################################################
-	.globl		atomic64_add_return
-        .type		atomic64_add_return,@function
-atomic64_add_return:
-	or.p		gr8,gr8,gr4
-	or		gr9,gr9,gr5
-0:
-	orcc		gr0,gr0,gr0,icc3		/* set ICC3.Z */
-	ckeq		icc3,cc7
-	ldd.p		@(gr10,gr0),gr8			/* LDD.P/ORCR must be atomic */
-	orcr		cc7,cc7,cc3			/* set CC3 to true */
-	addcc		gr9,gr5,gr9,icc0
-	addx		gr8,gr4,gr8,icc0
-	cstd.p		gr8,@(gr10,gr0)		,cc3,#1
-	corcc		gr29,gr29,gr0		,cc3,#1	/* clear ICC3.Z if store happens */
-	beq		icc3,#0,0b
-	bralr
-
-	.size		atomic64_add_return, .-atomic64_add_return
-
-###############################################################################
-#
-# long long atomic64_sub_return(long long i, atomic64_t *v)
-#
-###############################################################################
-	.globl		atomic64_sub_return
-        .type		atomic64_sub_return,@function
-atomic64_sub_return:
-	or.p		gr8,gr8,gr4
-	or		gr9,gr9,gr5
-0:
-	orcc		gr0,gr0,gr0,icc3		/* set ICC3.Z */
-	ckeq		icc3,cc7
-	ldd.p		@(gr10,gr0),gr8			/* LDD.P/ORCR must be atomic */
-	orcr		cc7,cc7,cc3			/* set CC3 to true */
-	subcc		gr9,gr5,gr9,icc0
-	subx		gr8,gr4,gr8,icc0
-	cstd.p		gr8,@(gr10,gr0)		,cc3,#1
-	corcc		gr29,gr29,gr0		,cc3,#1	/* clear ICC3.Z if store happens */
-	beq		icc3,#0,0b
-	bralr
-
-	.size		atomic64_sub_return, .-atomic64_sub_return
-
-###############################################################################
-#
 # uint64_t __xchg_64(uint64_t i, uint64_t *v)
 #
 ###############################################################################
-- 
cgit v0.10.2


From 73ada3700bbb0a4c7cc06ea8d74e93c689f90cdb Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 10 Jul 2015 12:55:45 +0200
Subject: h8300: Provide atomic_{or,xor,and}

Implement atomic logic ops -- atomic_{or,xor,and}

These will replace the atomic_{set,clear}_mask functions that are
available on some archs.

Also rework the atomic implementation in terms of CPP macros to avoid
the typical repetition -- I seem to have missed this arch the last
time around when I did that.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h
index 7ca73f8..f181f82 100644
--- a/arch/h8300/include/asm/atomic.h
+++ b/arch/h8300/include/asm/atomic.h
@@ -16,83 +16,54 @@
 
 #include <linux/kernel.h>
 
-static inline int atomic_add_return(int i, atomic_t *v)
-{
-	h8300flags flags;
-	int ret;
-
-	flags = arch_local_irq_save();
-	ret = v->counter += i;
-	arch_local_irq_restore(flags);
-	return ret;
+#define ATOMIC_OP_RETURN(op, c_op)				\
+static inline int atomic_##op##_return(int i, atomic_t *v)	\
+{								\
+	h8300flags flags;					\
+	int ret;						\
+								\
+	flags = arch_local_irq_save();				\
+	ret = v->counter c_op i;				\
+	arch_local_irq_restore(flags);				\
+	return ret;						\
 }
 
-#define atomic_add(i, v) atomic_add_return(i, v)
-#define atomic_add_negative(a, v)	(atomic_add_return((a), (v)) < 0)
-
-static inline int atomic_sub_return(int i, atomic_t *v)
-{
-	h8300flags flags;
-	int ret;
-
-	flags = arch_local_irq_save();
-	ret = v->counter -= i;
-	arch_local_irq_restore(flags);
-	return ret;
+#define ATOMIC_OP(op, c_op)					\
+static inline void atomic_##op(int i, atomic_t *v)		\
+{								\
+	h8300flags flags;					\
+								\
+	flags = arch_local_irq_save();				\
+	v->counter c_op i;					\
+	arch_local_irq_restore(flags);				\
 }
 
-#define atomic_sub(i, v) atomic_sub_return(i, v)
-#define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0)
+ATOMIC_OP_RETURN(add, +=)
+ATOMIC_OP_RETURN(sub, -=)
 
-static inline int atomic_inc_return(atomic_t *v)
-{
-	h8300flags flags;
-	int ret;
-
-	flags = arch_local_irq_save();
-	v->counter++;
-	ret = v->counter;
-	arch_local_irq_restore(flags);
-	return ret;
-}
+#define CONFIG_ARCH_HAS_ATOMIC_OR
 
-#define atomic_inc(v) atomic_inc_return(v)
+ATOMIC_OP(and, &=)
+ATOMIC_OP(or,  |=)
+ATOMIC_OP(xor, ^=)
 
-/*
- * atomic_inc_and_test - increment and test
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
+#undef ATOMIC_OP_RETURN
+#undef ATOMIC_OP
 
-static inline int atomic_dec_return(atomic_t *v)
-{
-	h8300flags flags;
-	int ret;
+#define atomic_add(i, v)		(void)atomic_add_return(i, v)
+#define atomic_add_negative(a, v)	(atomic_add_return((a), (v)) < 0)
 
-	flags = arch_local_irq_save();
-	--v->counter;
-	ret = v->counter;
-	arch_local_irq_restore(flags);
-	return ret;
-}
+#define atomic_sub(i, v)		(void)atomic_sub_return(i, v)
+#define atomic_sub_and_test(i, v)	(atomic_sub_return(i, v) == 0)
 
-#define atomic_dec(v) atomic_dec_return(v)
+#define atomic_inc_return(v)		atomic_add_return(1, v)
+#define atomic_dec_return(v)		atomic_sub_return(1, v)
 
-static inline int atomic_dec_and_test(atomic_t *v)
-{
-	h8300flags flags;
-	int ret;
+#define atomic_inc(v)			(void)atomic_inc_return(v)
+#define atomic_inc_and_test(v)		(atomic_inc_return(v) == 0)
 
-	flags = arch_local_irq_save();
-	--v->counter;
-	ret = v->counter;
-	arch_local_irq_restore(flags);
-	return ret == 0;
-}
+#define atomic_dec(v)			(void)atomic_dec_return(v)
+#define atomic_dec_and_test(v)		(atomic_dec_return(v) == 0)
 
 static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 {
@@ -120,40 +91,14 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 	return ret;
 }
 
-static inline void atomic_clear_mask(unsigned long mask, unsigned long *v)
+static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
 {
-	unsigned char ccr;
-	unsigned long tmp;
-
-	__asm__ __volatile__("stc ccr,%w3\n\t"
-			     "orc #0x80,ccr\n\t"
-			     "mov.l %0,%1\n\t"
-			     "and.l %2,%1\n\t"
-			     "mov.l %1,%0\n\t"
-			     "ldc %w3,ccr"
-			     : "=m"(*v), "=r"(tmp)
-			     : "g"(~(mask)), "r"(ccr));
+	atomic_and(~mask, v);
 }
 
-static inline void atomic_set_mask(unsigned long mask, unsigned long *v)
+static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
 {
-	unsigned char ccr;
-	unsigned long tmp;
-
-	__asm__ __volatile__("stc ccr,%w3\n\t"
-			     "orc #0x80,ccr\n\t"
-			     "mov.l %0,%1\n\t"
-			     "or.l %2,%1\n\t"
-			     "mov.l %1,%0\n\t"
-			     "ldc %w3,ccr"
-			     : "=m"(*v), "=r"(tmp)
-			     : "g"(~(mask)), "r"(ccr));
+	atomic_or(mask, v);
 }
 
-/* Atomic operations are already serializing */
-#define smp_mb__before_atomic_dec()    barrier()
-#define smp_mb__after_atomic_dec() barrier()
-#define smp_mb__before_atomic_inc()    barrier()
-#define smp_mb__after_atomic_inc() barrier()
-
 #endif /* __ARCH_H8300_ATOMIC __ */
-- 
cgit v0.10.2


From 2957c035395e492463d7f589af9dd32388967bbb Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@ezchip.com>
Date: Thu, 9 Jul 2015 16:38:17 -0400
Subject: tile: Provide atomic_{or,xor,and}

Implement atomic logic ops -- atomic_{or,xor,and}.

For tilegx, these are relatively straightforward; the architecture
provides atomic "or" and "and", both 32-bit and 64-bit.  To support
xor we provide a loop using "cmpexch".

For the older 32-bit tilepro architecture, we have to extend
the set of low-level assembly routines to include 32-bit "and",
as well as all three 64-bit routines.  Somewhat confusingly,
some 32-bit versions are already used by the bitops inlines, with
parameter types appropriate for bitops, so we have to do a bit of
casting to match "int" to "unsigned long".

Signed-off-by: Chris Metcalf <cmetcalf@ezchip.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1436474297-32187-1-git-send-email-cmetcalf@ezchip.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h
index 1b109fa..9423792 100644
--- a/arch/tile/include/asm/atomic_32.h
+++ b/arch/tile/include/asm/atomic_32.h
@@ -34,6 +34,21 @@ static inline void atomic_add(int i, atomic_t *v)
 	_atomic_xchg_add(&v->counter, i);
 }
 
+#define ATOMIC_OP(op)							\
+unsigned long _atomic_##op(volatile unsigned long *p, unsigned long mask); \
+static inline void atomic_##op(int i, atomic_t *v)			\
+{									\
+	_atomic_##op((unsigned long *)&v->counter, i);			\
+}
+
+#define CONFIG_ARCH_HAS_ATOMIC_OR
+
+ATOMIC_OP(and)
+ATOMIC_OP(or)
+ATOMIC_OP(xor)
+
+#undef ATOMIC_OP
+
 /**
  * atomic_add_return - add integer and return
  * @v: pointer of type atomic_t
@@ -113,6 +128,17 @@ static inline void atomic64_add(long long i, atomic64_t *v)
 	_atomic64_xchg_add(&v->counter, i);
 }
 
+#define ATOMIC64_OP(op)						\
+long long _atomic64_##op(long long *v, long long n);		\
+static inline void atomic64_##op(long long i, atomic64_t *v)	\
+{								\
+	_atomic64_##op(&v->counter, i);				\
+}
+
+ATOMIC64_OP(and)
+ATOMIC64_OP(or)
+ATOMIC64_OP(xor)
+
 /**
  * atomic64_add_return - add integer and return
  * @v: pointer of type atomic64_t
@@ -225,6 +251,7 @@ extern struct __get_user __atomic_xchg_add(volatile int *p, int *lock, int n);
 extern struct __get_user __atomic_xchg_add_unless(volatile int *p,
 						  int *lock, int o, int n);
 extern struct __get_user __atomic_or(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic_and(volatile int *p, int *lock, int n);
 extern struct __get_user __atomic_andn(volatile int *p, int *lock, int n);
 extern struct __get_user __atomic_xor(volatile int *p, int *lock, int n);
 extern long long __atomic64_cmpxchg(volatile long long *p, int *lock,
@@ -234,6 +261,9 @@ extern long long __atomic64_xchg_add(volatile long long *p, int *lock,
 					long long n);
 extern long long __atomic64_xchg_add_unless(volatile long long *p,
 					int *lock, long long o, long long n);
+extern long long __atomic64_and(volatile long long *p, int *lock, long long n);
+extern long long __atomic64_or(volatile long long *p, int *lock, long long n);
+extern long long __atomic64_xor(volatile long long *p, int *lock, long long n);
 
 /* Return failure from the atomic wrappers. */
 struct __get_user __atomic_bad_address(int __user *addr);
diff --git a/arch/tile/include/asm/atomic_64.h b/arch/tile/include/asm/atomic_64.h
index 0496970..d07d9fc 100644
--- a/arch/tile/include/asm/atomic_64.h
+++ b/arch/tile/include/asm/atomic_64.h
@@ -58,6 +58,28 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 	return oldval;
 }
 
+#define CONFIG_ARCH_HAS_ATOMIC_OR
+
+static inline void atomic_and(int i, atomic_t *v)
+{
+	__insn_fetchand4((void *)&v->counter, i);
+}
+
+static inline void atomic_or(int i, atomic_t *v)
+{
+	__insn_fetchor4((void *)&v->counter, i);
+}
+
+static inline void atomic_xor(int i, atomic_t *v)
+{
+	int guess, oldval = v->counter;
+	do {
+		guess = oldval;
+		__insn_mtspr(SPR_CMPEXCH_VALUE, guess);
+		oldval = __insn_cmpexch4(&v->counter, guess ^ i);
+	} while (guess != oldval);
+}
+
 /* Now the true 64-bit operations. */
 
 #define ATOMIC64_INIT(i)	{ (i) }
@@ -91,6 +113,26 @@ static inline long atomic64_add_unless(atomic64_t *v, long a, long u)
 	return oldval != u;
 }
 
+static inline void atomic64_and(long i, atomic64_t *v)
+{
+	__insn_fetchand((void *)&v->counter, i);
+}
+
+static inline void atomic64_or(long i, atomic64_t *v)
+{
+	__insn_fetchor((void *)&v->counter, i);
+}
+
+static inline void atomic64_xor(long i, atomic64_t *v)
+{
+	long guess, oldval = v->counter;
+	do {
+		guess = oldval;
+		__insn_mtspr(SPR_CMPEXCH_VALUE, guess);
+		oldval = __insn_cmpexch(&v->counter, guess ^ i);
+	} while (guess != oldval);
+}
+
 #define atomic64_sub_return(i, v)	atomic64_add_return(-(i), (v))
 #define atomic64_sub(i, v)		atomic64_add(-(i), (v))
 #define atomic64_inc_return(v)		atomic64_add_return(1, (v))
diff --git a/arch/tile/lib/atomic_32.c b/arch/tile/lib/atomic_32.c
index c89b211..298df1e 100644
--- a/arch/tile/lib/atomic_32.c
+++ b/arch/tile/lib/atomic_32.c
@@ -94,6 +94,12 @@ unsigned long _atomic_or(volatile unsigned long *p, unsigned long mask)
 }
 EXPORT_SYMBOL(_atomic_or);
 
+unsigned long _atomic_and(volatile unsigned long *p, unsigned long mask)
+{
+	return __atomic_and((int *)p, __atomic_setup(p), mask).val;
+}
+EXPORT_SYMBOL(_atomic_and);
+
 unsigned long _atomic_andn(volatile unsigned long *p, unsigned long mask)
 {
 	return __atomic_andn((int *)p, __atomic_setup(p), mask).val;
@@ -136,6 +142,23 @@ long long _atomic64_cmpxchg(long long *v, long long o, long long n)
 }
 EXPORT_SYMBOL(_atomic64_cmpxchg);
 
+long long _atomic64_and(long long *v, long long n)
+{
+	return __atomic64_and(v, __atomic_setup(v), n);
+}
+EXPORT_SYMBOL(_atomic64_and);
+
+long long _atomic64_or(long long *v, long long n)
+{
+	return __atomic64_or(v, __atomic_setup(v), n);
+}
+EXPORT_SYMBOL(_atomic64_or);
+
+long long _atomic64_xor(long long *v, long long n)
+{
+	return __atomic64_xor(v, __atomic_setup(v), n);
+}
+EXPORT_SYMBOL(_atomic64_xor);
 
 /*
  * If any of the atomic or futex routines hit a bad address (not in
diff --git a/arch/tile/lib/atomic_asm_32.S b/arch/tile/lib/atomic_asm_32.S
index 6bda313..f611265 100644
--- a/arch/tile/lib/atomic_asm_32.S
+++ b/arch/tile/lib/atomic_asm_32.S
@@ -178,6 +178,7 @@ atomic_op _xchg_add, 32, "add r24, r22, r2"
 atomic_op _xchg_add_unless, 32, \
 	"sne r26, r22, r2; { bbns r26, 3f; add r24, r22, r3 }"
 atomic_op _or, 32, "or r24, r22, r2"
+atomic_op _and, 32, "and r24, r22, r2"
 atomic_op _andn, 32, "nor r2, r2, zero; and r24, r22, r2"
 atomic_op _xor, 32, "xor r24, r22, r2"
 
@@ -191,6 +192,9 @@ atomic_op 64_xchg_add_unless, 64, \
 	{ bbns r26, 3f; add r24, r22, r4 }; \
 	{ bbns r27, 3f; add r25, r23, r5 }; \
 	slt_u r26, r24, r22; add r25, r25, r26"
+atomic_op 64_or, 64, "{ or r24, r22, r2; or r25, r23, r3 }"
+atomic_op 64_and, 64, "{ and r24, r22, r2; and r25, r23, r3 }"
+atomic_op 64_xor, 64, "{ xor r24, r22, r2; xor r25, r23, r3 }"
 
 	jrp     lr              /* happy backtracer */
 
-- 
cgit v0.10.2


From e6942b7de2dfe44ebde9bae57dadece5abca9de8 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 23 Apr 2014 19:32:50 +0200
Subject: atomic: Provide atomic_{or,xor,and}

Implement atomic logic ops -- atomic_{or,xor,and}.

These will replace the atomic_{set,clear}_mask functions that are
available on some archs.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index 0eff853..e8c9560 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -110,7 +110,6 @@ static __inline__ long atomic64_##op##_return(long i, atomic64_t * v)	\
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
 #define atomic_andnot atomic_andnot
 #define atomic64_andnot atomic64_andnot
 
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index e90b701..2a84782 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -144,7 +144,6 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 ATOMIC_OPS(add, +=, add)
 ATOMIC_OPS(sub, -=, sub)
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
 #define atomic_andnot atomic_andnot
 
 ATOMIC_OP(and, &=, and)
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index ff214ba..82b75a7 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -194,7 +194,6 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 ATOMIC_OPS(add, +=, add)
 ATOMIC_OPS(sub, -=, sub)
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
 #define atomic_andnot atomic_andnot
 
 ATOMIC_OP(and, &=, and)
diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index 2876173..866a71f 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -85,7 +85,6 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 ATOMIC_OPS(add, add)
 ATOMIC_OPS(sub, sub)
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
 #define atomic_andnot atomic_andnot
 
 ATOMIC_OP(and, and)
diff --git a/arch/avr32/include/asm/atomic.h b/arch/avr32/include/asm/atomic.h
index 115d300..97c9bdf 100644
--- a/arch/avr32/include/asm/atomic.h
+++ b/arch/avr32/include/asm/atomic.h
@@ -51,8 +51,6 @@ static inline void atomic_##op(int i, atomic_t *v)			\
 	(void)__atomic_##op##_return(i, v);				\
 }
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-
 ATOMIC_OP(and, and)
 ATOMIC_OP(or, or)
 ATOMIC_OP(xor, eor)
diff --git a/arch/blackfin/include/asm/atomic.h b/arch/blackfin/include/asm/atomic.h
index eafa55b..2d6a7a3 100644
--- a/arch/blackfin/include/asm/atomic.h
+++ b/arch/blackfin/include/asm/atomic.h
@@ -28,8 +28,6 @@ asmlinkage int __raw_atomic_test_asm(const volatile int *ptr, int value);
 #define atomic_add_return(i, v) __raw_atomic_add_asm(&(v)->counter, i)
 #define atomic_sub_return(i, v) __raw_atomic_add_asm(&(v)->counter, -(i))
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-
 #define atomic_or(i, v)  (void)__raw_atomic_or_asm(&(v)->counter, i)
 #define atomic_and(i, v) (void)__raw_atomic_and_asm(&(v)->counter, i)
 #define atomic_xor(i, v) (void)__raw_atomic_xor_asm(&(v)->counter, i)
diff --git a/arch/frv/include/asm/atomic.h b/arch/frv/include/asm/atomic.h
index 74d2245..fc48bea 100644
--- a/arch/frv/include/asm/atomic.h
+++ b/arch/frv/include/asm/atomic.h
@@ -192,8 +192,6 @@ static inline void atomic64_##op(long long i, atomic64_t *v)		\
 	(void)__atomic64_fetch_##op(i, &v->counter);			\
 }
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-
 ATOMIC_OP(or)
 ATOMIC_OP(and)
 ATOMIC_OP(xor)
diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h
index f181f82..c4d061f 100644
--- a/arch/h8300/include/asm/atomic.h
+++ b/arch/h8300/include/asm/atomic.h
@@ -41,8 +41,6 @@ static inline void atomic_##op(int i, atomic_t *v)		\
 ATOMIC_OP_RETURN(add, +=)
 ATOMIC_OP_RETURN(sub, -=)
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-
 ATOMIC_OP(and, &=)
 ATOMIC_OP(or,  |=)
 ATOMIC_OP(xor, ^=)
diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h
index 4efe2c7..811d61f 100644
--- a/arch/hexagon/include/asm/atomic.h
+++ b/arch/hexagon/include/asm/atomic.h
@@ -132,8 +132,6 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-
 ATOMIC_OP(and)
 ATOMIC_OP(or)
 ATOMIC_OP(xor)
diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h
index 0809ef5..be4beeb 100644
--- a/arch/ia64/include/asm/atomic.h
+++ b/arch/ia64/include/asm/atomic.h
@@ -69,8 +69,6 @@ ATOMIC_OP(sub, -)
 		: ia64_atomic_sub(__ia64_asr_i, v);			\
 })
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-
 ATOMIC_OP(and, &)
 ATOMIC_OP(or, |)
 ATOMIC_OP(xor, ^)
diff --git a/arch/m32r/include/asm/atomic.h b/arch/m32r/include/asm/atomic.h
index 7245463..b2a13fb 100644
--- a/arch/m32r/include/asm/atomic.h
+++ b/arch/m32r/include/asm/atomic.h
@@ -94,8 +94,6 @@ static __inline__ int atomic_##op##_return(int i, atomic_t *v)		\
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-
 ATOMIC_OP(and)
 ATOMIC_OP(or)
 ATOMIC_OP(xor)
diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h
index c30e43e..93ebd96 100644
--- a/arch/m68k/include/asm/atomic.h
+++ b/arch/m68k/include/asm/atomic.h
@@ -77,8 +77,6 @@ static inline int atomic_##op##_return(int i, atomic_t * v)		\
 ATOMIC_OPS(add, +=, add)
 ATOMIC_OPS(sub, -=, sub)
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-
 ATOMIC_OP(and, &=, and)
 ATOMIC_OP(or, |=, or)
 ATOMIC_OP(xor, ^=, eor)
diff --git a/arch/metag/include/asm/atomic_lnkget.h b/arch/metag/include/asm/atomic_lnkget.h
index 930c12c..0642606 100644
--- a/arch/metag/include/asm/atomic_lnkget.h
+++ b/arch/metag/include/asm/atomic_lnkget.h
@@ -74,8 +74,6 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-
 ATOMIC_OP(and)
 ATOMIC_OP(or)
 ATOMIC_OP(xor)
diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index 0430ba6..4c42fd9 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -137,8 +137,6 @@ static __inline__ int atomic_##op##_return(int i, atomic_t * v)		      \
 ATOMIC_OPS(add, +=, addu)
 ATOMIC_OPS(sub, -=, subu)
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-
 ATOMIC_OP(and, &=, and)
 ATOMIC_OP(or, |=, or)
 ATOMIC_OP(xor, ^=, xor)
diff --git a/arch/mn10300/include/asm/atomic.h b/arch/mn10300/include/asm/atomic.h
index 03eea81..f5a63f0 100644
--- a/arch/mn10300/include/asm/atomic.h
+++ b/arch/mn10300/include/asm/atomic.h
@@ -89,8 +89,6 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-
 ATOMIC_OP(and)
 ATOMIC_OP(or)
 ATOMIC_OP(xor)
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index be2c50d..2536965 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -126,8 +126,6 @@ static __inline__ int atomic_##op##_return(int i, atomic_t *v)		\
 ATOMIC_OPS(add, +=)
 ATOMIC_OPS(sub, -=)
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-
 ATOMIC_OP(and, &=)
 ATOMIC_OP(or, |=)
 ATOMIC_OP(xor, ^=)
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 6ca89e2..55f106e 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -67,8 +67,6 @@ static __inline__ int atomic_##op##_return(int a, atomic_t *v)		\
 ATOMIC_OPS(add, add)
 ATOMIC_OPS(sub, subf)
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-
 ATOMIC_OP(and, and)
 ATOMIC_OP(or, or)
 ATOMIC_OP(xor, xor)
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index b3859d8..d761aef 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -282,8 +282,6 @@ static inline void atomic64_##op(long i, atomic64_t *v)			\
 	__ATOMIC64_LOOP(v, i, __ATOMIC64_##OP, __ATOMIC64_NO_BARRIER);	\
 }
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-
 ATOMIC64_OP(and, AND)
 ATOMIC64_OP(or, OR)
 ATOMIC64_OP(xor, XOR)
diff --git a/arch/sh/include/asm/atomic-grb.h b/arch/sh/include/asm/atomic-grb.h
index 4b03830..b94df40 100644
--- a/arch/sh/include/asm/atomic-grb.h
+++ b/arch/sh/include/asm/atomic-grb.h
@@ -48,8 +48,6 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-
 ATOMIC_OP(and)
 ATOMIC_OP(or)
 ATOMIC_OP(xor)
diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h
index e19d888..7dcbebb 100644
--- a/arch/sparc/include/asm/atomic_32.h
+++ b/arch/sparc/include/asm/atomic_32.h
@@ -17,8 +17,6 @@
 #include <asm/barrier.h>
 #include <asm-generic/atomic64.h>
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-
 #define ATOMIC_INIT(i)  { (i) }
 
 int atomic_add_return(int, atomic_t *);
diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h
index d6af27c..917084a 100644
--- a/arch/sparc/include/asm/atomic_64.h
+++ b/arch/sparc/include/asm/atomic_64.h
@@ -33,8 +33,6 @@ long atomic64_##op##_return(long, atomic64_t *);
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-
 ATOMIC_OP(and)
 ATOMIC_OP(or)
 ATOMIC_OP(xor)
diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h
index 9423792..d320ce2 100644
--- a/arch/tile/include/asm/atomic_32.h
+++ b/arch/tile/include/asm/atomic_32.h
@@ -41,8 +41,6 @@ static inline void atomic_##op(int i, atomic_t *v)			\
 	_atomic_##op((unsigned long *)&v->counter, i);			\
 }
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-
 ATOMIC_OP(and)
 ATOMIC_OP(or)
 ATOMIC_OP(xor)
diff --git a/arch/tile/include/asm/atomic_64.h b/arch/tile/include/asm/atomic_64.h
index d07d9fc..096a56d 100644
--- a/arch/tile/include/asm/atomic_64.h
+++ b/arch/tile/include/asm/atomic_64.h
@@ -58,8 +58,6 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 	return oldval;
 }
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-
 static inline void atomic_and(int i, atomic_t *v)
 {
 	__insn_fetchand4((void *)&v->counter, i);
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index f3a3ec0..b349302 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -191,8 +191,6 @@ static inline void atomic_##op(int i, atomic_t *v)			\
 			: "memory");					\
 }
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-
 ATOMIC_OP(and)
 ATOMIC_OP(or)
 ATOMIC_OP(xor)
diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h
index 4dd2450..31371f4 100644
--- a/arch/xtensa/include/asm/atomic.h
+++ b/arch/xtensa/include/asm/atomic.h
@@ -145,8 +145,6 @@ static inline int atomic_##op##_return(int i, atomic_t * v)		\
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-
 ATOMIC_OP(and)
 ATOMIC_OP(or)
 ATOMIC_OP(xor)
diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h
index 92947e0..a41b0b8 100644
--- a/include/asm-generic/atomic.h
+++ b/include/asm-generic/atomic.h
@@ -102,24 +102,27 @@ ATOMIC_OP_RETURN(sub, -)
 ATOMIC_OP(and, &)
 #endif
 
-#ifndef atomic_clear_mask
-#define atomic_clear_mask(i, v) atomic_and(~(i), (v))
-#endif
-
 #ifndef atomic_or
-#ifndef CONFIG_ARCH_HAS_ATOMIC_OR
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-#endif
 ATOMIC_OP(or, |)
 #endif
 
-#ifndef atomic_set_mask
-#define atomic_set_mask(i, v)	atomic_or((i), (v))
+#ifndef atomic_xor
+ATOMIC_OP(xor, ^)
 #endif
 
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
+static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
+{
+	atomic_and(~mask, v);
+}
+
+static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
+{
+	atomic_or(mask, v);
+}
+
 /*
  * Atomic operations that C can't guarantee us.  Useful for
  * resource counting etc..
diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h
index 30ad9c8..d48e78c 100644
--- a/include/asm-generic/atomic64.h
+++ b/include/asm-generic/atomic64.h
@@ -32,6 +32,10 @@ extern long long atomic64_##op##_return(long long a, atomic64_t *v);
 ATOMIC64_OPS(add)
 ATOMIC64_OPS(sub)
 
+ATOMIC64_OP(and)
+ATOMIC64_OP(or)
+ATOMIC64_OP(xor)
+
 #undef ATOMIC64_OPS
 #undef ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 5b08a85..7d62790 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -111,19 +111,6 @@ static inline int atomic_dec_if_positive(atomic_t *v)
 }
 #endif
 
-#ifndef CONFIG_ARCH_HAS_ATOMIC_OR
-static inline void atomic_or(int i, atomic_t *v)
-{
-	int old;
-	int new;
-
-	do {
-		old = atomic_read(v);
-		new = old | i;
-	} while (atomic_cmpxchg(v, old, new) != old);
-}
-#endif /* #ifndef CONFIG_ARCH_HAS_ATOMIC_OR */
-
 #include <asm-generic/atomic-long.h>
 #ifdef CONFIG_GENERIC_ATOMIC64
 #include <asm-generic/atomic64.h>
diff --git a/lib/atomic64.c b/lib/atomic64.c
index 1298c05e..2886eba 100644
--- a/lib/atomic64.c
+++ b/lib/atomic64.c
@@ -102,6 +102,9 @@ EXPORT_SYMBOL(atomic64_##op##_return);
 
 ATOMIC64_OPS(add, +=)
 ATOMIC64_OPS(sub, -=)
+ATOMIC64_OP(and, &=)
+ATOMIC64_OP(or, |=)
+ATOMIC64_OP(xor, ^=)
 
 #undef ATOMIC64_OPS
 #undef ATOMIC64_OP_RETURN
-- 
cgit v0.10.2


From de9e432cb5de1bf2952919dc0b22e4bec0ed8d53 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 24 Apr 2015 01:12:32 +0200
Subject: atomic: Collapse all atomic_{set,clear}_mask definitions

Move the now generic definitions of atomic_{set,clear}_mask() into
linux/atomic.h to avoid endless and pointless repetition.

Also, provide an atomic_andnot() wrapper for those few archs that can
implement that.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 2a84782..d8a85e7 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -155,16 +155,6 @@ ATOMIC_OP(xor, ^=, xor)
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_and(~mask, v);
-}
-
-static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_or(mask, v);
-}
-
 /**
  * __atomic_add_unless - add unless the number is a given value
  * @v: pointer of type atomic_t
diff --git a/arch/blackfin/include/asm/atomic.h b/arch/blackfin/include/asm/atomic.h
index 2d6a7a3..1c1c423 100644
--- a/arch/blackfin/include/asm/atomic.h
+++ b/arch/blackfin/include/asm/atomic.h
@@ -32,16 +32,6 @@ asmlinkage int __raw_atomic_test_asm(const volatile int *ptr, int value);
 #define atomic_and(i, v) (void)__raw_atomic_and_asm(&(v)->counter, i)
 #define atomic_xor(i, v) (void)__raw_atomic_xor_asm(&(v)->counter, i)
 
-static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_and(~mask, v);
-}
-
-static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_or(mask, v);
-}
-
 #endif
 
 #include <asm-generic/atomic.h>
diff --git a/arch/frv/include/asm/atomic.h b/arch/frv/include/asm/atomic.h
index fc48bea..0da689d 100644
--- a/arch/frv/include/asm/atomic.h
+++ b/arch/frv/include/asm/atomic.h
@@ -198,14 +198,4 @@ ATOMIC_OP(xor)
 
 #undef ATOMIC_OP
 
-static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_and(~mask, v);
-}
-
-static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_or(mask, v);
-}
-
 #endif /* _ASM_ATOMIC_H */
diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h
index c4d061f..702ee53 100644
--- a/arch/h8300/include/asm/atomic.h
+++ b/arch/h8300/include/asm/atomic.h
@@ -89,14 +89,4 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 	return ret;
 }
 
-static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_and(~mask, v);
-}
-
-static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_or(mask, v);
-}
-
 #endif /* __ARCH_H8300_ATOMIC __ */
diff --git a/arch/m32r/include/asm/atomic.h b/arch/m32r/include/asm/atomic.h
index b2a13fb..025e2a1 100644
--- a/arch/m32r/include/asm/atomic.h
+++ b/arch/m32r/include/asm/atomic.h
@@ -243,15 +243,4 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
 	return c;
 }
 
-
-static __inline__ __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_and(~mask, v);
-}
-
-static __inline__ __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_or(mask, v);
-}
-
 #endif	/* _ASM_M32R_ATOMIC_H */
diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h
index 93ebd96..039fac1 100644
--- a/arch/m68k/include/asm/atomic.h
+++ b/arch/m68k/include/asm/atomic.h
@@ -174,16 +174,6 @@ static inline int atomic_add_negative(int i, atomic_t *v)
 	return c != 0;
 }
 
-static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_and(~mask, v);
-}
-
-static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_or(mask, v);
-}
-
 static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
diff --git a/arch/metag/include/asm/atomic_lnkget.h b/arch/metag/include/asm/atomic_lnkget.h
index 0642606..21c4c26 100644
--- a/arch/metag/include/asm/atomic_lnkget.h
+++ b/arch/metag/include/asm/atomic_lnkget.h
@@ -82,16 +82,6 @@ ATOMIC_OP(xor)
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_and(~mask, v);
-}
-
-static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_or(mask, v);
-}
-
 static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 {
 	int result, temp;
diff --git a/arch/metag/include/asm/atomic_lock1.h b/arch/metag/include/asm/atomic_lock1.h
index 7d88725..f8efe38 100644
--- a/arch/metag/include/asm/atomic_lock1.h
+++ b/arch/metag/include/asm/atomic_lock1.h
@@ -76,16 +76,6 @@ ATOMIC_OP(xor, ^=)
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_and(~mask, v);
-}
-
-static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_or(mask, v);
-}
-
 static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 {
 	int ret;
diff --git a/arch/mn10300/include/asm/atomic.h b/arch/mn10300/include/asm/atomic.h
index f5a63f0..375e591 100644
--- a/arch/mn10300/include/asm/atomic.h
+++ b/arch/mn10300/include/asm/atomic.h
@@ -131,30 +131,6 @@ static inline void atomic_dec(atomic_t *v)
 #define atomic_xchg(ptr, v)		(xchg(&(ptr)->counter, (v)))
 #define atomic_cmpxchg(v, old, new)	(cmpxchg(&((v)->counter), (old), (new)))
 
-/**
- * atomic_clear_mask - Atomically clear bits in memory
- * @mask: Mask of the bits to be cleared
- * @v: pointer to word in memory
- *
- * Atomically clears the bits set in mask from the memory word specified.
- */
-static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_and(~mask, v);
-}
-
-/**
- * atomic_set_mask - Atomically set bits in memory
- * @mask: Mask of the bits to be set
- * @v: pointer to word in memory
- *
- * Atomically sets the bits set in mask from the memory word specified.
- */
-static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_or(mask, v);
-}
-
 #endif /* __KERNEL__ */
 #endif /* CONFIG_SMP */
 #endif /* _ASM_ATOMIC_H */
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 7c6bb4b..ed3ab50 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -596,25 +596,6 @@ _GLOBAL(copy_page)
 	b	2b
 
 /*
- * void atomic_clear_mask(atomic_t mask, atomic_t *addr)
- * void atomic_set_mask(atomic_t mask, atomic_t *addr);
- */
-_GLOBAL(atomic_clear_mask)
-10:	lwarx	r5,0,r4
-	andc	r5,r5,r3
-	PPC405_ERR77(0,r4)
-	stwcx.	r5,0,r4
-	bne-	10b
-	blr
-_GLOBAL(atomic_set_mask)
-10:	lwarx	r5,0,r4
-	or	r5,r5,r3
-	PPC405_ERR77(0,r4)
-	stwcx.	r5,0,r4
-	bne-	10b
-	blr
-
-/*
  * Extended precision shifts.
  *
  * Updated to be valid for shift counts from 0 to 63 inclusive.
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index d761aef..117fa5c 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -132,16 +132,6 @@ ATOMIC_OP(xor, XOR)
 
 #undef ATOMIC_OP
 
-static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_and(~mask, v);
-}
-
-static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_or(mask, v);
-}
-
 #define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 
 static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h
index cee0245..05b9f74 100644
--- a/arch/sh/include/asm/atomic.h
+++ b/arch/sh/include/asm/atomic.h
@@ -25,16 +25,6 @@
 #include <asm/atomic-irq.h>
 #endif
 
-static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_and(~mask, v);
-}
-
-static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_or(mask, v);
-}
-
 #define atomic_add_negative(a, v)	(atomic_add_return((a), (v)) < 0)
 #define atomic_dec_return(v)		atomic_sub_return(1, (v))
 #define atomic_inc_return(v)		atomic_add_return(1, (v))
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index b349302..fb52aa6 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -234,16 +234,6 @@ static __always_inline short int atomic_inc_short(short int *v)
 	return *v;
 }
 
-static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_and(~mask, v);
-}
-
-static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_or(mask, v);
-}
-
 #ifdef CONFIG_X86_32
 # include <asm/atomic64_32.h>
 #else
diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h
index 31371f4..e0be679 100644
--- a/arch/xtensa/include/asm/atomic.h
+++ b/arch/xtensa/include/asm/atomic.h
@@ -153,16 +153,6 @@ ATOMIC_OP(xor)
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_or(mask, v);
-}
-
-static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_and(~mask, v);
-}
-
 /**
  * atomic_sub_and_test - subtract value from variable and test result
  * @i: integer value to subtract
diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h
index a41b0b8..d4d7e33 100644
--- a/include/asm-generic/atomic.h
+++ b/include/asm-generic/atomic.h
@@ -113,16 +113,6 @@ ATOMIC_OP(xor, ^)
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_and(~mask, v);
-}
-
-static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_or(mask, v);
-}
-
 /*
  * Atomic operations that C can't guarantee us.  Useful for
  * resource counting etc..
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 7d62790..8b98b42 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -28,6 +28,23 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u)
 #define atomic_inc_not_zero(v)		atomic_add_unless((v), 1, 0)
 #endif
 
+#ifndef atomic_andnot
+static inline void atomic_andnot(int i, atomic_t *v)
+{
+	atomic_and(~i, v);
+}
+#endif
+
+static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
+{
+	atomic_andnot(mask, v);
+}
+
+static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
+{
+	atomic_or(mask, v);
+}
+
 /**
  * atomic_inc_not_zero_hint - increment if not null
  * @v: pointer of type atomic_t
@@ -115,4 +132,12 @@ static inline int atomic_dec_if_positive(atomic_t *v)
 #ifdef CONFIG_GENERIC_ATOMIC64
 #include <asm-generic/atomic64.h>
 #endif
+
+#ifndef atomic64_andnot
+static inline void atomic64_andnot(long long i, atomic64_t *v)
+{
+	atomic64_and(~i, v);
+}
+#endif
+
 #endif /* _LINUX_ATOMIC_H */
-- 
cgit v0.10.2


From 805de8f43c20ba8b479bb598b543fa86b20067f6 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 24 Apr 2015 01:12:32 +0200
Subject: atomic: Replace atomic_{set,clear}_mask() usage

Replace the deprecated atomic_{set,clear}_mask() usage with the now
ubiquous atomic_{or,andnot}() functions.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c
index 1c72595..0030e21 100644
--- a/arch/blackfin/mach-common/smp.c
+++ b/arch/blackfin/mach-common/smp.c
@@ -195,7 +195,7 @@ void send_ipi(const struct cpumask *cpumask, enum ipi_message_type msg)
 	local_irq_save(flags);
 	for_each_cpu(cpu, cpumask) {
 		bfin_ipi_data = &per_cpu(bfin_ipi, cpu);
-		atomic_set_mask((1 << msg), &bfin_ipi_data->bits);
+		atomic_or((1 << msg), &bfin_ipi_data->bits);
 		atomic_inc(&bfin_ipi_data->count);
 	}
 	local_irq_restore(flags);
diff --git a/arch/m32r/kernel/smp.c b/arch/m32r/kernel/smp.c
index c18ddc7..62d6961 100644
--- a/arch/m32r/kernel/smp.c
+++ b/arch/m32r/kernel/smp.c
@@ -156,7 +156,7 @@ void smp_flush_cache_all(void)
 	cpumask_clear_cpu(smp_processor_id(), &cpumask);
 	spin_lock(&flushcache_lock);
 	mask=cpumask_bits(&cpumask);
-	atomic_set_mask(*mask, (atomic_t *)&flushcache_cpumask);
+	atomic_or(*mask, (atomic_t *)&flushcache_cpumask);
 	send_IPI_mask(&cpumask, INVALIDATE_CACHE_IPI, 0);
 	_flush_cache_copyback_all();
 	while (flushcache_cpumask)
@@ -407,7 +407,7 @@ static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
 	flush_vma = vma;
 	flush_va = va;
 	mask=cpumask_bits(&cpumask);
-	atomic_set_mask(*mask, (atomic_t *)&flush_cpumask);
+	atomic_or(*mask, (atomic_t *)&flush_cpumask);
 
 	/*
 	 * We have to send the IPI only to
diff --git a/arch/mn10300/mm/tlb-smp.c b/arch/mn10300/mm/tlb-smp.c
index e5d0ef7..9a39ea9 100644
--- a/arch/mn10300/mm/tlb-smp.c
+++ b/arch/mn10300/mm/tlb-smp.c
@@ -119,7 +119,7 @@ static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
 	flush_mm = mm;
 	flush_va = va;
 #if NR_CPUS <= BITS_PER_LONG
-	atomic_set_mask(cpumask.bits[0], &flush_cpumask.bits[0]);
+	atomic_or(cpumask.bits[0], (atomic_t *)&flush_cpumask.bits[0]);
 #else
 #error Not supported.
 #endif
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 9e733d9..f5a0bd7 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -381,7 +381,7 @@ static void disable_sync_clock(void *dummy)
 	 * increase the "sequence" counter to avoid the race of an
 	 * etr event and the complete recovery against get_sync_clock.
 	 */
-	atomic_clear_mask(0x80000000, sw_ptr);
+	atomic_andnot(0x80000000, sw_ptr);
 	atomic_inc(sw_ptr);
 }
 
@@ -392,7 +392,7 @@ static void disable_sync_clock(void *dummy)
 static void enable_sync_clock(void)
 {
 	atomic_t *sw_ptr = this_cpu_ptr(&clock_sync_word);
-	atomic_set_mask(0x80000000, sw_ptr);
+	atomic_or(0x80000000, sw_ptr);
 }
 
 /*
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index c98d897..57309e9 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -170,20 +170,20 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu)
 
 static void __set_cpu_idle(struct kvm_vcpu *vcpu)
 {
-	atomic_set_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
+	atomic_or(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
 	set_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask);
 }
 
 static void __unset_cpu_idle(struct kvm_vcpu *vcpu)
 {
-	atomic_clear_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
+	atomic_andnot(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
 	clear_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask);
 }
 
 static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
 {
-	atomic_clear_mask(CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT,
-			  &vcpu->arch.sie_block->cpuflags);
+	atomic_andnot(CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT,
+		    &vcpu->arch.sie_block->cpuflags);
 	vcpu->arch.sie_block->lctl = 0x0000;
 	vcpu->arch.sie_block->ictl &= ~(ICTL_LPSW | ICTL_STCTL | ICTL_PINT);
 
@@ -196,7 +196,7 @@ static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
 
 static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)
 {
-	atomic_set_mask(flag, &vcpu->arch.sie_block->cpuflags);
+	atomic_or(flag, &vcpu->arch.sie_block->cpuflags);
 }
 
 static void set_intercept_indicators_io(struct kvm_vcpu *vcpu)
@@ -919,7 +919,7 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)
 	spin_unlock(&li->lock);
 
 	/* clear pending external calls set by sigp interpretation facility */
-	atomic_clear_mask(CPUSTAT_ECALL_PEND, li->cpuflags);
+	atomic_andnot(CPUSTAT_ECALL_PEND, li->cpuflags);
 	vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl = 0;
 }
 
@@ -1020,7 +1020,7 @@ static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 
 	li->irq.ext = irq->u.ext;
 	set_bit(IRQ_PEND_PFAULT_INIT, &li->pending_irqs);
-	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+	atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
 	return 0;
 }
 
@@ -1035,7 +1035,7 @@ static int __inject_extcall_sigpif(struct kvm_vcpu *vcpu, uint16_t src_id)
 		/* another external call is pending */
 		return -EBUSY;
 	}
-	atomic_set_mask(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags);
+	atomic_or(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags);
 	return 0;
 }
 
@@ -1061,7 +1061,7 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 	if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs))
 		return -EBUSY;
 	*extcall = irq->u.extcall;
-	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+	atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
 	return 0;
 }
 
@@ -1133,7 +1133,7 @@ static int __inject_sigp_emergency(struct kvm_vcpu *vcpu,
 
 	set_bit(irq->u.emerg.code, li->sigp_emerg_pending);
 	set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs);
-	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+	atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
 	return 0;
 }
 
@@ -1177,7 +1177,7 @@ static int __inject_ckc(struct kvm_vcpu *vcpu)
 				   0, 0, 2);
 
 	set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
-	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+	atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
 	return 0;
 }
 
@@ -1190,7 +1190,7 @@ static int __inject_cpu_timer(struct kvm_vcpu *vcpu)
 				   0, 0, 2);
 
 	set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
-	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+	atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
 	return 0;
 }
 
@@ -1369,13 +1369,13 @@ static void __floating_irq_kick(struct kvm *kvm, u64 type)
 	spin_lock(&li->lock);
 	switch (type) {
 	case KVM_S390_MCHK:
-		atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
+		atomic_or(CPUSTAT_STOP_INT, li->cpuflags);
 		break;
 	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
-		atomic_set_mask(CPUSTAT_IO_INT, li->cpuflags);
+		atomic_or(CPUSTAT_IO_INT, li->cpuflags);
 		break;
 	default:
-		atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+		atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
 		break;
 	}
 	spin_unlock(&li->lock);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 2078f92..b73302f 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1215,12 +1215,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	}
 	restore_access_regs(vcpu->run->s.regs.acrs);
 	gmap_enable(vcpu->arch.gmap);
-	atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
+	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
-	atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
+	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
 	gmap_disable(vcpu->arch.gmap);
 	if (test_kvm_facility(vcpu->kvm, 129)) {
 		save_fp_ctl(&vcpu->run->s.regs.fpc);
@@ -1320,9 +1320,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 						    CPUSTAT_STOPPED);
 
 	if (test_kvm_facility(vcpu->kvm, 78))
-		atomic_set_mask(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
+		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
 	else if (test_kvm_facility(vcpu->kvm, 8))
-		atomic_set_mask(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
+		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
 
 	kvm_s390_vcpu_setup_model(vcpu);
 
@@ -1422,24 +1422,24 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 
 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
 {
-	atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
+	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
 	exit_sie(vcpu);
 }
 
 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
 {
-	atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
+	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
 }
 
 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
 {
-	atomic_set_mask(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
+	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
 	exit_sie(vcpu);
 }
 
 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
 {
-	atomic_clear_mask(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
+	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
 }
 
 /*
@@ -1448,7 +1448,7 @@ static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
  * return immediately. */
 void exit_sie(struct kvm_vcpu *vcpu)
 {
-	atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
+	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
 		cpu_relax();
 }
@@ -1672,19 +1672,19 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
 		vcpu->guest_debug = dbg->control;
 		/* enforce guest PER */
-		atomic_set_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
+		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
 
 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
 			rc = kvm_s390_import_bp_data(vcpu, dbg);
 	} else {
-		atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
+		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
 		vcpu->arch.guestdbg.last_bp = 0;
 	}
 
 	if (rc) {
 		vcpu->guest_debug = 0;
 		kvm_s390_clear_bp_data(vcpu);
-		atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
+		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
 	}
 
 	return rc;
@@ -1771,7 +1771,7 @@ retry:
 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
 		if (!ibs_enabled(vcpu)) {
 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
-			atomic_set_mask(CPUSTAT_IBS,
+			atomic_or(CPUSTAT_IBS,
 					&vcpu->arch.sie_block->cpuflags);
 		}
 		goto retry;
@@ -1780,7 +1780,7 @@ retry:
 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
 		if (ibs_enabled(vcpu)) {
 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
-			atomic_clear_mask(CPUSTAT_IBS,
+			atomic_andnot(CPUSTAT_IBS,
 					  &vcpu->arch.sie_block->cpuflags);
 		}
 		goto retry;
@@ -2280,7 +2280,7 @@ void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
 		__disable_ibs_on_all_vcpus(vcpu->kvm);
 	}
 
-	atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
 	/*
 	 * Another VCPU might have used IBS while we were offline.
 	 * Let's play safe and flush the VCPU at startup.
@@ -2306,7 +2306,7 @@ void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
 	/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
 	kvm_s390_clear_stop_irq(vcpu);
 
-	atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
 	__disable_ibs_on_vcpu(vcpu);
 
 	for (i = 0; i < online_vcpus; i++) {
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 884b4f9..8917c98 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -748,7 +748,7 @@ static int i915_drm_resume(struct drm_device *dev)
 	mutex_lock(&dev->struct_mutex);
 	if (i915_gem_init_hw(dev)) {
 		DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n");
-		atomic_set_mask(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
+			atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
 	}
 	mutex_unlock(&dev->struct_mutex);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 52b446b..7a918d1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -5091,7 +5091,7 @@ int i915_gem_init(struct drm_device *dev)
 		 * for all other failure, such as an allocation failure, bail.
 		 */
 		DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
-		atomic_set_mask(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
+		atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
 		ret = 0;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 984e2fe..449a95c 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2446,7 +2446,7 @@ static void i915_reset_and_wakeup(struct drm_device *dev)
 			kobject_uevent_env(&dev->primary->kdev->kobj,
 					   KOBJ_CHANGE, reset_done_event);
 		} else {
-			atomic_set_mask(I915_WEDGED, &error->reset_counter);
+			atomic_or(I915_WEDGED, &error->reset_counter);
 		}
 
 		/*
@@ -2574,7 +2574,7 @@ void i915_handle_error(struct drm_device *dev, bool wedged,
 	i915_report_and_clear_eir(dev);
 
 	if (wedged) {
-		atomic_set_mask(I915_RESET_IN_PROGRESS_FLAG,
+		atomic_or(I915_RESET_IN_PROGRESS_FLAG,
 				&dev_priv->gpu_error.reset_counter);
 
 		/*
diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c
index 01a7339..c00ac46 100644
--- a/drivers/s390/scsi/zfcp_aux.c
+++ b/drivers/s390/scsi/zfcp_aux.c
@@ -529,7 +529,7 @@ struct zfcp_port *zfcp_port_enqueue(struct zfcp_adapter *adapter, u64 wwpn,
 	list_add_tail(&port->list, &adapter->port_list);
 	write_unlock_irq(&adapter->port_list_lock);
 
-	atomic_set_mask(status | ZFCP_STATUS_COMMON_RUNNING, &port->status);
+	atomic_or(status | ZFCP_STATUS_COMMON_RUNNING, &port->status);
 
 	return port;
 
diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c
index acde3f5..3fb4109 100644
--- a/drivers/s390/scsi/zfcp_erp.c
+++ b/drivers/s390/scsi/zfcp_erp.c
@@ -190,7 +190,7 @@ static struct zfcp_erp_action *zfcp_erp_setup_act(int need, u32 act_status,
 		if (!(act_status & ZFCP_STATUS_ERP_NO_REF))
 			if (scsi_device_get(sdev))
 				return NULL;
-		atomic_set_mask(ZFCP_STATUS_COMMON_ERP_INUSE,
+		atomic_or(ZFCP_STATUS_COMMON_ERP_INUSE,
 				&zfcp_sdev->status);
 		erp_action = &zfcp_sdev->erp_action;
 		memset(erp_action, 0, sizeof(struct zfcp_erp_action));
@@ -206,7 +206,7 @@ static struct zfcp_erp_action *zfcp_erp_setup_act(int need, u32 act_status,
 		if (!get_device(&port->dev))
 			return NULL;
 		zfcp_erp_action_dismiss_port(port);
-		atomic_set_mask(ZFCP_STATUS_COMMON_ERP_INUSE, &port->status);
+		atomic_or(ZFCP_STATUS_COMMON_ERP_INUSE, &port->status);
 		erp_action = &port->erp_action;
 		memset(erp_action, 0, sizeof(struct zfcp_erp_action));
 		erp_action->port = port;
@@ -217,7 +217,7 @@ static struct zfcp_erp_action *zfcp_erp_setup_act(int need, u32 act_status,
 	case ZFCP_ERP_ACTION_REOPEN_ADAPTER:
 		kref_get(&adapter->ref);
 		zfcp_erp_action_dismiss_adapter(adapter);
-		atomic_set_mask(ZFCP_STATUS_COMMON_ERP_INUSE, &adapter->status);
+		atomic_or(ZFCP_STATUS_COMMON_ERP_INUSE, &adapter->status);
 		erp_action = &adapter->erp_action;
 		memset(erp_action, 0, sizeof(struct zfcp_erp_action));
 		if (!(atomic_read(&adapter->status) &
@@ -254,7 +254,7 @@ static int zfcp_erp_action_enqueue(int want, struct zfcp_adapter *adapter,
 	act = zfcp_erp_setup_act(need, act_status, adapter, port, sdev);
 	if (!act)
 		goto out;
-	atomic_set_mask(ZFCP_STATUS_ADAPTER_ERP_PENDING, &adapter->status);
+	atomic_or(ZFCP_STATUS_ADAPTER_ERP_PENDING, &adapter->status);
 	++adapter->erp_total_count;
 	list_add_tail(&act->list, &adapter->erp_ready_head);
 	wake_up(&adapter->erp_ready_wq);
@@ -486,14 +486,14 @@ static void zfcp_erp_adapter_unblock(struct zfcp_adapter *adapter)
 {
 	if (status_change_set(ZFCP_STATUS_COMMON_UNBLOCKED, &adapter->status))
 		zfcp_dbf_rec_run("eraubl1", &adapter->erp_action);
-	atomic_set_mask(ZFCP_STATUS_COMMON_UNBLOCKED, &adapter->status);
+	atomic_or(ZFCP_STATUS_COMMON_UNBLOCKED, &adapter->status);
 }
 
 static void zfcp_erp_port_unblock(struct zfcp_port *port)
 {
 	if (status_change_set(ZFCP_STATUS_COMMON_UNBLOCKED, &port->status))
 		zfcp_dbf_rec_run("erpubl1", &port->erp_action);
-	atomic_set_mask(ZFCP_STATUS_COMMON_UNBLOCKED, &port->status);
+	atomic_or(ZFCP_STATUS_COMMON_UNBLOCKED, &port->status);
 }
 
 static void zfcp_erp_lun_unblock(struct scsi_device *sdev)
@@ -502,7 +502,7 @@ static void zfcp_erp_lun_unblock(struct scsi_device *sdev)
 
 	if (status_change_set(ZFCP_STATUS_COMMON_UNBLOCKED, &zfcp_sdev->status))
 		zfcp_dbf_rec_run("erlubl1", &sdev_to_zfcp(sdev)->erp_action);
-	atomic_set_mask(ZFCP_STATUS_COMMON_UNBLOCKED, &zfcp_sdev->status);
+	atomic_or(ZFCP_STATUS_COMMON_UNBLOCKED, &zfcp_sdev->status);
 }
 
 static void zfcp_erp_action_to_running(struct zfcp_erp_action *erp_action)
@@ -642,7 +642,7 @@ static void zfcp_erp_wakeup(struct zfcp_adapter *adapter)
 	read_lock_irqsave(&adapter->erp_lock, flags);
 	if (list_empty(&adapter->erp_ready_head) &&
 	    list_empty(&adapter->erp_running_head)) {
-			atomic_clear_mask(ZFCP_STATUS_ADAPTER_ERP_PENDING,
+			atomic_andnot(ZFCP_STATUS_ADAPTER_ERP_PENDING,
 					  &adapter->status);
 			wake_up(&adapter->erp_done_wqh);
 	}
@@ -665,16 +665,16 @@ static int zfcp_erp_adapter_strat_fsf_xconf(struct zfcp_erp_action *erp_action)
 	int sleep = 1;
 	struct zfcp_adapter *adapter = erp_action->adapter;
 
-	atomic_clear_mask(ZFCP_STATUS_ADAPTER_XCONFIG_OK, &adapter->status);
+	atomic_andnot(ZFCP_STATUS_ADAPTER_XCONFIG_OK, &adapter->status);
 
 	for (retries = 7; retries; retries--) {
-		atomic_clear_mask(ZFCP_STATUS_ADAPTER_HOST_CON_INIT,
+		atomic_andnot(ZFCP_STATUS_ADAPTER_HOST_CON_INIT,
 				  &adapter->status);
 		write_lock_irq(&adapter->erp_lock);
 		zfcp_erp_action_to_running(erp_action);
 		write_unlock_irq(&adapter->erp_lock);
 		if (zfcp_fsf_exchange_config_data(erp_action)) {
-			atomic_clear_mask(ZFCP_STATUS_ADAPTER_HOST_CON_INIT,
+			atomic_andnot(ZFCP_STATUS_ADAPTER_HOST_CON_INIT,
 					  &adapter->status);
 			return ZFCP_ERP_FAILED;
 		}
@@ -692,7 +692,7 @@ static int zfcp_erp_adapter_strat_fsf_xconf(struct zfcp_erp_action *erp_action)
 		sleep *= 2;
 	}
 
-	atomic_clear_mask(ZFCP_STATUS_ADAPTER_HOST_CON_INIT,
+	atomic_andnot(ZFCP_STATUS_ADAPTER_HOST_CON_INIT,
 			  &adapter->status);
 
 	if (!(atomic_read(&adapter->status) & ZFCP_STATUS_ADAPTER_XCONFIG_OK))
@@ -764,7 +764,7 @@ static void zfcp_erp_adapter_strategy_close(struct zfcp_erp_action *act)
 	/* all ports and LUNs are closed */
 	zfcp_erp_clear_adapter_status(adapter, ZFCP_STATUS_COMMON_OPEN);
 
-	atomic_clear_mask(ZFCP_STATUS_ADAPTER_XCONFIG_OK |
+	atomic_andnot(ZFCP_STATUS_ADAPTER_XCONFIG_OK |
 			  ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED, &adapter->status);
 }
 
@@ -773,7 +773,7 @@ static int zfcp_erp_adapter_strategy_open(struct zfcp_erp_action *act)
 	struct zfcp_adapter *adapter = act->adapter;
 
 	if (zfcp_qdio_open(adapter->qdio)) {
-		atomic_clear_mask(ZFCP_STATUS_ADAPTER_XCONFIG_OK |
+		atomic_andnot(ZFCP_STATUS_ADAPTER_XCONFIG_OK |
 				  ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED,
 				  &adapter->status);
 		return ZFCP_ERP_FAILED;
@@ -784,7 +784,7 @@ static int zfcp_erp_adapter_strategy_open(struct zfcp_erp_action *act)
 		return ZFCP_ERP_FAILED;
 	}
 
-	atomic_set_mask(ZFCP_STATUS_COMMON_OPEN, &adapter->status);
+	atomic_or(ZFCP_STATUS_COMMON_OPEN, &adapter->status);
 
 	return ZFCP_ERP_SUCCEEDED;
 }
@@ -948,7 +948,7 @@ static void zfcp_erp_lun_strategy_clearstati(struct scsi_device *sdev)
 {
 	struct zfcp_scsi_dev *zfcp_sdev = sdev_to_zfcp(sdev);
 
-	atomic_clear_mask(ZFCP_STATUS_COMMON_ACCESS_DENIED,
+	atomic_andnot(ZFCP_STATUS_COMMON_ACCESS_DENIED,
 			  &zfcp_sdev->status);
 }
 
@@ -1187,18 +1187,18 @@ static void zfcp_erp_action_dequeue(struct zfcp_erp_action *erp_action)
 	switch (erp_action->action) {
 	case ZFCP_ERP_ACTION_REOPEN_LUN:
 		zfcp_sdev = sdev_to_zfcp(erp_action->sdev);
-		atomic_clear_mask(ZFCP_STATUS_COMMON_ERP_INUSE,
+		atomic_andnot(ZFCP_STATUS_COMMON_ERP_INUSE,
 				  &zfcp_sdev->status);
 		break;
 
 	case ZFCP_ERP_ACTION_REOPEN_PORT_FORCED:
 	case ZFCP_ERP_ACTION_REOPEN_PORT:
-		atomic_clear_mask(ZFCP_STATUS_COMMON_ERP_INUSE,
+		atomic_andnot(ZFCP_STATUS_COMMON_ERP_INUSE,
 				  &erp_action->port->status);
 		break;
 
 	case ZFCP_ERP_ACTION_REOPEN_ADAPTER:
-		atomic_clear_mask(ZFCP_STATUS_COMMON_ERP_INUSE,
+		atomic_andnot(ZFCP_STATUS_COMMON_ERP_INUSE,
 				  &erp_action->adapter->status);
 		break;
 	}
@@ -1422,19 +1422,19 @@ void zfcp_erp_set_adapter_status(struct zfcp_adapter *adapter, u32 mask)
 	unsigned long flags;
 	u32 common_mask = mask & ZFCP_COMMON_FLAGS;
 
-	atomic_set_mask(mask, &adapter->status);
+	atomic_or(mask, &adapter->status);
 
 	if (!common_mask)
 		return;
 
 	read_lock_irqsave(&adapter->port_list_lock, flags);
 	list_for_each_entry(port, &adapter->port_list, list)
-		atomic_set_mask(common_mask, &port->status);
+		atomic_or(common_mask, &port->status);
 	read_unlock_irqrestore(&adapter->port_list_lock, flags);
 
 	spin_lock_irqsave(adapter->scsi_host->host_lock, flags);
 	__shost_for_each_device(sdev, adapter->scsi_host)
-		atomic_set_mask(common_mask, &sdev_to_zfcp(sdev)->status);
+		atomic_or(common_mask, &sdev_to_zfcp(sdev)->status);
 	spin_unlock_irqrestore(adapter->scsi_host->host_lock, flags);
 }
 
@@ -1453,7 +1453,7 @@ void zfcp_erp_clear_adapter_status(struct zfcp_adapter *adapter, u32 mask)
 	u32 common_mask = mask & ZFCP_COMMON_FLAGS;
 	u32 clear_counter = mask & ZFCP_STATUS_COMMON_ERP_FAILED;
 
-	atomic_clear_mask(mask, &adapter->status);
+	atomic_andnot(mask, &adapter->status);
 
 	if (!common_mask)
 		return;
@@ -1463,7 +1463,7 @@ void zfcp_erp_clear_adapter_status(struct zfcp_adapter *adapter, u32 mask)
 
 	read_lock_irqsave(&adapter->port_list_lock, flags);
 	list_for_each_entry(port, &adapter->port_list, list) {
-		atomic_clear_mask(common_mask, &port->status);
+		atomic_andnot(common_mask, &port->status);
 		if (clear_counter)
 			atomic_set(&port->erp_counter, 0);
 	}
@@ -1471,7 +1471,7 @@ void zfcp_erp_clear_adapter_status(struct zfcp_adapter *adapter, u32 mask)
 
 	spin_lock_irqsave(adapter->scsi_host->host_lock, flags);
 	__shost_for_each_device(sdev, adapter->scsi_host) {
-		atomic_clear_mask(common_mask, &sdev_to_zfcp(sdev)->status);
+		atomic_andnot(common_mask, &sdev_to_zfcp(sdev)->status);
 		if (clear_counter)
 			atomic_set(&sdev_to_zfcp(sdev)->erp_counter, 0);
 	}
@@ -1491,7 +1491,7 @@ void zfcp_erp_set_port_status(struct zfcp_port *port, u32 mask)
 	u32 common_mask = mask & ZFCP_COMMON_FLAGS;
 	unsigned long flags;
 
-	atomic_set_mask(mask, &port->status);
+	atomic_or(mask, &port->status);
 
 	if (!common_mask)
 		return;
@@ -1499,7 +1499,7 @@ void zfcp_erp_set_port_status(struct zfcp_port *port, u32 mask)
 	spin_lock_irqsave(port->adapter->scsi_host->host_lock, flags);
 	__shost_for_each_device(sdev, port->adapter->scsi_host)
 		if (sdev_to_zfcp(sdev)->port == port)
-			atomic_set_mask(common_mask,
+			atomic_or(common_mask,
 					&sdev_to_zfcp(sdev)->status);
 	spin_unlock_irqrestore(port->adapter->scsi_host->host_lock, flags);
 }
@@ -1518,7 +1518,7 @@ void zfcp_erp_clear_port_status(struct zfcp_port *port, u32 mask)
 	u32 clear_counter = mask & ZFCP_STATUS_COMMON_ERP_FAILED;
 	unsigned long flags;
 
-	atomic_clear_mask(mask, &port->status);
+	atomic_andnot(mask, &port->status);
 
 	if (!common_mask)
 		return;
@@ -1529,7 +1529,7 @@ void zfcp_erp_clear_port_status(struct zfcp_port *port, u32 mask)
 	spin_lock_irqsave(port->adapter->scsi_host->host_lock, flags);
 	__shost_for_each_device(sdev, port->adapter->scsi_host)
 		if (sdev_to_zfcp(sdev)->port == port) {
-			atomic_clear_mask(common_mask,
+			atomic_andnot(common_mask,
 					  &sdev_to_zfcp(sdev)->status);
 			if (clear_counter)
 				atomic_set(&sdev_to_zfcp(sdev)->erp_counter, 0);
@@ -1546,7 +1546,7 @@ void zfcp_erp_set_lun_status(struct scsi_device *sdev, u32 mask)
 {
 	struct zfcp_scsi_dev *zfcp_sdev = sdev_to_zfcp(sdev);
 
-	atomic_set_mask(mask, &zfcp_sdev->status);
+	atomic_or(mask, &zfcp_sdev->status);
 }
 
 /**
@@ -1558,7 +1558,7 @@ void zfcp_erp_clear_lun_status(struct scsi_device *sdev, u32 mask)
 {
 	struct zfcp_scsi_dev *zfcp_sdev = sdev_to_zfcp(sdev);
 
-	atomic_clear_mask(mask, &zfcp_sdev->status);
+	atomic_andnot(mask, &zfcp_sdev->status);
 
 	if (mask & ZFCP_STATUS_COMMON_ERP_FAILED)
 		atomic_set(&zfcp_sdev->erp_counter, 0);
diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c
index 25d49f3..237688a 100644
--- a/drivers/s390/scsi/zfcp_fc.c
+++ b/drivers/s390/scsi/zfcp_fc.c
@@ -508,7 +508,7 @@ static void zfcp_fc_adisc_handler(void *data)
 	/* port is good, unblock rport without going through erp */
 	zfcp_scsi_schedule_rport_register(port);
  out:
-	atomic_clear_mask(ZFCP_STATUS_PORT_LINK_TEST, &port->status);
+	atomic_andnot(ZFCP_STATUS_PORT_LINK_TEST, &port->status);
 	put_device(&port->dev);
 	kmem_cache_free(zfcp_fc_req_cache, fc_req);
 }
@@ -564,14 +564,14 @@ void zfcp_fc_link_test_work(struct work_struct *work)
 	if (atomic_read(&port->status) & ZFCP_STATUS_PORT_LINK_TEST)
 		goto out;
 
-	atomic_set_mask(ZFCP_STATUS_PORT_LINK_TEST, &port->status);
+	atomic_or(ZFCP_STATUS_PORT_LINK_TEST, &port->status);
 
 	retval = zfcp_fc_adisc(port);
 	if (retval == 0)
 		return;
 
 	/* send of ADISC was not possible */
-	atomic_clear_mask(ZFCP_STATUS_PORT_LINK_TEST, &port->status);
+	atomic_andnot(ZFCP_STATUS_PORT_LINK_TEST, &port->status);
 	zfcp_erp_port_forced_reopen(port, 0, "fcltwk1");
 
 out:
@@ -640,7 +640,7 @@ static void zfcp_fc_validate_port(struct zfcp_port *port, struct list_head *lh)
 	if (!(atomic_read(&port->status) & ZFCP_STATUS_COMMON_NOESC))
 		return;
 
-	atomic_clear_mask(ZFCP_STATUS_COMMON_NOESC, &port->status);
+	atomic_andnot(ZFCP_STATUS_COMMON_NOESC, &port->status);
 
 	if ((port->supported_classes != 0) ||
 	    !list_empty(&port->unit_list))
diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c
index 21ec5e2..27b976a 100644
--- a/drivers/s390/scsi/zfcp_fsf.c
+++ b/drivers/s390/scsi/zfcp_fsf.c
@@ -114,7 +114,7 @@ static void zfcp_fsf_link_down_info_eval(struct zfcp_fsf_req *req,
 	if (atomic_read(&adapter->status) & ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED)
 		return;
 
-	atomic_set_mask(ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED, &adapter->status);
+	atomic_or(ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED, &adapter->status);
 
 	zfcp_scsi_schedule_rports_block(adapter);
 
@@ -345,7 +345,7 @@ static void zfcp_fsf_protstatus_eval(struct zfcp_fsf_req *req)
 		zfcp_erp_adapter_shutdown(adapter, 0, "fspse_3");
 		break;
 	case FSF_PROT_HOST_CONNECTION_INITIALIZING:
-		atomic_set_mask(ZFCP_STATUS_ADAPTER_HOST_CON_INIT,
+		atomic_or(ZFCP_STATUS_ADAPTER_HOST_CON_INIT,
 				&adapter->status);
 		break;
 	case FSF_PROT_DUPLICATE_REQUEST_ID:
@@ -554,7 +554,7 @@ static void zfcp_fsf_exchange_config_data_handler(struct zfcp_fsf_req *req)
 			zfcp_erp_adapter_shutdown(adapter, 0, "fsecdh1");
 			return;
 		}
-		atomic_set_mask(ZFCP_STATUS_ADAPTER_XCONFIG_OK,
+		atomic_or(ZFCP_STATUS_ADAPTER_XCONFIG_OK,
 				&adapter->status);
 		break;
 	case FSF_EXCHANGE_CONFIG_DATA_INCOMPLETE:
@@ -567,7 +567,7 @@ static void zfcp_fsf_exchange_config_data_handler(struct zfcp_fsf_req *req)
 
 		/* avoids adapter shutdown to be able to recognize
 		 * events such as LINK UP */
-		atomic_set_mask(ZFCP_STATUS_ADAPTER_XCONFIG_OK,
+		atomic_or(ZFCP_STATUS_ADAPTER_XCONFIG_OK,
 				&adapter->status);
 		zfcp_fsf_link_down_info_eval(req,
 			&qtcb->header.fsf_status_qual.link_down_info);
@@ -1394,9 +1394,9 @@ static void zfcp_fsf_open_port_handler(struct zfcp_fsf_req *req)
 		break;
 	case FSF_GOOD:
 		port->handle = header->port_handle;
-		atomic_set_mask(ZFCP_STATUS_COMMON_OPEN |
+		atomic_or(ZFCP_STATUS_COMMON_OPEN |
 				ZFCP_STATUS_PORT_PHYS_OPEN, &port->status);
-		atomic_clear_mask(ZFCP_STATUS_COMMON_ACCESS_BOXED,
+		atomic_andnot(ZFCP_STATUS_COMMON_ACCESS_BOXED,
 		                  &port->status);
 		/* check whether D_ID has changed during open */
 		/*
@@ -1677,10 +1677,10 @@ static void zfcp_fsf_close_physical_port_handler(struct zfcp_fsf_req *req)
 	case FSF_PORT_BOXED:
 		/* can't use generic zfcp_erp_modify_port_status because
 		 * ZFCP_STATUS_COMMON_OPEN must not be reset for the port */
-		atomic_clear_mask(ZFCP_STATUS_PORT_PHYS_OPEN, &port->status);
+		atomic_andnot(ZFCP_STATUS_PORT_PHYS_OPEN, &port->status);
 		shost_for_each_device(sdev, port->adapter->scsi_host)
 			if (sdev_to_zfcp(sdev)->port == port)
-				atomic_clear_mask(ZFCP_STATUS_COMMON_OPEN,
+				atomic_andnot(ZFCP_STATUS_COMMON_OPEN,
 						  &sdev_to_zfcp(sdev)->status);
 		zfcp_erp_set_port_status(port, ZFCP_STATUS_COMMON_ACCESS_BOXED);
 		zfcp_erp_port_reopen(port, ZFCP_STATUS_COMMON_ERP_FAILED,
@@ -1700,10 +1700,10 @@ static void zfcp_fsf_close_physical_port_handler(struct zfcp_fsf_req *req)
 		/* can't use generic zfcp_erp_modify_port_status because
 		 * ZFCP_STATUS_COMMON_OPEN must not be reset for the port
 		 */
-		atomic_clear_mask(ZFCP_STATUS_PORT_PHYS_OPEN, &port->status);
+		atomic_andnot(ZFCP_STATUS_PORT_PHYS_OPEN, &port->status);
 		shost_for_each_device(sdev, port->adapter->scsi_host)
 			if (sdev_to_zfcp(sdev)->port == port)
-				atomic_clear_mask(ZFCP_STATUS_COMMON_OPEN,
+				atomic_andnot(ZFCP_STATUS_COMMON_OPEN,
 						  &sdev_to_zfcp(sdev)->status);
 		break;
 	}
@@ -1766,7 +1766,7 @@ static void zfcp_fsf_open_lun_handler(struct zfcp_fsf_req *req)
 
 	zfcp_sdev = sdev_to_zfcp(sdev);
 
-	atomic_clear_mask(ZFCP_STATUS_COMMON_ACCESS_DENIED |
+	atomic_andnot(ZFCP_STATUS_COMMON_ACCESS_DENIED |
 			  ZFCP_STATUS_COMMON_ACCESS_BOXED,
 			  &zfcp_sdev->status);
 
@@ -1822,7 +1822,7 @@ static void zfcp_fsf_open_lun_handler(struct zfcp_fsf_req *req)
 
 	case FSF_GOOD:
 		zfcp_sdev->lun_handle = header->lun_handle;
-		atomic_set_mask(ZFCP_STATUS_COMMON_OPEN, &zfcp_sdev->status);
+		atomic_or(ZFCP_STATUS_COMMON_OPEN, &zfcp_sdev->status);
 		break;
 	}
 }
@@ -1913,7 +1913,7 @@ static void zfcp_fsf_close_lun_handler(struct zfcp_fsf_req *req)
 		}
 		break;
 	case FSF_GOOD:
-		atomic_clear_mask(ZFCP_STATUS_COMMON_OPEN, &zfcp_sdev->status);
+		atomic_andnot(ZFCP_STATUS_COMMON_OPEN, &zfcp_sdev->status);
 		break;
 	}
 }
diff --git a/drivers/s390/scsi/zfcp_qdio.c b/drivers/s390/scsi/zfcp_qdio.c
index 495e1cb..dbf2b547 100644
--- a/drivers/s390/scsi/zfcp_qdio.c
+++ b/drivers/s390/scsi/zfcp_qdio.c
@@ -349,7 +349,7 @@ void zfcp_qdio_close(struct zfcp_qdio *qdio)
 
 	/* clear QDIOUP flag, thus do_QDIO is not called during qdio_shutdown */
 	spin_lock_irq(&qdio->req_q_lock);
-	atomic_clear_mask(ZFCP_STATUS_ADAPTER_QDIOUP, &adapter->status);
+	atomic_andnot(ZFCP_STATUS_ADAPTER_QDIOUP, &adapter->status);
 	spin_unlock_irq(&qdio->req_q_lock);
 
 	wake_up(&qdio->req_q_wq);
@@ -384,7 +384,7 @@ int zfcp_qdio_open(struct zfcp_qdio *qdio)
 	if (atomic_read(&adapter->status) & ZFCP_STATUS_ADAPTER_QDIOUP)
 		return -EIO;
 
-	atomic_clear_mask(ZFCP_STATUS_ADAPTER_SIOSL_ISSUED,
+	atomic_andnot(ZFCP_STATUS_ADAPTER_SIOSL_ISSUED,
 			  &qdio->adapter->status);
 
 	zfcp_qdio_setup_init_data(&init_data, qdio);
@@ -396,14 +396,14 @@ int zfcp_qdio_open(struct zfcp_qdio *qdio)
 		goto failed_qdio;
 
 	if (ssqd.qdioac2 & CHSC_AC2_DATA_DIV_ENABLED)
-		atomic_set_mask(ZFCP_STATUS_ADAPTER_DATA_DIV_ENABLED,
+		atomic_or(ZFCP_STATUS_ADAPTER_DATA_DIV_ENABLED,
 				&qdio->adapter->status);
 
 	if (ssqd.qdioac2 & CHSC_AC2_MULTI_BUFFER_ENABLED) {
-		atomic_set_mask(ZFCP_STATUS_ADAPTER_MB_ACT, &adapter->status);
+		atomic_or(ZFCP_STATUS_ADAPTER_MB_ACT, &adapter->status);
 		qdio->max_sbale_per_sbal = QDIO_MAX_ELEMENTS_PER_BUFFER;
 	} else {
-		atomic_clear_mask(ZFCP_STATUS_ADAPTER_MB_ACT, &adapter->status);
+		atomic_andnot(ZFCP_STATUS_ADAPTER_MB_ACT, &adapter->status);
 		qdio->max_sbale_per_sbal = QDIO_MAX_ELEMENTS_PER_BUFFER - 1;
 	}
 
@@ -427,7 +427,7 @@ int zfcp_qdio_open(struct zfcp_qdio *qdio)
 	/* set index of first available SBALS / number of available SBALS */
 	qdio->req_q_idx = 0;
 	atomic_set(&qdio->req_q_free, QDIO_MAX_BUFFERS_PER_Q);
-	atomic_set_mask(ZFCP_STATUS_ADAPTER_QDIOUP, &qdio->adapter->status);
+	atomic_or(ZFCP_STATUS_ADAPTER_QDIOUP, &qdio->adapter->status);
 
 	if (adapter->scsi_host) {
 		adapter->scsi_host->sg_tablesize = qdio->max_sbale_per_req;
@@ -499,6 +499,6 @@ void zfcp_qdio_siosl(struct zfcp_adapter *adapter)
 
 	rc = ccw_device_siosl(adapter->ccw_device);
 	if (!rc)
-		atomic_set_mask(ZFCP_STATUS_ADAPTER_SIOSL_ISSUED,
+		atomic_or(ZFCP_STATUS_ADAPTER_SIOSL_ISSUED,
 				&adapter->status);
 }
-- 
cgit v0.10.2


From 41b9e9fcc1c44b84a785115058ce9c703e3fca6e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 13 Jul 2015 12:55:58 +0200
Subject: atomic: Add simple atomic_t tests

Add a few atomic_t tests, gets some compile coverage for the new
operations.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c
index 0211d30..83c33a5b 100644
--- a/lib/atomic64_test.c
+++ b/lib/atomic64_test.c
@@ -16,8 +16,39 @@
 #include <linux/kernel.h>
 #include <linux/atomic.h>
 
+#define TEST(bit, op, c_op, val)				\
+do {								\
+	atomic##bit##_set(&v, v0);				\
+	r = v0;							\
+	atomic##bit##_##op(val, &v);				\
+	r c_op val;						\
+	WARN(atomic##bit##_read(&v) != r, "%Lx != %Lx\n",	\
+		(unsigned long long)atomic##bit##_read(&v),	\
+		(unsigned long long)r);				\
+} while (0)
+
+static __init void test_atomic(void)
+{
+	int v0 = 0xaaa31337;
+	int v1 = 0xdeadbeef;
+	int onestwos = 0x11112222;
+	int one = 1;
+
+	atomic_t v;
+	int r;
+
+	TEST(, add, +=, onestwos);
+	TEST(, add, +=, -one);
+	TEST(, sub, -=, onestwos);
+	TEST(, sub, -=, -one);
+	TEST(, or, |=, v1);
+	TEST(, and, &=, v1);
+	TEST(, xor, ^=, v1);
+	TEST(, andnot, &= ~, v1);
+}
+
 #define INIT(c) do { atomic64_set(&v, c); r = c; } while (0)
-static __init int test_atomic64(void)
+static __init void test_atomic64(void)
 {
 	long long v0 = 0xaaa31337c001d00dLL;
 	long long v1 = 0xdeadbeefdeafcafeLL;
@@ -34,15 +65,14 @@ static __init int test_atomic64(void)
 	BUG_ON(v.counter != r);
 	BUG_ON(atomic64_read(&v) != r);
 
-	INIT(v0);
-	atomic64_add(onestwos, &v);
-	r += onestwos;
-	BUG_ON(v.counter != r);
-
-	INIT(v0);
-	atomic64_add(-one, &v);
-	r += -one;
-	BUG_ON(v.counter != r);
+	TEST(64, add, +=, onestwos);
+	TEST(64, add, +=, -one);
+	TEST(64, sub, -=, onestwos);
+	TEST(64, sub, -=, -one);
+	TEST(64, or, |=, v1);
+	TEST(64, and, &=, v1);
+	TEST(64, xor, ^=, v1);
+	TEST(64, andnot, &= ~, v1);
 
 	INIT(v0);
 	r += onestwos;
@@ -55,16 +85,6 @@ static __init int test_atomic64(void)
 	BUG_ON(v.counter != r);
 
 	INIT(v0);
-	atomic64_sub(onestwos, &v);
-	r -= onestwos;
-	BUG_ON(v.counter != r);
-
-	INIT(v0);
-	atomic64_sub(-one, &v);
-	r -= -one;
-	BUG_ON(v.counter != r);
-
-	INIT(v0);
 	r -= onestwos;
 	BUG_ON(atomic64_sub_return(onestwos, &v) != r);
 	BUG_ON(v.counter != r);
@@ -147,6 +167,12 @@ static __init int test_atomic64(void)
 	BUG_ON(!atomic64_inc_not_zero(&v));
 	r += one;
 	BUG_ON(v.counter != r);
+}
+
+static __init int test_atomics(void)
+{
+	test_atomic();
+	test_atomic64();
 
 #ifdef CONFIG_X86
 	pr_info("passed for %s platform %s CX8 and %s SSE\n",
@@ -166,4 +192,4 @@ static __init int test_atomic64(void)
 	return 0;
 }
 
-core_initcall(test_atomic64);
+core_initcall(test_atomics);
-- 
cgit v0.10.2


From 0b792bf519e68108d577fcec815ab50913787012 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 21 Jul 2015 12:13:43 +0200
Subject: locking: Clean up pvqspinlock warning

 - Rename the on-stack variable to match the datastructure variable,

 - place the cmpxchg back under the comment that explains it,

 - clean up the WARN() statement to avoid superfluous conditionals
   and line-breaks.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <Waiman.Long@hp.com>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index df19ae4..489a878 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -287,20 +287,21 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock)
 {
 	struct __qspinlock *l = (void *)lock;
 	struct pv_node *node;
-	u8 lockval = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0);
+	u8 locked;
 
 	/*
 	 * We must not unlock if SLOW, because in that case we must first
 	 * unhash. Otherwise it would be possible to have multiple @lock
 	 * entries, which would be BAD.
 	 */
-	if (likely(lockval == _Q_LOCKED_VAL))
+	locked = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0);
+	if (likely(locked == _Q_LOCKED_VAL))
 		return;
 
-	if (unlikely(lockval != _Q_SLOW_VAL)) {
-		if (debug_locks_silent)
-			return;
-		WARN(1, "pvqspinlock: lock %p has corrupted value 0x%x!\n", lock, atomic_read(&lock->val));
+	if (unlikely(locked != _Q_SLOW_VAL)) {
+		WARN(!debug_locks_silent,
+		     "pvqspinlock: lock 0x%lx has corrupted value 0x%x!\n",
+		     (unsigned long)lock, atomic_read(&lock->val));
 		return;
 	}
 
-- 
cgit v0.10.2


From ed2de9f74ecbbf3063d29b2334e7b455d7f35189 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 16 Jul 2015 16:10:06 +0100
Subject: locking/Documentation: Clarify failed cmpxchg() memory ordering
 semantics

A failed cmpxchg does not provide any memory ordering guarantees, a
property that is used to optimise the cmpxchg implementations on Alpha,
PowerPC and arm64.

This patch updates atomic_ops.txt and memory-barriers.txt to reflect
this.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Douglas Hatch <doug.hatch@hp.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Scott J Norton <scott.norton@hp.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <waiman.long@hp.com>
Link: http://lkml.kernel.org/r/20150716151006.GH26390@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/Documentation/atomic_ops.txt b/Documentation/atomic_ops.txt
index dab6da3..b19fc34 100644
--- a/Documentation/atomic_ops.txt
+++ b/Documentation/atomic_ops.txt
@@ -266,7 +266,9 @@ with the given old and new values. Like all atomic_xxx operations,
 atomic_cmpxchg will only satisfy its atomicity semantics as long as all
 other accesses of *v are performed through atomic_xxx operations.
 
-atomic_cmpxchg must provide explicit memory barriers around the operation.
+atomic_cmpxchg must provide explicit memory barriers around the operation,
+although if the comparison fails then no memory ordering guarantees are
+required.
 
 The semantics for atomic_cmpxchg are the same as those defined for 'cas'
 below.
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index 13feb69..18fc860 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -2383,9 +2383,7 @@ about the state (old or new) implies an SMP-conditional general memory barrier
 explicit lock operations, described later).  These include:
 
 	xchg();
-	cmpxchg();
 	atomic_xchg();			atomic_long_xchg();
-	atomic_cmpxchg();		atomic_long_cmpxchg();
 	atomic_inc_return();		atomic_long_inc_return();
 	atomic_dec_return();		atomic_long_dec_return();
 	atomic_add_return();		atomic_long_add_return();
@@ -2398,7 +2396,9 @@ explicit lock operations, described later).  These include:
 	test_and_clear_bit();
 	test_and_change_bit();
 
-	/* when succeeds (returns 1) */
+	/* when succeeds */
+	cmpxchg();
+	atomic_cmpxchg();		atomic_long_cmpxchg();
 	atomic_add_unless();		atomic_long_add_unless();
 
 These are used for such things as implementing ACQUIRE-class and RELEASE-class
-- 
cgit v0.10.2


From 3b3fdf10a8add87ef0050138d51bfee9ab4983df Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 13 Jul 2015 16:58:30 +0100
Subject: locking/pvqspinlock: Order pv_unhash() after cmpxchg() on unlock
 slowpath

When we unlock in __pv_queued_spin_unlock(), a failed cmpxchg() on the lock
value indicates that we need to take the slow-path and unhash the
corresponding node blocked on the lock.

Since a failed cmpxchg() does not provide any memory-ordering guarantees,
it is possible that the node data could be read before the cmpxchg() on
weakly-ordered architectures and therefore return a stale value, leading
to hash corruption and/or a BUG().

This patch adds an smb_rmb() following the failed cmpxchg operation, so
that the unhashing is ordered after the lock has been checked.

Reported-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
[ Added more comments]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by:  Waiman Long <Waiman.Long@hp.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Cc: Steve Capper <Steve.Capper@arm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20150713155830.GL2632@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 489a878..ab8b1bb 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -244,13 +244,17 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
 		if (!lp) { /* ONCE */
 			lp = pv_hash(lock, pn);
 			/*
-			 * lp must be set before setting _Q_SLOW_VAL
+			 * We must hash before setting _Q_SLOW_VAL, such that
+			 * when we observe _Q_SLOW_VAL in __pv_queued_spin_unlock()
+			 * we'll be sure to be able to observe our hash entry.
 			 *
-			 * [S] lp = lock                [RmW] l = l->locked = 0
-			 *     MB                             MB
-			 * [S] l->locked = _Q_SLOW_VAL  [L]   lp
+			 *   [S] pn->state
+			 *   [S] <hash>                 [Rmw] l->locked == _Q_SLOW_VAL
+			 *       MB                           RMB
+			 * [RmW] l->locked = _Q_SLOW_VAL  [L] <unhash>
+			 *                                [L] pn->state
 			 *
-			 * Matches the cmpxchg() in __pv_queued_spin_unlock().
+			 * Matches the smp_rmb() in __pv_queued_spin_unlock().
 			 */
 			if (!cmpxchg(&l->locked, _Q_LOCKED_VAL, _Q_SLOW_VAL)) {
 				/*
@@ -306,6 +310,15 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock)
 	}
 
 	/*
+	 * A failed cmpxchg doesn't provide any memory-ordering guarantees,
+	 * so we need a barrier to order the read of the node data in
+	 * pv_unhash *after* we've read the lock being _Q_SLOW_VAL.
+	 *
+	 * Matches the cmpxchg() in pv_wait_head() setting _Q_SLOW_VAL.
+	 */
+	smp_rmb();
+
+	/*
 	 * Since the above failed to release, this must be the SLOW path.
 	 * Therefore start by looking up the blocked node and unhashing it.
 	 */
-- 
cgit v0.10.2


From ffffeaf318bd8da036eb8eb784b025a9f829201b Mon Sep 17 00:00:00 2001
From: Waiman Long <Waiman.Long@hp.com>
Date: Thu, 9 Jul 2015 12:32:22 -0400
Subject: locking/qrwlock: Reduce reader/writer to reader lock transfer latency

Currently, a reader will check first to make sure that the writer mode
byte is cleared before incrementing the reader count. That waiting is
not really necessary. It increases the latency in the reader/writer
to reader transition and reduces readers performance.

This patch eliminates that waiting. It also has the side effect
of reducing the chance of writer lock stealing and improving the
fairness of the lock. Using a locking microbenchmark, a 10-threads 5M
locking loop of mostly readers (RW ratio = 10,000:1) has the following
performance numbers in a Haswell-EX box:

        Kernel          Locking Rate (Kops/s)
        ------          ---------------------
        4.1.1               15,063,081
        4.1.1+patch         17,241,552  (+14.4%)

Signed-off-by: Waiman Long <Waiman.Long@hp.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Douglas Hatch <doug.hatch@hp.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Scott J Norton <scott.norton@hp.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Link: http://lkml.kernel.org/r/1436459543-29126-2-git-send-email-Waiman.Long@hp.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c
index d9c36c5..6a7a3b8 100644
--- a/kernel/locking/qrwlock.c
+++ b/kernel/locking/qrwlock.c
@@ -88,15 +88,11 @@ void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts)
 	arch_spin_lock(&lock->lock);
 
 	/*
-	 * At the head of the wait queue now, wait until the writer state
-	 * goes to 0 and then try to increment the reader count and get
-	 * the lock. It is possible that an incoming writer may steal the
-	 * lock in the interim, so it is necessary to check the writer byte
-	 * to make sure that the write lock isn't taken.
+	 * At the head of the wait queue now, increment the reader count
+	 * and wait until the writer, if it has the lock, has gone away.
+	 * At ths stage, it is not possible for a writer to remain in the
+	 * waiting state (_QW_WAITING). So there won't be any deadlock.
 	 */
-	while (atomic_read(&lock->cnts) & _QW_WMASK)
-		cpu_relax_lowlatency();
-
 	cnts = atomic_add_return(_QR_BIAS, &lock->cnts) - _QR_BIAS;
 	rspin_until_writer_unlock(lock, cnts);
 
-- 
cgit v0.10.2


From 75d2270280686bff21b9ba66c7f3dd379c887981 Mon Sep 17 00:00:00 2001
From: Waiman Long <Waiman.Long@hp.com>
Date: Sat, 11 Jul 2015 16:36:52 -0400
Subject: locking/pvqspinlock: Only kick CPU at unlock time

For an over-committed guest with more vCPUs than physical CPUs
available, it is possible that a vCPU may be kicked twice before
getting the lock - once before it becomes queue head and once again
before it gets the lock. All these CPU kicking and halting (VMEXIT)
can be expensive and slow down system performance.

This patch adds a new vCPU state (vcpu_hashed) which enables the code
to delay CPU kicking until at unlock time. Once this state is set,
the new lock holder will set _Q_SLOW_VAL and fill in the hash table
on behalf of the halted queue head vCPU. The original vcpu_halted
state will be used by pv_wait_node() only to differentiate other
queue nodes from the qeue head.

Signed-off-by: Waiman Long <Waiman.Long@hp.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Douglas Hatch <doug.hatch@hp.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Scott J Norton <scott.norton@hp.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1436647018-49734-2-git-send-email-Waiman.Long@hp.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 38c4920..337c881 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -239,8 +239,8 @@ static __always_inline void set_locked(struct qspinlock *lock)
 
 static __always_inline void __pv_init_node(struct mcs_spinlock *node) { }
 static __always_inline void __pv_wait_node(struct mcs_spinlock *node) { }
-static __always_inline void __pv_kick_node(struct mcs_spinlock *node) { }
-
+static __always_inline void __pv_kick_node(struct qspinlock *lock,
+					   struct mcs_spinlock *node) { }
 static __always_inline void __pv_wait_head(struct qspinlock *lock,
 					   struct mcs_spinlock *node) { }
 
@@ -440,7 +440,7 @@ queue:
 		cpu_relax();
 
 	arch_mcs_spin_unlock_contended(&next->locked);
-	pv_kick_node(next);
+	pv_kick_node(lock, next);
 
 release:
 	/*
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index ab8b1bb..c8e6e9a 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -22,9 +22,14 @@
 
 #define _Q_SLOW_VAL	(3U << _Q_LOCKED_OFFSET)
 
+/*
+ * Queue node uses: vcpu_running & vcpu_halted.
+ * Queue head uses: vcpu_running & vcpu_hashed.
+ */
 enum vcpu_state {
 	vcpu_running = 0,
-	vcpu_halted,
+	vcpu_halted,		/* Used only in pv_wait_node */
+	vcpu_hashed,		/* = pv_hash'ed + vcpu_halted */
 };
 
 struct pv_node {
@@ -153,7 +158,8 @@ static void pv_init_node(struct mcs_spinlock *node)
 
 /*
  * Wait for node->locked to become true, halt the vcpu after a short spin.
- * pv_kick_node() is used to wake the vcpu again.
+ * pv_kick_node() is used to set _Q_SLOW_VAL and fill in hash table on its
+ * behalf.
  */
 static void pv_wait_node(struct mcs_spinlock *node)
 {
@@ -172,9 +178,9 @@ static void pv_wait_node(struct mcs_spinlock *node)
 		 *
 		 * [S] pn->state = vcpu_halted	  [S] next->locked = 1
 		 *     MB			      MB
-		 * [L] pn->locked		[RmW] pn->state = vcpu_running
+		 * [L] pn->locked		[RmW] pn->state = vcpu_hashed
 		 *
-		 * Matches the xchg() from pv_kick_node().
+		 * Matches the cmpxchg() from pv_kick_node().
 		 */
 		smp_store_mb(pn->state, vcpu_halted);
 
@@ -182,9 +188,10 @@ static void pv_wait_node(struct mcs_spinlock *node)
 			pv_wait(&pn->state, vcpu_halted);
 
 		/*
-		 * Reset the vCPU state to avoid unncessary CPU kicking
+		 * If pv_kick_node() changed us to vcpu_hashed, retain that value
+		 * so that pv_wait_head() knows to not also try to hash this lock.
 		 */
-		WRITE_ONCE(pn->state, vcpu_running);
+		cmpxchg(&pn->state, vcpu_halted, vcpu_running);
 
 		/*
 		 * If the locked flag is still not set after wakeup, it is a
@@ -194,6 +201,7 @@ static void pv_wait_node(struct mcs_spinlock *node)
 		 * MCS lock will be released soon.
 		 */
 	}
+
 	/*
 	 * By now our node->locked should be 1 and our caller will not actually
 	 * spin-wait for it. We do however rely on our caller to do a
@@ -202,24 +210,35 @@ static void pv_wait_node(struct mcs_spinlock *node)
 }
 
 /*
- * Called after setting next->locked = 1, used to wake those stuck in
- * pv_wait_node().
+ * Called after setting next->locked = 1 when we're the lock owner.
+ *
+ * Instead of waking the waiters stuck in pv_wait_node() advance their state such
+ * that they're waiting in pv_wait_head(), this avoids a wake/sleep cycle.
  */
-static void pv_kick_node(struct mcs_spinlock *node)
+static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
 {
 	struct pv_node *pn = (struct pv_node *)node;
+	struct __qspinlock *l = (void *)lock;
 
 	/*
-	 * Note that because node->locked is already set, this actual
-	 * mcs_spinlock entry could be re-used already.
+	 * If the vCPU is indeed halted, advance its state to match that of
+	 * pv_wait_node(). If OTOH this fails, the vCPU was running and will
+	 * observe its next->locked value and advance itself.
 	 *
-	 * This should be fine however, kicking people for no reason is
-	 * harmless.
+	 * Matches with smp_store_mb() and cmpxchg() in pv_wait_node()
+	 */
+	if (cmpxchg(&pn->state, vcpu_halted, vcpu_hashed) != vcpu_halted)
+		return;
+
+	/*
+	 * Put the lock into the hash table and set the _Q_SLOW_VAL.
 	 *
-	 * See the comment in pv_wait_node().
+	 * As this is the same vCPU that will check the _Q_SLOW_VAL value and
+	 * the hash table later on at unlock time, no atomic instruction is
+	 * needed.
 	 */
-	if (xchg(&pn->state, vcpu_running) == vcpu_halted)
-		pv_kick(pn->cpu);
+	WRITE_ONCE(l->locked, _Q_SLOW_VAL);
+	(void)pv_hash(lock, pn);
 }
 
 /*
@@ -233,6 +252,13 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
 	struct qspinlock **lp = NULL;
 	int loop;
 
+	/*
+	 * If pv_kick_node() already advanced our state, we don't need to
+	 * insert ourselves into the hash table anymore.
+	 */
+	if (READ_ONCE(pn->state) == vcpu_hashed)
+		lp = (struct qspinlock **)1;
+
 	for (;;) {
 		for (loop = SPIN_THRESHOLD; loop; loop--) {
 			if (!READ_ONCE(l->locked))
@@ -240,9 +266,10 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
 			cpu_relax();
 		}
 
-		WRITE_ONCE(pn->state, vcpu_halted);
 		if (!lp) { /* ONCE */
+			WRITE_ONCE(pn->state, vcpu_hashed);
 			lp = pv_hash(lock, pn);
+
 			/*
 			 * We must hash before setting _Q_SLOW_VAL, such that
 			 * when we observe _Q_SLOW_VAL in __pv_queued_spin_unlock()
@@ -333,8 +360,11 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock)
 	/*
 	 * At this point the memory pointed at by lock can be freed/reused,
 	 * however we can still use the pv_node to kick the CPU.
+	 * The other vCPU may not really be halted, but kicking an active
+	 * vCPU is harmless other than the additional latency in completing
+	 * the unlock.
 	 */
-	if (READ_ONCE(node->state) == vcpu_halted)
+	if (READ_ONCE(node->state) == vcpu_hashed)
 		pv_kick(node->cpu);
 }
 /*
-- 
cgit v0.10.2


From 76695af20c015206cffb84b15912be6797d0cca2 Mon Sep 17 00:00:00 2001
From: Andrey Konovalov <andreyknvl@google.com>
Date: Sun, 2 Aug 2015 17:11:04 +0200
Subject: locking, arch: use WRITE_ONCE()/READ_ONCE() in
 smp_store_release()/smp_load_acquire()

Replace ACCESS_ONCE() macro in smp_store_release() and smp_load_acquire()
with WRITE_ONCE() and READ_ONCE() on x86, arm, arm64, ia64, metag, mips,
powerpc, s390, sparc and asm-generic since ACCESS_ONCE() does not work
reliably on non-scalar types.

WRITE_ONCE() and READ_ONCE() were introduced in the following commits:

  230fa253df63 ("kernel: Provide READ_ONCE and ASSIGN_ONCE")
  43239cbe79fc ("kernel: Change ASSIGN_ONCE(val, x) to WRITE_ONCE(x, val)")

Signed-off-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Davidlohr Bueso <dbueso@suse.de>
Acked-by: Michael Ellerman <mpe@ellerman.id.au> (powerpc)
Acked-by: Ralf Baechle <ralf@linux-mips.org>
Cc: Alexander Duyck <alexander.h.duyck@redhat.com>
Cc: Andre Przywara <andre.przywara@arm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Borislav Petkov <bp@suse.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-arch@vger.kernel.org
Link: http://lkml.kernel.org/r/1438528264-714-1-git-send-email-andreyknvl@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h
index 6c2327e..7039357 100644
--- a/arch/arm/include/asm/barrier.h
+++ b/arch/arm/include/asm/barrier.h
@@ -67,12 +67,12 @@
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	smp_mb();							\
-	ACCESS_ONCE(*p) = (v);						\
+	WRITE_ONCE(*p, v);						\
 } while (0)
 
 #define smp_load_acquire(p)						\
 ({									\
-	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	typeof(*p) ___p1 = READ_ONCE(*p);				\
 	compiletime_assert_atomic_type(*p);				\
 	smp_mb();							\
 	___p1;								\
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 0fa47c4..ef93b20 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -44,12 +44,12 @@
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	barrier();							\
-	ACCESS_ONCE(*p) = (v);						\
+	WRITE_ONCE(*p, v);						\
 } while (0)
 
 #define smp_load_acquire(p)						\
 ({									\
-	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	typeof(*p) ___p1 = READ_ONCE(*p);				\
 	compiletime_assert_atomic_type(*p);				\
 	barrier();							\
 	___p1;								\
diff --git a/arch/ia64/include/asm/barrier.h b/arch/ia64/include/asm/barrier.h
index 843ba43..df896a1 100644
--- a/arch/ia64/include/asm/barrier.h
+++ b/arch/ia64/include/asm/barrier.h
@@ -66,12 +66,12 @@
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	barrier();							\
-	ACCESS_ONCE(*p) = (v);						\
+	WRITE_ONCE(*p, v);						\
 } while (0)
 
 #define smp_load_acquire(p)						\
 ({									\
-	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	typeof(*p) ___p1 = READ_ONCE(*p);				\
 	compiletime_assert_atomic_type(*p);				\
 	barrier();							\
 	___p1;								\
diff --git a/arch/metag/include/asm/barrier.h b/arch/metag/include/asm/barrier.h
index 5a696e5..172b7e5 100644
--- a/arch/metag/include/asm/barrier.h
+++ b/arch/metag/include/asm/barrier.h
@@ -90,12 +90,12 @@ static inline void fence(void)
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	smp_mb();							\
-	ACCESS_ONCE(*p) = (v);						\
+	WRITE_ONCE(*p, v);						\
 } while (0)
 
 #define smp_load_acquire(p)						\
 ({									\
-	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	typeof(*p) ___p1 = READ_ONCE(*p);				\
 	compiletime_assert_atomic_type(*p);				\
 	smp_mb();							\
 	___p1;								\
diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h
index 7ecba84..752e0b8 100644
--- a/arch/mips/include/asm/barrier.h
+++ b/arch/mips/include/asm/barrier.h
@@ -133,12 +133,12 @@
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	smp_mb();							\
-	ACCESS_ONCE(*p) = (v);						\
+	WRITE_ONCE(*p, v);						\
 } while (0)
 
 #define smp_load_acquire(p)						\
 ({									\
-	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	typeof(*p) ___p1 = READ_ONCE(*p);				\
 	compiletime_assert_atomic_type(*p);				\
 	smp_mb();							\
 	___p1;								\
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index 51ccc72..0eca6ef 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -76,12 +76,12 @@
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	smp_lwsync();							\
-	ACCESS_ONCE(*p) = (v);						\
+	WRITE_ONCE(*p, v);						\
 } while (0)
 
 #define smp_load_acquire(p)						\
 ({									\
-	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	typeof(*p) ___p1 = READ_ONCE(*p);				\
 	compiletime_assert_atomic_type(*p);				\
 	smp_lwsync();							\
 	___p1;								\
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
index e6f8615..d48fe01 100644
--- a/arch/s390/include/asm/barrier.h
+++ b/arch/s390/include/asm/barrier.h
@@ -42,12 +42,12 @@
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	barrier();							\
-	ACCESS_ONCE(*p) = (v);						\
+	WRITE_ONCE(*p, v);						\
 } while (0)
 
 #define smp_load_acquire(p)						\
 ({									\
-	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	typeof(*p) ___p1 = READ_ONCE(*p);				\
 	compiletime_assert_atomic_type(*p);				\
 	barrier();							\
 	___p1;								\
diff --git a/arch/sparc/include/asm/barrier_64.h b/arch/sparc/include/asm/barrier_64.h
index 809941e..14a9286 100644
--- a/arch/sparc/include/asm/barrier_64.h
+++ b/arch/sparc/include/asm/barrier_64.h
@@ -60,12 +60,12 @@ do {	__asm__ __volatile__("ba,pt	%%xcc, 1f\n\t" \
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	barrier();							\
-	ACCESS_ONCE(*p) = (v);						\
+	WRITE_ONCE(*p, v);						\
 } while (0)
 
 #define smp_load_acquire(p)						\
 ({									\
-	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	typeof(*p) ___p1 = READ_ONCE(*p);				\
 	compiletime_assert_atomic_type(*p);				\
 	barrier();							\
 	___p1;								\
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index e51a8f8..d2bcfbe 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -57,12 +57,12 @@
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	smp_mb();							\
-	ACCESS_ONCE(*p) = (v);						\
+	WRITE_ONCE(*p, v);						\
 } while (0)
 
 #define smp_load_acquire(p)						\
 ({									\
-	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	typeof(*p) ___p1 = READ_ONCE(*p);				\
 	compiletime_assert_atomic_type(*p);				\
 	smp_mb();							\
 	___p1;								\
@@ -74,12 +74,12 @@ do {									\
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	barrier();							\
-	ACCESS_ONCE(*p) = (v);						\
+	WRITE_ONCE(*p, v);						\
 } while (0)
 
 #define smp_load_acquire(p)						\
 ({									\
-	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	typeof(*p) ___p1 = READ_ONCE(*p);				\
 	compiletime_assert_atomic_type(*p);				\
 	barrier();							\
 	___p1;								\
diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h
index 55e3abc..b42afad 100644
--- a/include/asm-generic/barrier.h
+++ b/include/asm-generic/barrier.h
@@ -108,12 +108,12 @@
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	smp_mb();							\
-	ACCESS_ONCE(*p) = (v);						\
+	WRITE_ONCE(*p, v);						\
 } while (0)
 
 #define smp_load_acquire(p)						\
 ({									\
-	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	typeof(*p) ___p1 = READ_ONCE(*p);				\
 	compiletime_assert_atomic_type(*p);				\
 	smp_mb();							\
 	___p1;								\
-- 
cgit v0.10.2


From 76b235c6bcb16062d663e2ee96db0b69f2e6bc14 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 24 Jul 2015 14:45:44 +0200
Subject: jump_label: Rename JUMP_LABEL_{EN,DIS}ABLE to JUMP_LABEL_{JMP,NOP}

Since we've already stepped away from ENABLE is a JMP and DISABLE is a
NOP with the branch_default bits, and are going to make it even worse,
rename it to make it all clearer.

This way we don't mix multiple levels of logic attributes, but have a
plain 'physical' name for what the current instruction patching status
of a jump label is.

This is a first step in removing the naming confusion that has led to
a stream of avoidable bugs such as:

  a833581e372a ("x86, perf: Fix static_key bug in load_mm_cr4()")

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
[ Beefed up the changelog. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/arch/arm/kernel/jump_label.c b/arch/arm/kernel/jump_label.c
index e39cbf4..845a5dd 100644
--- a/arch/arm/kernel/jump_label.c
+++ b/arch/arm/kernel/jump_label.c
@@ -12,7 +12,7 @@ static void __arch_jump_label_transform(struct jump_entry *entry,
 	void *addr = (void *)entry->code;
 	unsigned int insn;
 
-	if (type == JUMP_LABEL_ENABLE)
+	if (type == JUMP_LABEL_JMP)
 		insn = arm_gen_branch(entry->code, entry->target);
 	else
 		insn = arm_gen_nop();
diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c
index 4f1fec7..c2dd1ad 100644
--- a/arch/arm64/kernel/jump_label.c
+++ b/arch/arm64/kernel/jump_label.c
@@ -28,7 +28,7 @@ void arch_jump_label_transform(struct jump_entry *entry,
 	void *addr = (void *)entry->code;
 	u32 insn;
 
-	if (type == JUMP_LABEL_ENABLE) {
+	if (type == JUMP_LABEL_JMP) {
 		insn = aarch64_insn_gen_branch_imm(entry->code,
 						   entry->target,
 						   AARCH64_INSN_BRANCH_NOLINK);
diff --git a/arch/mips/kernel/jump_label.c b/arch/mips/kernel/jump_label.c
index dda800e..3e586da 100644
--- a/arch/mips/kernel/jump_label.c
+++ b/arch/mips/kernel/jump_label.c
@@ -51,7 +51,7 @@ void arch_jump_label_transform(struct jump_entry *e,
 	/* Target must have the right alignment and ISA must be preserved. */
 	BUG_ON((e->target & J_ALIGN_MASK) != J_ISA_BIT);
 
-	if (type == JUMP_LABEL_ENABLE) {
+	if (type == JUMP_LABEL_JMP) {
 		insn.j_format.opcode = J_ISA_BIT ? mm_j32_op : j_op;
 		insn.j_format.target = e->target >> J_RANGE_SHIFT;
 	} else {
diff --git a/arch/powerpc/kernel/jump_label.c b/arch/powerpc/kernel/jump_label.c
index a1ed8a8..6472472 100644
--- a/arch/powerpc/kernel/jump_label.c
+++ b/arch/powerpc/kernel/jump_label.c
@@ -17,7 +17,7 @@ void arch_jump_label_transform(struct jump_entry *entry,
 {
 	u32 *addr = (u32 *)(unsigned long)entry->code;
 
-	if (type == JUMP_LABEL_ENABLE)
+	if (type == JUMP_LABEL_JMP)
 		patch_branch(addr, entry->target, 0);
 	else
 		patch_instruction(addr, PPC_INST_NOP);
diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c
index a902996..a83d224 100644
--- a/arch/s390/kernel/jump_label.c
+++ b/arch/s390/kernel/jump_label.c
@@ -64,7 +64,7 @@ static void __jump_label_transform(struct jump_entry *entry,
 {
 	struct insn old, new;
 
-	if (type == JUMP_LABEL_ENABLE) {
+	if (type == JUMP_LABEL_JMP) {
 		jump_label_make_nop(entry, &old);
 		jump_label_make_branch(entry, &new);
 	} else {
diff --git a/arch/sparc/kernel/jump_label.c b/arch/sparc/kernel/jump_label.c
index 48565c1..59bbeff 100644
--- a/arch/sparc/kernel/jump_label.c
+++ b/arch/sparc/kernel/jump_label.c
@@ -16,7 +16,7 @@ void arch_jump_label_transform(struct jump_entry *entry,
 	u32 val;
 	u32 *insn = (u32 *) (unsigned long) entry->code;
 
-	if (type == JUMP_LABEL_ENABLE) {
+	if (type == JUMP_LABEL_JMP) {
 		s32 off = (s32)entry->target - (s32)entry->code;
 
 #ifdef CONFIG_SPARC64
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index 26d5a55..e565e0e 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -45,7 +45,7 @@ static void __jump_label_transform(struct jump_entry *entry,
 	const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
 	const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5];
 
-	if (type == JUMP_LABEL_ENABLE) {
+	if (type == JUMP_LABEL_JMP) {
 		if (init) {
 			/*
 			 * Jump label is enabled for the first time.
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index f4de473..6a8b4fe 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -86,8 +86,8 @@ struct static_key {
 #ifndef __ASSEMBLY__
 
 enum jump_label_type {
-	JUMP_LABEL_DISABLE = 0,
-	JUMP_LABEL_ENABLE,
+	JUMP_LABEL_NOP = 0,
+	JUMP_LABEL_JMP,
 };
 
 struct module;
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 52ebaca..96d8945 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -65,9 +65,9 @@ void static_key_slow_inc(struct static_key *key)
 	jump_label_lock();
 	if (atomic_read(&key->enabled) == 0) {
 		if (!jump_label_get_branch_default(key))
-			jump_label_update(key, JUMP_LABEL_ENABLE);
+			jump_label_update(key, JUMP_LABEL_JMP);
 		else
-			jump_label_update(key, JUMP_LABEL_DISABLE);
+			jump_label_update(key, JUMP_LABEL_NOP);
 	}
 	atomic_inc(&key->enabled);
 	jump_label_unlock();
@@ -88,9 +88,9 @@ static void __static_key_slow_dec(struct static_key *key,
 		schedule_delayed_work(work, rate_limit);
 	} else {
 		if (!jump_label_get_branch_default(key))
-			jump_label_update(key, JUMP_LABEL_DISABLE);
+			jump_label_update(key, JUMP_LABEL_NOP);
 		else
-			jump_label_update(key, JUMP_LABEL_ENABLE);
+			jump_label_update(key, JUMP_LABEL_JMP);
 	}
 	jump_label_unlock();
 }
@@ -184,9 +184,9 @@ static enum jump_label_type jump_label_type(struct static_key *key)
 	bool state = static_key_enabled(key);
 
 	if ((!true_branch && state) || (true_branch && !state))
-		return JUMP_LABEL_ENABLE;
+		return JUMP_LABEL_JMP;
 
-	return JUMP_LABEL_DISABLE;
+	return JUMP_LABEL_NOP;
 }
 
 void __init jump_label_init(void)
@@ -276,7 +276,7 @@ void jump_label_apply_nops(struct module *mod)
 		return;
 
 	for (iter = iter_start; iter < iter_stop; iter++) {
-		arch_jump_label_transform_static(iter, JUMP_LABEL_DISABLE);
+		arch_jump_label_transform_static(iter, JUMP_LABEL_NOP);
 	}
 }
 
@@ -318,8 +318,8 @@ static int jump_label_add_module(struct module *mod)
 		jlm->next = key->next;
 		key->next = jlm;
 
-		if (jump_label_type(key) == JUMP_LABEL_ENABLE)
-			__jump_label_update(key, iter, iter_stop, JUMP_LABEL_ENABLE);
+		if (jump_label_type(key) == JUMP_LABEL_JMP)
+			__jump_label_update(key, iter, iter_stop, JUMP_LABEL_JMP);
 	}
 
 	return 0;
-- 
cgit v0.10.2


From a1efb01feca597b2abbc89873b40ef8ec6690168 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 24 Jul 2015 14:55:40 +0200
Subject: jump_label, locking/static_keys: Rename JUMP_LABEL_TYPE_* and related
 helpers to the static_key* pattern

Rename the JUMP_LABEL_TYPE_* macros to be JUMP_TYPE_* and move the
inline helpers into kernel/jump_label.c, since that's the only place
they're ever used.

Also rename the helpers where it's all about static keys.

This is the second step in removing the naming confusion that has led to
a stream of avoidable bugs such as:

  a833581e372a ("x86, perf: Fix static_key bug in load_mm_cr4()")

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 6a8b4fe..0ddb208 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -101,24 +101,9 @@ static inline int static_key_count(struct static_key *key)
 
 #ifdef HAVE_JUMP_LABEL
 
-#define JUMP_LABEL_TYPE_FALSE_BRANCH	0UL
-#define JUMP_LABEL_TYPE_TRUE_BRANCH	1UL
-#define JUMP_LABEL_TYPE_MASK		1UL
-
-static
-inline struct jump_entry *jump_label_get_entries(struct static_key *key)
-{
-	return (struct jump_entry *)((unsigned long)key->entries
-						& ~JUMP_LABEL_TYPE_MASK);
-}
-
-static inline bool jump_label_get_branch_default(struct static_key *key)
-{
-	if (((unsigned long)key->entries & JUMP_LABEL_TYPE_MASK) ==
-	    JUMP_LABEL_TYPE_TRUE_BRANCH)
-		return true;
-	return false;
-}
+#define JUMP_TYPE_FALSE	0UL
+#define JUMP_TYPE_TRUE	1UL
+#define JUMP_TYPE_MASK	1UL
 
 static __always_inline bool static_key_false(struct static_key *key)
 {
@@ -147,10 +132,10 @@ extern void jump_label_apply_nops(struct module *mod);
 
 #define STATIC_KEY_INIT_TRUE ((struct static_key)		\
 	{ .enabled = ATOMIC_INIT(1),				\
-	  .entries = (void *)JUMP_LABEL_TYPE_TRUE_BRANCH })
+	  .entries = (void *)JUMP_TYPE_TRUE })
 #define STATIC_KEY_INIT_FALSE ((struct static_key)		\
 	{ .enabled = ATOMIC_INIT(0),				\
-	  .entries = (void *)JUMP_LABEL_TYPE_FALSE_BRANCH })
+	  .entries = (void *)JUMP_TYPE_FALSE })
 
 #else  /* !HAVE_JUMP_LABEL */
 
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 96d8945..85a2a00 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -56,6 +56,11 @@ jump_label_sort_entries(struct jump_entry *start, struct jump_entry *stop)
 
 static void jump_label_update(struct static_key *key, int enable);
 
+static inline bool static_key_type(struct static_key *key)
+{
+	return (unsigned long)key->entries & JUMP_TYPE_MASK;
+}
+
 void static_key_slow_inc(struct static_key *key)
 {
 	STATIC_KEY_CHECK_USE();
@@ -64,7 +69,7 @@ void static_key_slow_inc(struct static_key *key)
 
 	jump_label_lock();
 	if (atomic_read(&key->enabled) == 0) {
-		if (!jump_label_get_branch_default(key))
+		if (!static_key_type(key))
 			jump_label_update(key, JUMP_LABEL_JMP);
 		else
 			jump_label_update(key, JUMP_LABEL_NOP);
@@ -87,7 +92,7 @@ static void __static_key_slow_dec(struct static_key *key,
 		atomic_inc(&key->enabled);
 		schedule_delayed_work(work, rate_limit);
 	} else {
-		if (!jump_label_get_branch_default(key))
+		if (!static_key_type(key))
 			jump_label_update(key, JUMP_LABEL_NOP);
 		else
 			jump_label_update(key, JUMP_LABEL_JMP);
@@ -178,15 +183,17 @@ static void __jump_label_update(struct static_key *key,
 	}
 }
 
-static enum jump_label_type jump_label_type(struct static_key *key)
+static inline struct jump_entry *static_key_entries(struct static_key *key)
 {
-	bool true_branch = jump_label_get_branch_default(key);
-	bool state = static_key_enabled(key);
+	return (struct jump_entry *)((unsigned long)key->entries & ~JUMP_TYPE_MASK);
+}
 
-	if ((!true_branch && state) || (true_branch && !state))
-		return JUMP_LABEL_JMP;
+static enum jump_label_type jump_label_type(struct static_key *key)
+{
+	bool enabled = static_key_enabled(key);
+	bool type = static_key_type(key);
 
-	return JUMP_LABEL_NOP;
+	return enabled ^ type;
 }
 
 void __init jump_label_init(void)
@@ -442,7 +449,7 @@ int jump_label_text_reserved(void *start, void *end)
 static void jump_label_update(struct static_key *key, int enable)
 {
 	struct jump_entry *stop = __stop___jump_table;
-	struct jump_entry *entry = jump_label_get_entries(key);
+	struct jump_entry *entry = static_key_entries(key);
 #ifdef CONFIG_MODULES
 	struct module *mod;
 
-- 
cgit v0.10.2


From 7dcfd915bae51571bcc339d8e3dda027287880e5 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 24 Jul 2015 15:02:27 +0200
Subject: jump_label: Add jump_entry_key() helper

Avoid some casting with a helper, also prepares the way for
overloading the LSB of jump_entry::key.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 85a2a00..72707e4 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -188,6 +188,11 @@ static inline struct jump_entry *static_key_entries(struct static_key *key)
 	return (struct jump_entry *)((unsigned long)key->entries & ~JUMP_TYPE_MASK);
 }
 
+static inline struct static_key *jump_entry_key(struct jump_entry *entry)
+{
+	return (struct static_key *)((unsigned long)entry->key);
+}
+
 static enum jump_label_type jump_label_type(struct static_key *key)
 {
 	bool enabled = static_key_enabled(key);
@@ -209,7 +214,7 @@ void __init jump_label_init(void)
 	for (iter = iter_start; iter < iter_stop; iter++) {
 		struct static_key *iterk;
 
-		iterk = (struct static_key *)(unsigned long)iter->key;
+		iterk = jump_entry_key(iter);
 		arch_jump_label_transform_static(iter, jump_label_type(iterk));
 		if (iterk == key)
 			continue;
@@ -304,7 +309,7 @@ static int jump_label_add_module(struct module *mod)
 	for (iter = iter_start; iter < iter_stop; iter++) {
 		struct static_key *iterk;
 
-		iterk = (struct static_key *)(unsigned long)iter->key;
+		iterk = jump_entry_key(iter);
 		if (iterk == key)
 			continue;
 
@@ -341,10 +346,10 @@ static void jump_label_del_module(struct module *mod)
 	struct static_key_mod *jlm, **prev;
 
 	for (iter = iter_start; iter < iter_stop; iter++) {
-		if (iter->key == (jump_label_t)(unsigned long)key)
+		if (jump_entry_key(iter) == key)
 			continue;
 
-		key = (struct static_key *)(unsigned long)iter->key;
+		key = jump_entry_key(iter);
 
 		if (within_module(iter->key, mod))
 			continue;
-- 
cgit v0.10.2


From e33886b38cc82a9fc3b2d655dfc7f50467594138 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 24 Jul 2015 15:03:40 +0200
Subject: locking/static_keys: Add static_key_{en,dis}able() helpers

Add two helpers to make it easier to treat the refcount as boolean.

Suggested-by: Jason Baron <jasonbaron0@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 0ddb208..65f0eba 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -198,6 +198,26 @@ static inline bool static_key_enabled(struct static_key *key)
 	return static_key_count(key) > 0;
 }
 
+static inline void static_key_enable(struct static_key *key)
+{
+	int count = static_key_count(key);
+
+	WARN_ON_ONCE(count < 0 || count > 1);
+
+	if (!count)
+		static_key_slow_inc(key);
+}
+
+static inline void static_key_disable(struct static_key *key)
+{
+	int count = static_key_count(key);
+
+	WARN_ON_ONCE(count < 0 || count > 1);
+
+	if (count)
+		static_key_slow_dec(key);
+}
+
 #endif	/* _LINUX_JUMP_LABEL_H */
 
 #endif /* __ASSEMBLY__ */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 78b4bad10..66ae8ba 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -164,14 +164,12 @@ struct static_key sched_feat_keys[__SCHED_FEAT_NR] = {
 
 static void sched_feat_disable(int i)
 {
-	if (static_key_enabled(&sched_feat_keys[i]))
-		static_key_slow_dec(&sched_feat_keys[i]);
+	static_key_disable(&sched_feat_keys[i]);
 }
 
 static void sched_feat_enable(int i)
 {
-	if (!static_key_enabled(&sched_feat_keys[i]))
-		static_key_slow_inc(&sched_feat_keys[i]);
+	static_key_enable(&sched_feat_keys[i]);
 }
 #else
 static void sched_feat_disable(int i) { };
-- 
cgit v0.10.2


From 706249c222f68471b6f8e9e8e9b77665c404b226 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 24 Jul 2015 15:06:37 +0200
Subject: locking/static_keys: Rework update logic

Instead of spreading the branch_default logic all over the place,
concentrate it into the one jump_label_type() function.

This does mean we need to actually increment/decrement the enabled
count _before_ calling the update path, otherwise jump_label_type()
will not see the right state.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 72707e4..2e7cc1e 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -54,12 +54,7 @@ jump_label_sort_entries(struct jump_entry *start, struct jump_entry *stop)
 	sort(start, size, sizeof(struct jump_entry), jump_label_cmp, NULL);
 }
 
-static void jump_label_update(struct static_key *key, int enable);
-
-static inline bool static_key_type(struct static_key *key)
-{
-	return (unsigned long)key->entries & JUMP_TYPE_MASK;
-}
+static void jump_label_update(struct static_key *key);
 
 void static_key_slow_inc(struct static_key *key)
 {
@@ -68,13 +63,8 @@ void static_key_slow_inc(struct static_key *key)
 		return;
 
 	jump_label_lock();
-	if (atomic_read(&key->enabled) == 0) {
-		if (!static_key_type(key))
-			jump_label_update(key, JUMP_LABEL_JMP);
-		else
-			jump_label_update(key, JUMP_LABEL_NOP);
-	}
-	atomic_inc(&key->enabled);
+	if (atomic_inc_return(&key->enabled) == 1)
+		jump_label_update(key);
 	jump_label_unlock();
 }
 EXPORT_SYMBOL_GPL(static_key_slow_inc);
@@ -92,10 +82,7 @@ static void __static_key_slow_dec(struct static_key *key,
 		atomic_inc(&key->enabled);
 		schedule_delayed_work(work, rate_limit);
 	} else {
-		if (!static_key_type(key))
-			jump_label_update(key, JUMP_LABEL_NOP);
-		else
-			jump_label_update(key, JUMP_LABEL_JMP);
+		jump_label_update(key);
 	}
 	jump_label_unlock();
 }
@@ -154,7 +141,7 @@ static int __jump_label_text_reserved(struct jump_entry *iter_start,
 	return 0;
 }
 
-/* 
+/*
  * Update code which is definitely not currently executing.
  * Architectures which need heavyweight synchronization to modify
  * running code can override this to make the non-live update case
@@ -163,29 +150,17 @@ static int __jump_label_text_reserved(struct jump_entry *iter_start,
 void __weak __init_or_module arch_jump_label_transform_static(struct jump_entry *entry,
 					    enum jump_label_type type)
 {
-	arch_jump_label_transform(entry, type);	
+	arch_jump_label_transform(entry, type);
 }
 
-static void __jump_label_update(struct static_key *key,
-				struct jump_entry *entry,
-				struct jump_entry *stop, int enable)
+static inline struct jump_entry *static_key_entries(struct static_key *key)
 {
-	for (; (entry < stop) &&
-	      (entry->key == (jump_label_t)(unsigned long)key);
-	      entry++) {
-		/*
-		 * entry->code set to 0 invalidates module init text sections
-		 * kernel_text_address() verifies we are not in core kernel
-		 * init code, see jump_label_invalidate_module_init().
-		 */
-		if (entry->code && kernel_text_address(entry->code))
-			arch_jump_label_transform(entry, enable);
-	}
+	return (struct jump_entry *)((unsigned long)key->entries & ~JUMP_TYPE_MASK);
 }
 
-static inline struct jump_entry *static_key_entries(struct static_key *key)
+static inline bool static_key_type(struct static_key *key)
 {
-	return (struct jump_entry *)((unsigned long)key->entries & ~JUMP_TYPE_MASK);
+	return (unsigned long)key->entries & JUMP_TYPE_MASK;
 }
 
 static inline struct static_key *jump_entry_key(struct jump_entry *entry)
@@ -193,14 +168,30 @@ static inline struct static_key *jump_entry_key(struct jump_entry *entry)
 	return (struct static_key *)((unsigned long)entry->key);
 }
 
-static enum jump_label_type jump_label_type(struct static_key *key)
+static enum jump_label_type jump_label_type(struct jump_entry *entry)
 {
+	struct static_key *key = jump_entry_key(entry);
 	bool enabled = static_key_enabled(key);
 	bool type = static_key_type(key);
 
 	return enabled ^ type;
 }
 
+static void __jump_label_update(struct static_key *key,
+				struct jump_entry *entry,
+				struct jump_entry *stop)
+{
+	for (; (entry < stop) && (jump_entry_key(entry) == key); entry++) {
+		/*
+		 * entry->code set to 0 invalidates module init text sections
+		 * kernel_text_address() verifies we are not in core kernel
+		 * init code, see jump_label_invalidate_module_init().
+		 */
+		if (entry->code && kernel_text_address(entry->code))
+			arch_jump_label_transform(entry, jump_label_type(entry));
+	}
+}
+
 void __init jump_label_init(void)
 {
 	struct jump_entry *iter_start = __start___jump_table;
@@ -214,8 +205,8 @@ void __init jump_label_init(void)
 	for (iter = iter_start; iter < iter_stop; iter++) {
 		struct static_key *iterk;
 
+		arch_jump_label_transform_static(iter, jump_label_type(iter));
 		iterk = jump_entry_key(iter);
-		arch_jump_label_transform_static(iter, jump_label_type(iterk));
 		if (iterk == key)
 			continue;
 
@@ -255,17 +246,15 @@ static int __jump_label_mod_text_reserved(void *start, void *end)
 				start, end);
 }
 
-static void __jump_label_mod_update(struct static_key *key, int enable)
+static void __jump_label_mod_update(struct static_key *key)
 {
-	struct static_key_mod *mod = key->next;
+	struct static_key_mod *mod;
 
-	while (mod) {
+	for (mod = key->next; mod; mod = mod->next) {
 		struct module *m = mod->mod;
 
 		__jump_label_update(key, mod->entries,
-				    m->jump_entries + m->num_jump_entries,
-				    enable);
-		mod = mod->next;
+				    m->jump_entries + m->num_jump_entries);
 	}
 }
 
@@ -287,9 +276,8 @@ void jump_label_apply_nops(struct module *mod)
 	if (iter_start == iter_stop)
 		return;
 
-	for (iter = iter_start; iter < iter_stop; iter++) {
+	for (iter = iter_start; iter < iter_stop; iter++)
 		arch_jump_label_transform_static(iter, JUMP_LABEL_NOP);
-	}
 }
 
 static int jump_label_add_module(struct module *mod)
@@ -330,8 +318,8 @@ static int jump_label_add_module(struct module *mod)
 		jlm->next = key->next;
 		key->next = jlm;
 
-		if (jump_label_type(key) == JUMP_LABEL_JMP)
-			__jump_label_update(key, iter, iter_stop, JUMP_LABEL_JMP);
+		if (jump_label_type(iter) == JUMP_LABEL_JMP)
+			__jump_label_update(key, iter, iter_stop);
 	}
 
 	return 0;
@@ -451,14 +439,14 @@ int jump_label_text_reserved(void *start, void *end)
 	return ret;
 }
 
-static void jump_label_update(struct static_key *key, int enable)
+static void jump_label_update(struct static_key *key)
 {
 	struct jump_entry *stop = __stop___jump_table;
 	struct jump_entry *entry = static_key_entries(key);
 #ifdef CONFIG_MODULES
 	struct module *mod;
 
-	__jump_label_mod_update(key, enable);
+	__jump_label_mod_update(key);
 
 	preempt_disable();
 	mod = __module_address((unsigned long)key);
@@ -468,7 +456,7 @@ static void jump_label_update(struct static_key *key, int enable)
 #endif
 	/* if there are no users, entry can be NULL */
 	if (entry)
-		__jump_label_update(key, entry, stop, enable);
+		__jump_label_update(key, entry, stop);
 }
 
 #endif
-- 
cgit v0.10.2


From 11276d5306b8e5b438a36bbff855fe792d7eaa61 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 24 Jul 2015 15:09:55 +0200
Subject: locking/static_keys: Add a new static_key interface

There are various problems and short-comings with the current
static_key interface:

 - static_key_{true,false}() read like a branch depending on the key
   value, instead of the actual likely/unlikely branch depending on
   init value.

 - static_key_{true,false}() are, as stated above, tied to the
   static_key init values STATIC_KEY_INIT_{TRUE,FALSE}.

 - we're limited to the 2 (out of 4) possible options that compile to
   a default NOP because that's what our arch_static_branch() assembly
   emits.

So provide a new static_key interface:

  DEFINE_STATIC_KEY_TRUE(name);
  DEFINE_STATIC_KEY_FALSE(name);

Which define a key of different types with an initial true/false
value.

Then allow:

   static_branch_likely()
   static_branch_unlikely()

to take a key of either type and emit the right instruction for the
case.

This means adding a second arch_static_branch_jump() assembly helper
which emits a JMP per default.

In order to determine the right instruction for the right state,
encode the branch type in the LSB of jump_entry::key.

This is the final step in removing the naming confusion that has led to
a stream of avoidable bugs such as:

  a833581e372a ("x86, perf: Fix static_key bug in load_mm_cr4()")

... but it also allows new static key combinations that will give us
performance enhancements in the subsequent patches.

Tested-by: Rabin Vincent <rabin@rab.in> # arm
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Michael Ellerman <mpe@ellerman.id.au> # ppc
Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com> # s390
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/arch/arm/include/asm/jump_label.h b/arch/arm/include/asm/jump_label.h
index 5f337dc..34f7b69 100644
--- a/arch/arm/include/asm/jump_label.h
+++ b/arch/arm/include/asm/jump_label.h
@@ -4,23 +4,32 @@
 #ifndef __ASSEMBLY__
 
 #include <linux/types.h>
+#include <asm/unified.h>
 
 #define JUMP_LABEL_NOP_SIZE 4
 
-#ifdef CONFIG_THUMB2_KERNEL
-#define JUMP_LABEL_NOP	"nop.w"
-#else
-#define JUMP_LABEL_NOP	"nop"
-#endif
+static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
+{
+	asm_volatile_goto("1:\n\t"
+		 WASM(nop) "\n\t"
+		 ".pushsection __jump_table,  \"aw\"\n\t"
+		 ".word 1b, %l[l_yes], %c0\n\t"
+		 ".popsection\n\t"
+		 : :  "i" (&((char *)key)[branch]) :  : l_yes);
+
+	return false;
+l_yes:
+	return true;
+}
 
-static __always_inline bool arch_static_branch(struct static_key *key)
+static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
 {
 	asm_volatile_goto("1:\n\t"
-		 JUMP_LABEL_NOP "\n\t"
+		 WASM(b) " %l[l_yes]\n\t"
 		 ".pushsection __jump_table,  \"aw\"\n\t"
 		 ".word 1b, %l[l_yes], %c0\n\t"
 		 ".popsection\n\t"
-		 : :  "i" (key) :  : l_yes);
+		 : :  "i" (&((char *)key)[branch]) :  : l_yes);
 
 	return false;
 l_yes:
diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h
index c0e5165..1b5e0e8 100644
--- a/arch/arm64/include/asm/jump_label.h
+++ b/arch/arm64/include/asm/jump_label.h
@@ -26,14 +26,28 @@
 
 #define JUMP_LABEL_NOP_SIZE		AARCH64_INSN_SIZE
 
-static __always_inline bool arch_static_branch(struct static_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
 {
 	asm goto("1: nop\n\t"
 		 ".pushsection __jump_table,  \"aw\"\n\t"
 		 ".align 3\n\t"
 		 ".quad 1b, %l[l_yes], %c0\n\t"
 		 ".popsection\n\t"
-		 :  :  "i"(key) :  : l_yes);
+		 :  :  "i"(&((char *)key)[branch]) :  : l_yes);
+
+	return false;
+l_yes:
+	return true;
+}
+
+static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
+{
+	asm goto("1: b %l[l_yes]\n\t"
+		 ".pushsection __jump_table,  \"aw\"\n\t"
+		 ".align 3\n\t"
+		 ".quad 1b, %l[l_yes], %c0\n\t"
+		 ".popsection\n\t"
+		 :  :  "i"(&((char *)key)[branch]) :  : l_yes);
 
 	return false;
 l_yes:
diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h
index 608aa57..e776725 100644
--- a/arch/mips/include/asm/jump_label.h
+++ b/arch/mips/include/asm/jump_label.h
@@ -26,14 +26,29 @@
 #define NOP_INSN "nop"
 #endif
 
-static __always_inline bool arch_static_branch(struct static_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
 {
 	asm_volatile_goto("1:\t" NOP_INSN "\n\t"
 		"nop\n\t"
 		".pushsection __jump_table,  \"aw\"\n\t"
 		WORD_INSN " 1b, %l[l_yes], %0\n\t"
 		".popsection\n\t"
-		: :  "i" (key) : : l_yes);
+		: :  "i" (&((char *)key)[branch]) : : l_yes);
+
+	return false;
+l_yes:
+	return true;
+}
+
+static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
+{
+	asm_volatile_goto("1:\tj %l[l_yes]\n\t"
+		"nop\n\t"
+		".pushsection __jump_table,  \"aw\"\n\t"
+		WORD_INSN " 1b, %l[l_yes], %0\n\t"
+		".popsection\n\t"
+		: :  "i" (&((char *)key)[branch]) : : l_yes);
+
 	return false;
 l_yes:
 	return true;
diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h
index efbf9a3..47e155f 100644
--- a/arch/powerpc/include/asm/jump_label.h
+++ b/arch/powerpc/include/asm/jump_label.h
@@ -18,14 +18,29 @@
 #define JUMP_ENTRY_TYPE		stringify_in_c(FTR_ENTRY_LONG)
 #define JUMP_LABEL_NOP_SIZE	4
 
-static __always_inline bool arch_static_branch(struct static_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
 {
 	asm_volatile_goto("1:\n\t"
 		 "nop\n\t"
 		 ".pushsection __jump_table,  \"aw\"\n\t"
 		 JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t"
 		 ".popsection \n\t"
-		 : :  "i" (key) : : l_yes);
+		 : :  "i" (&((char *)key)[branch]) : : l_yes);
+
+	return false;
+l_yes:
+	return true;
+}
+
+static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
+{
+	asm_volatile_goto("1:\n\t"
+		 "b %l[l_yes]\n\t"
+		 ".pushsection __jump_table,  \"aw\"\n\t"
+		 JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t"
+		 ".popsection \n\t"
+		 : :  "i" (&((char *)key)[branch]) : : l_yes);
+
 	return false;
 l_yes:
 	return true;
diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h
index 69972b7..7f9fd5e 100644
--- a/arch/s390/include/asm/jump_label.h
+++ b/arch/s390/include/asm/jump_label.h
@@ -12,14 +12,29 @@
  * We use a brcl 0,2 instruction for jump labels at compile time so it
  * can be easily distinguished from a hotpatch generated instruction.
  */
-static __always_inline bool arch_static_branch(struct static_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
 {
 	asm_volatile_goto("0:	brcl 0,"__stringify(JUMP_LABEL_NOP_OFFSET)"\n"
 		".pushsection __jump_table, \"aw\"\n"
 		".balign 8\n"
 		".quad 0b, %l[label], %0\n"
 		".popsection\n"
-		: : "X" (key) : : label);
+		: : "X" (&((char *)key)[branch]) : : label);
+
+	return false;
+label:
+	return true;
+}
+
+static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
+{
+	asm_volatile_goto("0:	brcl 15, %l[label]\n"
+		".pushsection __jump_table, \"aw\"\n"
+		".balign 8\n"
+		".quad 0b, %l[label], %0\n"
+		".popsection\n"
+		: : "X" (&((char *)key)[branch]) : : label);
+
 	return false;
 label:
 	return true;
diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h
index cc9b04a..62d0354 100644
--- a/arch/sparc/include/asm/jump_label.h
+++ b/arch/sparc/include/asm/jump_label.h
@@ -7,16 +7,33 @@
 
 #define JUMP_LABEL_NOP_SIZE 4
 
-static __always_inline bool arch_static_branch(struct static_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
 {
-		asm_volatile_goto("1:\n\t"
-			 "nop\n\t"
-			 "nop\n\t"
-			 ".pushsection __jump_table,  \"aw\"\n\t"
-			 ".align 4\n\t"
-			 ".word 1b, %l[l_yes], %c0\n\t"
-			 ".popsection \n\t"
-			 : :  "i" (key) : : l_yes);
+	asm_volatile_goto("1:\n\t"
+		 "nop\n\t"
+		 "nop\n\t"
+		 ".pushsection __jump_table,  \"aw\"\n\t"
+		 ".align 4\n\t"
+		 ".word 1b, %l[l_yes], %c0\n\t"
+		 ".popsection \n\t"
+		 : :  "i" (&((char *)key)[branch]) : : l_yes);
+
+	return false;
+l_yes:
+	return true;
+}
+
+static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
+{
+	asm_volatile_goto("1:\n\t"
+		 "b %l[l_yes]\n\t"
+		 "nop\n\t"
+		 ".pushsection __jump_table,  \"aw\"\n\t"
+		 ".align 4\n\t"
+		 ".word 1b, %l[l_yes], %c0\n\t"
+		 ".popsection \n\t"
+		 : :  "i" (&((char *)key)[branch]) : : l_yes);
+
 	return false;
 l_yes:
 	return true;
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index a4c1cf7..28d7a85 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -16,7 +16,7 @@
 # define STATIC_KEY_INIT_NOP GENERIC_NOP5_ATOMIC
 #endif
 
-static __always_inline bool arch_static_branch(struct static_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
 {
 	asm_volatile_goto("1:"
 		".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t"
@@ -24,7 +24,24 @@ static __always_inline bool arch_static_branch(struct static_key *key)
 		_ASM_ALIGN "\n\t"
 		_ASM_PTR "1b, %l[l_yes], %c0 \n\t"
 		".popsection \n\t"
-		: :  "i" (key) : : l_yes);
+		: :  "i" (&((char *)key)[branch]) : : l_yes);
+
+	return false;
+l_yes:
+	return true;
+}
+
+static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
+{
+	asm_volatile_goto("1:"
+		".byte 0xe9\n\t .long %l[l_yes] - 2f\n\t"
+		"2:\n\t"
+		".pushsection __jump_table,  \"aw\" \n\t"
+		_ASM_ALIGN "\n\t"
+		_ASM_PTR "1b, %l[l_yes], %c0 \n\t"
+		".popsection \n\t"
+		: :  "i" (&((char *)key)[branch]) : : l_yes);
+
 	return false;
 l_yes:
 	return true;
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 65f0eba..e337a19 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -107,12 +107,12 @@ static inline int static_key_count(struct static_key *key)
 
 static __always_inline bool static_key_false(struct static_key *key)
 {
-	return arch_static_branch(key);
+	return arch_static_branch(key, false);
 }
 
 static __always_inline bool static_key_true(struct static_key *key)
 {
-	return !static_key_false(key);
+	return !arch_static_branch(key, true);
 }
 
 extern struct jump_entry __start___jump_table[];
@@ -130,12 +130,12 @@ extern void static_key_slow_inc(struct static_key *key);
 extern void static_key_slow_dec(struct static_key *key);
 extern void jump_label_apply_nops(struct module *mod);
 
-#define STATIC_KEY_INIT_TRUE ((struct static_key)		\
+#define STATIC_KEY_INIT_TRUE					\
 	{ .enabled = ATOMIC_INIT(1),				\
-	  .entries = (void *)JUMP_TYPE_TRUE })
-#define STATIC_KEY_INIT_FALSE ((struct static_key)		\
+	  .entries = (void *)JUMP_TYPE_TRUE }
+#define STATIC_KEY_INIT_FALSE					\
 	{ .enabled = ATOMIC_INIT(0),				\
-	  .entries = (void *)JUMP_TYPE_FALSE })
+	  .entries = (void *)JUMP_TYPE_FALSE }
 
 #else  /* !HAVE_JUMP_LABEL */
 
@@ -183,10 +183,8 @@ static inline int jump_label_apply_nops(struct module *mod)
 	return 0;
 }
 
-#define STATIC_KEY_INIT_TRUE ((struct static_key) \
-		{ .enabled = ATOMIC_INIT(1) })
-#define STATIC_KEY_INIT_FALSE ((struct static_key) \
-		{ .enabled = ATOMIC_INIT(0) })
+#define STATIC_KEY_INIT_TRUE	{ .enabled = ATOMIC_INIT(1) }
+#define STATIC_KEY_INIT_FALSE	{ .enabled = ATOMIC_INIT(0) }
 
 #endif	/* HAVE_JUMP_LABEL */
 
@@ -218,6 +216,137 @@ static inline void static_key_disable(struct static_key *key)
 		static_key_slow_dec(key);
 }
 
+/* -------------------------------------------------------------------------- */
+
+/*
+ * Two type wrappers around static_key, such that we can use compile time
+ * type differentiation to emit the right code.
+ *
+ * All the below code is macros in order to play type games.
+ */
+
+struct static_key_true {
+	struct static_key key;
+};
+
+struct static_key_false {
+	struct static_key key;
+};
+
+#define STATIC_KEY_TRUE_INIT  (struct static_key_true) { .key = STATIC_KEY_INIT_TRUE,  }
+#define STATIC_KEY_FALSE_INIT (struct static_key_false){ .key = STATIC_KEY_INIT_FALSE, }
+
+#define DEFINE_STATIC_KEY_TRUE(name)	\
+	struct static_key_true name = STATIC_KEY_TRUE_INIT
+
+#define DEFINE_STATIC_KEY_FALSE(name)	\
+	struct static_key_false name = STATIC_KEY_FALSE_INIT
+
+#ifdef HAVE_JUMP_LABEL
+
+/*
+ * Combine the right initial value (type) with the right branch order
+ * to generate the desired result.
+ *
+ *
+ * type\branch|	likely (1)	      |	unlikely (0)
+ * -----------+-----------------------+------------------
+ *            |                       |
+ *  true (1)  |	   ...		      |	   ...
+ *            |    NOP		      |	   JMP L
+ *            |    <br-stmts>	      |	1: ...
+ *            |	L: ...		      |
+ *            |			      |
+ *            |			      |	L: <br-stmts>
+ *            |			      |	   jmp 1b
+ *            |                       |
+ * -----------+-----------------------+------------------
+ *            |                       |
+ *  false (0) |	   ...		      |	   ...
+ *            |    JMP L	      |	   NOP
+ *            |    <br-stmts>	      |	1: ...
+ *            |	L: ...		      |
+ *            |			      |
+ *            |			      |	L: <br-stmts>
+ *            |			      |	   jmp 1b
+ *            |                       |
+ * -----------+-----------------------+------------------
+ *
+ * The initial value is encoded in the LSB of static_key::entries,
+ * type: 0 = false, 1 = true.
+ *
+ * The branch type is encoded in the LSB of jump_entry::key,
+ * branch: 0 = unlikely, 1 = likely.
+ *
+ * This gives the following logic table:
+ *
+ *	enabled	type	branch	  instuction
+ * -----------------------------+-----------
+ *	0	0	0	| NOP
+ *	0	0	1	| JMP
+ *	0	1	0	| NOP
+ *	0	1	1	| JMP
+ *
+ *	1	0	0	| JMP
+ *	1	0	1	| NOP
+ *	1	1	0	| JMP
+ *	1	1	1	| NOP
+ *
+ * Which gives the following functions:
+ *
+ *   dynamic: instruction = enabled ^ branch
+ *   static:  instruction = type ^ branch
+ *
+ * See jump_label_type() / jump_label_init_type().
+ */
+
+extern bool ____wrong_branch_error(void);
+
+#define static_branch_likely(x)							\
+({										\
+	bool branch;								\
+	if (__builtin_types_compatible_p(typeof(*x), struct static_key_true))	\
+		branch = !arch_static_branch(&(x)->key, true);			\
+	else if (__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \
+		branch = !arch_static_branch_jump(&(x)->key, true);		\
+	else									\
+		branch = ____wrong_branch_error();				\
+	branch;									\
+})
+
+#define static_branch_unlikely(x)						\
+({										\
+	bool branch;								\
+	if (__builtin_types_compatible_p(typeof(*x), struct static_key_true))	\
+		branch = arch_static_branch_jump(&(x)->key, false);		\
+	else if (__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \
+		branch = arch_static_branch(&(x)->key, false);			\
+	else									\
+		branch = ____wrong_branch_error();				\
+	branch;									\
+})
+
+#else /* !HAVE_JUMP_LABEL */
+
+#define static_branch_likely(x)		likely(static_key_enabled(&(x)->key))
+#define static_branch_unlikely(x)	unlikely(static_key_enabled(&(x)->key))
+
+#endif /* HAVE_JUMP_LABEL */
+
+/*
+ * Advanced usage; refcount, branch is enabled when: count != 0
+ */
+
+#define static_branch_inc(x)		static_key_slow_inc(&(x)->key)
+#define static_branch_dec(x)		static_key_slow_dec(&(x)->key)
+
+/*
+ * Normal usage; boolean enable/disable.
+ */
+
+#define static_branch_enable(x)		static_key_enable(&(x)->key)
+#define static_branch_disable(x)	static_key_disable(&(x)->key)
+
 #endif	/* _LINUX_JUMP_LABEL_H */
 
 #endif /* __ASSEMBLY__ */
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 2e7cc1e..8fd00d8 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -165,16 +165,22 @@ static inline bool static_key_type(struct static_key *key)
 
 static inline struct static_key *jump_entry_key(struct jump_entry *entry)
 {
-	return (struct static_key *)((unsigned long)entry->key);
+	return (struct static_key *)((unsigned long)entry->key & ~1UL);
+}
+
+static bool jump_entry_branch(struct jump_entry *entry)
+{
+	return (unsigned long)entry->key & 1UL;
 }
 
 static enum jump_label_type jump_label_type(struct jump_entry *entry)
 {
 	struct static_key *key = jump_entry_key(entry);
 	bool enabled = static_key_enabled(key);
-	bool type = static_key_type(key);
+	bool branch = jump_entry_branch(entry);
 
-	return enabled ^ type;
+	/* See the comment in linux/jump_label.h */
+	return enabled ^ branch;
 }
 
 static void __jump_label_update(struct static_key *key,
@@ -205,7 +211,10 @@ void __init jump_label_init(void)
 	for (iter = iter_start; iter < iter_stop; iter++) {
 		struct static_key *iterk;
 
-		arch_jump_label_transform_static(iter, jump_label_type(iter));
+		/* rewrite NOPs */
+		if (jump_label_type(iter) == JUMP_LABEL_NOP)
+			arch_jump_label_transform_static(iter, JUMP_LABEL_NOP);
+
 		iterk = jump_entry_key(iter);
 		if (iterk == key)
 			continue;
@@ -225,6 +234,16 @@ void __init jump_label_init(void)
 
 #ifdef CONFIG_MODULES
 
+static enum jump_label_type jump_label_init_type(struct jump_entry *entry)
+{
+	struct static_key *key = jump_entry_key(entry);
+	bool type = static_key_type(key);
+	bool branch = jump_entry_branch(entry);
+
+	/* See the comment in linux/jump_label.h */
+	return type ^ branch;
+}
+
 struct static_key_mod {
 	struct static_key_mod *next;
 	struct jump_entry *entries;
@@ -276,8 +295,11 @@ void jump_label_apply_nops(struct module *mod)
 	if (iter_start == iter_stop)
 		return;
 
-	for (iter = iter_start; iter < iter_stop; iter++)
-		arch_jump_label_transform_static(iter, JUMP_LABEL_NOP);
+	for (iter = iter_start; iter < iter_stop; iter++) {
+		/* Only write NOPs for arch_branch_static(). */
+		if (jump_label_init_type(iter) == JUMP_LABEL_NOP)
+			arch_jump_label_transform_static(iter, JUMP_LABEL_NOP);
+	}
 }
 
 static int jump_label_add_module(struct module *mod)
@@ -318,7 +340,8 @@ static int jump_label_add_module(struct module *mod)
 		jlm->next = key->next;
 		key->next = jlm;
 
-		if (jump_label_type(iter) == JUMP_LABEL_JMP)
+		/* Only update if we've changed from our initial state */
+		if (jump_label_type(iter) != jump_label_init_type(iter))
 			__jump_label_update(key, iter, iter_stop);
 	}
 
-- 
cgit v0.10.2


From 1987c947d905baa05bee430178e8eef882f36bd4 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 27 Jul 2015 18:32:09 +0200
Subject: locking/static_keys: Add selftest

Add a little selftest that validates all combinations.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/arch/Kconfig b/arch/Kconfig
index 8a8ea71..a71cdbe 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -71,6 +71,12 @@ config JUMP_LABEL
 	 ( On 32-bit x86, the necessary options added to the compiler
 	   flags may increase the size of the kernel slightly. )
 
+config STATIC_KEYS_SELFTEST
+	bool "Static key selftest"
+	depends on JUMP_LABEL
+	help
+	  Boot time self-test of the branch patching code.
+
 config OPTPROBES
 	def_bool y
 	depends on KPROBES && HAVE_OPTPROBES
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 8fd00d8..f7dd15d 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -482,4 +482,41 @@ static void jump_label_update(struct static_key *key)
 		__jump_label_update(key, entry, stop);
 }
 
-#endif
+#ifdef CONFIG_STATIC_KEYS_SELFTEST
+static DEFINE_STATIC_KEY_TRUE(sk_true);
+static DEFINE_STATIC_KEY_FALSE(sk_false);
+
+static __init int jump_label_test(void)
+{
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		WARN_ON(static_key_enabled(&sk_true.key) != true);
+		WARN_ON(static_key_enabled(&sk_false.key) != false);
+
+		WARN_ON(!static_branch_likely(&sk_true));
+		WARN_ON(!static_branch_unlikely(&sk_true));
+		WARN_ON(static_branch_likely(&sk_false));
+		WARN_ON(static_branch_unlikely(&sk_false));
+
+		static_branch_disable(&sk_true);
+		static_branch_enable(&sk_false);
+
+		WARN_ON(static_key_enabled(&sk_true.key) == true);
+		WARN_ON(static_key_enabled(&sk_false.key) == false);
+
+		WARN_ON(static_branch_likely(&sk_true));
+		WARN_ON(static_branch_unlikely(&sk_true));
+		WARN_ON(!static_branch_likely(&sk_false));
+		WARN_ON(!static_branch_unlikely(&sk_false));
+
+		static_branch_enable(&sk_true);
+		static_branch_disable(&sk_false);
+	}
+
+	return 0;
+}
+late_initcall(jump_label_test);
+#endif /* STATIC_KEYS_SELFTEST */
+
+#endif /* HAVE_JUMP_LABEL */
-- 
cgit v0.10.2


From 3bbfafb77a06327fa1bc9f19bc55b5c558475091 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 24 Jul 2015 16:34:32 +0200
Subject: x86, tsc, locking/static_keys: Employ static_branch_likely()

Because of the static_key restrictions we had to take an unconditional
jump for the most likely case, causing $I bloat.

Rewrite to use the new primitives.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 1bb8bab..b9cfd46 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -38,7 +38,7 @@ static int __read_mostly tsc_unstable;
    erroneous rdtsc usage on !cpu_has_tsc processors */
 static int __read_mostly tsc_disabled = -1;
 
-static struct static_key __use_tsc = STATIC_KEY_INIT;
+static DEFINE_STATIC_KEY_FALSE(__use_tsc);
 
 int tsc_clocksource_reliable;
 
@@ -274,7 +274,12 @@ done:
  */
 u64 native_sched_clock(void)
 {
-	u64 tsc_now;
+	if (static_branch_likely(&__use_tsc)) {
+		u64 tsc_now = rdtsc();
+
+		/* return the value in ns */
+		return cycles_2_ns(tsc_now);
+	}
 
 	/*
 	 * Fall back to jiffies if there's no TSC available:
@@ -284,16 +289,9 @@ u64 native_sched_clock(void)
 	 *   very important for it to be as fast as the platform
 	 *   can achieve it. )
 	 */
-	if (!static_key_false(&__use_tsc)) {
-		/* No locking but a rare wrong value is not a big deal: */
-		return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
-	}
-
-	/* read the Time Stamp Counter: */
-	tsc_now = rdtsc();
 
-	/* return the value in ns */
-	return cycles_2_ns(tsc_now);
+	/* No locking but a rare wrong value is not a big deal: */
+	return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
 }
 
 /* We need to define a real function for sched_clock, to override the
@@ -1204,7 +1202,7 @@ void __init tsc_init(void)
 	/* now allow native_sched_clock() to use rdtsc */
 
 	tsc_disabled = 0;
-	static_key_slow_inc(&__use_tsc);
+	static_branch_enable(&__use_tsc);
 
 	if (!no_sched_irq_time)
 		enable_sched_clock_irqtime();
-- 
cgit v0.10.2


From ed79e946732e5311934d7f404b3b4e702e45cb97 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 29 Jul 2015 08:31:24 +0200
Subject: s390/uaccess, locking/static_keys: employ static_branch_likely()

Use the new static_branch_likely() primitive to make sure that the
most likely case is executed without taking an unconditional branch.
This wasn't possible with the old jump label primitives.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/r/20150729064600.GB3953@osiris
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index 4614d41..93cb1d0 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -15,7 +15,7 @@
 #include <asm/mmu_context.h>
 #include <asm/facility.h>
 
-static struct static_key have_mvcos = STATIC_KEY_INIT_FALSE;
+static DEFINE_STATIC_KEY_FALSE(have_mvcos);
 
 static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr,
 						 unsigned long size)
@@ -104,7 +104,7 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr,
 
 unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n)
 {
-	if (static_key_false(&have_mvcos))
+	if (static_branch_likely(&have_mvcos))
 		return copy_from_user_mvcos(to, from, n);
 	return copy_from_user_mvcp(to, from, n);
 }
@@ -177,7 +177,7 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x,
 
 unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n)
 {
-	if (static_key_false(&have_mvcos))
+	if (static_branch_likely(&have_mvcos))
 		return copy_to_user_mvcos(to, from, n);
 	return copy_to_user_mvcs(to, from, n);
 }
@@ -240,7 +240,7 @@ static inline unsigned long copy_in_user_mvc(void __user *to, const void __user
 
 unsigned long __copy_in_user(void __user *to, const void __user *from, unsigned long n)
 {
-	if (static_key_false(&have_mvcos))
+	if (static_branch_likely(&have_mvcos))
 		return copy_in_user_mvcos(to, from, n);
 	return copy_in_user_mvc(to, from, n);
 }
@@ -312,7 +312,7 @@ static inline unsigned long clear_user_xc(void __user *to, unsigned long size)
 
 unsigned long __clear_user(void __user *to, unsigned long size)
 {
-	if (static_key_false(&have_mvcos))
+	if (static_branch_likely(&have_mvcos))
 			return clear_user_mvcos(to, size);
 	return clear_user_xc(to, size);
 }
@@ -386,7 +386,7 @@ early_param("uaccess_primary", parse_uaccess_pt);
 static int __init uaccess_init(void)
 {
 	if (!uaccess_primary && test_facility(27))
-		static_key_slow_inc(&have_mvcos);
+		static_branch_enable(&have_mvcos);
 	return 0;
 }
 early_initcall(uaccess_init);
-- 
cgit v0.10.2


From 579e1acb153464649781fe5555b4892c0ff84a40 Mon Sep 17 00:00:00 2001
From: Jason Baron <jbaron@akamai.com>
Date: Thu, 30 Jul 2015 03:59:44 +0000
Subject: jump_label: Provide a self-test

Signed-off-by: Jason Baron <jbaron@akamai.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: benh@kernel.crashing.org
Cc: bp@alien8.de
Cc: davem@davemloft.net
Cc: ddaney@caviumnetworks.com
Cc: heiko.carstens@de.ibm.com
Cc: linux-kernel@vger.kernel.org
Cc: liuj97@gmail.com
Cc: luto@amacapital.net
Cc: michael@ellerman.id.au
Cc: rabin@rab.in
Cc: ralf@linux-mips.org
Cc: rostedt@goodmis.org
Cc: shuahkh@osg.samsung.com
Cc: vbabka@suse.cz
Cc: will.deacon@arm.com
Link: http://lkml.kernel.org/r/0c091ecebd78a879ed8a71835d205a691a75ab4e.1438227999.git.jbaron@akamai.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index e7b5b65..304e75f 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1841,6 +1841,15 @@ config MEMTEST
 	        memtest=17, mean do 17 test patterns.
 	  If you are unsure how to answer this question, answer N.
 
+config TEST_JUMP_LABEL
+	tristate "Test jump label"
+	default n
+	depends on m
+	help
+	  Test jump labels.
+
+	  If unsure, say N.
+
 source "samples/Kconfig"
 
 source "lib/Kconfig.kgdb"
diff --git a/lib/Makefile b/lib/Makefile
index 6897b52..14f6e07 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -39,6 +39,8 @@ obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
 obj-$(CONFIG_TEST_LKM) += test_module.o
 obj-$(CONFIG_TEST_RHASHTABLE) += test_rhashtable.o
 obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o
+obj-$(CONFIG_TEST_JUMP_LABEL) += test_jump_label.o
+obj-$(CONFIG_TEST_JUMP_LABEL) += test_jump_label_base.o
 
 ifeq ($(CONFIG_DEBUG_KOBJECT),y)
 CFLAGS_kobject.o += -DDEBUG
diff --git a/lib/test_jump_label.c b/lib/test_jump_label.c
new file mode 100644
index 0000000..2fe7413
--- /dev/null
+++ b/lib/test_jump_label.c
@@ -0,0 +1,225 @@
+/*
+ * Kernel module for testing jump labels.
+ *
+ * Copyright 2015 Akamai Technologies Inc. All Rights Reserved
+ *
+ * Authors:
+ *      Jason Baron       <jbaron@akamai.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/jump_label.h>
+
+/* old keys */
+struct static_key old_true_key = STATIC_KEY_INIT_TRUE;
+struct static_key old_false_key = STATIC_KEY_INIT_FALSE;
+
+/* new api */
+DEFINE_STATIC_KEY_TRUE(true_key);
+DEFINE_STATIC_KEY_FALSE(false_key);
+
+/* external */
+extern struct static_key base_old_true_key;
+extern struct static_key base_inv_old_true_key;
+extern struct static_key base_old_false_key;
+extern struct static_key base_inv_old_false_key;
+
+/* new api */
+extern struct static_key_true base_true_key;
+extern struct static_key_true base_inv_true_key;
+extern struct static_key_false base_false_key;
+extern struct static_key_false base_inv_false_key;
+
+
+struct test_branch {
+	bool init_state;
+	struct static_key *key;
+	bool (*test_key)(void);
+};
+
+#define test_key_func(key, branch)	\
+({bool func(void) { return branch(key); } func;	})
+
+static void invert_key(struct static_key *key)
+{
+	if (static_key_enabled(key))
+		static_key_disable(key);
+	else
+		static_key_enable(key);
+}
+
+static void invert_keys(struct test_branch *branches, int size)
+{
+	struct static_key *previous = NULL;
+	int i;
+
+	for (i = 0; i < size; i++) {
+		if (previous != branches[i].key) {
+			invert_key(branches[i].key);
+			previous = branches[i].key;
+		}
+	}
+}
+
+int verify_branches(struct test_branch *branches, int size, bool invert)
+{
+	int i;
+	bool ret, init;
+
+	for (i = 0; i < size; i++) {
+		ret = static_key_enabled(branches[i].key);
+		init = branches[i].init_state;
+		if (ret != (invert ? !init : init))
+			return -EINVAL;
+		ret = branches[i].test_key();
+		if (static_key_enabled(branches[i].key)) {
+			if (!ret)
+				return -EINVAL;
+		} else {
+			if (ret)
+				return -EINVAL;
+		}
+	}
+	return 0;
+}
+
+static int __init test_jump_label_init(void)
+{
+	int ret;
+	int size;
+
+	struct test_branch jump_label_tests[] = {
+		/* internal keys - old keys */
+		{
+			.init_state = true,
+			.key = &old_true_key,
+			.test_key = test_key_func(&old_true_key, static_key_true),
+		},
+		{
+			.init_state = false,
+			.key = &old_false_key,
+			.test_key = test_key_func(&old_false_key, static_key_false),
+		},
+		/* internal keys - new keys */
+		{
+			.init_state = true,
+			.key = &true_key.key,
+			.test_key = test_key_func(&true_key, static_branch_likely),
+		},
+		{
+			.init_state = true,
+			.key = &true_key.key,
+			.test_key = test_key_func(&true_key, static_branch_unlikely),
+		},
+		{
+			.init_state = false,
+			.key = &false_key.key,
+			.test_key = test_key_func(&false_key, static_branch_likely),
+		},
+		{
+			.init_state = false,
+			.key = &false_key.key,
+			.test_key = test_key_func(&false_key, static_branch_unlikely),
+		},
+		/* external keys - old keys */
+		{
+			.init_state = true,
+			.key = &base_old_true_key,
+			.test_key = test_key_func(&base_old_true_key, static_key_true),
+		},
+		{
+			.init_state = false,
+			.key = &base_inv_old_true_key,
+			.test_key = test_key_func(&base_inv_old_true_key, static_key_true),
+		},
+		{
+			.init_state = false,
+			.key = &base_old_false_key,
+			.test_key = test_key_func(&base_old_false_key, static_key_false),
+		},
+		{
+			.init_state = true,
+			.key = &base_inv_old_false_key,
+			.test_key = test_key_func(&base_inv_old_false_key, static_key_false),
+		},
+		/* external keys - new keys */
+		{
+			.init_state = true,
+			.key = &base_true_key.key,
+			.test_key = test_key_func(&base_true_key, static_branch_likely),
+		},
+		{
+			.init_state = true,
+			.key = &base_true_key.key,
+			.test_key = test_key_func(&base_true_key, static_branch_unlikely),
+		},
+		{
+			.init_state = false,
+			.key = &base_inv_true_key.key,
+			.test_key = test_key_func(&base_inv_true_key, static_branch_likely),
+		},
+		{
+			.init_state = false,
+			.key = &base_inv_true_key.key,
+			.test_key = test_key_func(&base_inv_true_key, static_branch_unlikely),
+		},
+		{
+			.init_state = false,
+			.key = &base_false_key.key,
+			.test_key = test_key_func(&base_false_key, static_branch_likely),
+		},
+		{
+			.init_state = false,
+			.key = &base_false_key.key,
+			.test_key = test_key_func(&base_false_key, static_branch_unlikely),
+		},
+		{
+			.init_state = true,
+			.key = &base_inv_false_key.key,
+			.test_key = test_key_func(&base_inv_false_key, static_branch_likely),
+		},
+		{
+			.init_state = true,
+			.key = &base_inv_false_key.key,
+			.test_key = test_key_func(&base_inv_false_key, static_branch_unlikely),
+		},
+	};
+
+	size = ARRAY_SIZE(jump_label_tests);
+
+	ret = verify_branches(jump_label_tests, size, false);
+	if (ret)
+		goto out;
+
+	invert_keys(jump_label_tests, size);
+	ret = verify_branches(jump_label_tests, size, true);
+	if (ret)
+		goto out;
+
+	invert_keys(jump_label_tests, size);
+	ret = verify_branches(jump_label_tests, size, false);
+	if (ret)
+		goto out;
+	return 0;
+out:
+	return ret;
+}
+
+static void __exit test_jump_label_exit(void)
+{
+}
+
+module_init(test_jump_label_init);
+module_exit(test_jump_label_exit);
+
+MODULE_AUTHOR("Jason Baron <jbaron@akamai.com>");
+MODULE_LICENSE("GPL");
diff --git a/lib/test_jump_label_base.c b/lib/test_jump_label_base.c
new file mode 100644
index 0000000..91aed8e
--- /dev/null
+++ b/lib/test_jump_label_base.c
@@ -0,0 +1,68 @@
+/*
+ * Kernel module for testing jump labels.
+ *
+ * Copyright 2015 Akamai Technologies Inc. All Rights Reserved
+ *
+ * Authors:
+ *      Jason Baron       <jbaron@akamai.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/jump_label.h>
+
+/* old keys */
+struct static_key base_old_true_key = STATIC_KEY_INIT_TRUE;
+EXPORT_SYMBOL_GPL(base_old_true_key);
+struct static_key base_inv_old_true_key = STATIC_KEY_INIT_TRUE;
+EXPORT_SYMBOL_GPL(base_inv_old_true_key);
+struct static_key base_old_false_key = STATIC_KEY_INIT_FALSE;
+EXPORT_SYMBOL_GPL(base_old_false_key);
+struct static_key base_inv_old_false_key = STATIC_KEY_INIT_FALSE;
+EXPORT_SYMBOL_GPL(base_inv_old_false_key);
+
+/* new keys */
+DEFINE_STATIC_KEY_TRUE(base_true_key);
+EXPORT_SYMBOL_GPL(base_true_key);
+DEFINE_STATIC_KEY_TRUE(base_inv_true_key);
+EXPORT_SYMBOL_GPL(base_inv_true_key);
+DEFINE_STATIC_KEY_FALSE(base_false_key);
+EXPORT_SYMBOL_GPL(base_false_key);
+DEFINE_STATIC_KEY_FALSE(base_inv_false_key);
+EXPORT_SYMBOL_GPL(base_inv_false_key);
+
+static void invert_key(struct static_key *key)
+{
+	if (static_key_enabled(key))
+		static_key_disable(key);
+	else
+		static_key_enable(key);
+}
+
+static int __init test_jump_label_base_init(void)
+{
+	invert_key(&base_inv_old_true_key);
+	invert_key(&base_inv_old_false_key);
+	invert_key(&base_inv_true_key.key);
+	invert_key(&base_inv_false_key.key);
+
+	return 0;
+}
+
+static void __exit test_jump_label_base_exit(void)
+{
+}
+
+module_init(test_jump_label_base_init);
+module_exit(test_jump_label_base_exit);
+
+MODULE_AUTHOR("Jason Baron <jbaron@akamai.com>");
+MODULE_LICENSE("GPL");
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 24ae9e8..b8f12e0 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -20,6 +20,7 @@ ifneq (1, $(quicktest))
 TARGETS += timers
 endif
 TARGETS += user
+TARGETS += jumplabel
 TARGETS += vm
 TARGETS += x86
 #Please keep the TARGETS list alphabetically sorted
diff --git a/tools/testing/selftests/jumplabel/Makefile b/tools/testing/selftests/jumplabel/Makefile
new file mode 100644
index 0000000..7526aa5
--- /dev/null
+++ b/tools/testing/selftests/jumplabel/Makefile
@@ -0,0 +1,8 @@
+# Makefile for jump label selftests
+
+# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
+all:
+
+TEST_PROGS := test_jump_label.sh
+
+include ../lib.mk
diff --git a/tools/testing/selftests/jumplabel/test_jump_label.sh b/tools/testing/selftests/jumplabel/test_jump_label.sh
new file mode 100644
index 0000000..3457e8a
--- /dev/null
+++ b/tools/testing/selftests/jumplabel/test_jump_label.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+# Runs jump label kernel module tests
+
+if /sbin/modprobe -q test_jump_label_base; then
+	if /sbin/modprobe -q test_jump_label; then
+		echo "jump_label: ok"
+		/sbin/modprobe -q -r test_jump_label
+		/sbin/modprobe -q -r test_jump_label_base
+	else
+		echo "jump_label: [FAIL]"
+		/sbin/modprobe -q -r test_jump_label_base
+	fi
+else
+	echo "jump_label: [FAIL]"
+	exit 1
+fi
-- 
cgit v0.10.2


From 2bf9e0ab08c64ac56067555911a1ae81ebff5f96 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Mon, 3 Aug 2015 11:42:57 +0200
Subject: locking/static_keys: Provide a selftest

The 'jump label' self-test is in reality testing static keys - rename things
accordingly.

Also prettify the code in various places while at it.

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Jason Baron <jbaron@akamai.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Shuah Khan <shuahkh@osg.samsung.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: benh@kernel.crashing.org
Cc: bp@alien8.de
Cc: davem@davemloft.net
Cc: ddaney@caviumnetworks.com
Cc: heiko.carstens@de.ibm.com
Cc: linux-kernel@vger.kernel.org
Cc: liuj97@gmail.com
Cc: luto@amacapital.net
Cc: michael@ellerman.id.au
Cc: rabin@rab.in
Cc: ralf@linux-mips.org
Cc: rostedt@goodmis.org
Cc: vbabka@suse.cz
Cc: will.deacon@arm.com
Link: http://lkml.kernel.org/r/0c091ecebd78a879ed8a71835d205a691a75ab4e.1438227999.git.jbaron@akamai.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 304e75f..0d85930 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1841,12 +1841,12 @@ config MEMTEST
 	        memtest=17, mean do 17 test patterns.
 	  If you are unsure how to answer this question, answer N.
 
-config TEST_JUMP_LABEL
-	tristate "Test jump label"
+config TEST_STATIC_KEYS
+	tristate "Test static keys"
 	default n
 	depends on m
 	help
-	  Test jump labels.
+	  Test the static key interfaces.
 
 	  If unsure, say N.
 
diff --git a/lib/Makefile b/lib/Makefile
index 14f6e07..9f2fc71 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -39,8 +39,8 @@ obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
 obj-$(CONFIG_TEST_LKM) += test_module.o
 obj-$(CONFIG_TEST_RHASHTABLE) += test_rhashtable.o
 obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o
-obj-$(CONFIG_TEST_JUMP_LABEL) += test_jump_label.o
-obj-$(CONFIG_TEST_JUMP_LABEL) += test_jump_label_base.o
+obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_keys.o
+obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o
 
 ifeq ($(CONFIG_DEBUG_KOBJECT),y)
 CFLAGS_kobject.o += -DDEBUG
diff --git a/lib/test_jump_label.c b/lib/test_jump_label.c
deleted file mode 100644
index 2fe7413..0000000
--- a/lib/test_jump_label.c
+++ /dev/null
@@ -1,225 +0,0 @@
-/*
- * Kernel module for testing jump labels.
- *
- * Copyright 2015 Akamai Technologies Inc. All Rights Reserved
- *
- * Authors:
- *      Jason Baron       <jbaron@akamai.com>
- *
- * This software is licensed under the terms of the GNU General Public
- * License version 2, as published by the Free Software Foundation, and
- * may be copied, distributed, and modified under those terms.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#include <linux/module.h>
-#include <linux/jump_label.h>
-
-/* old keys */
-struct static_key old_true_key = STATIC_KEY_INIT_TRUE;
-struct static_key old_false_key = STATIC_KEY_INIT_FALSE;
-
-/* new api */
-DEFINE_STATIC_KEY_TRUE(true_key);
-DEFINE_STATIC_KEY_FALSE(false_key);
-
-/* external */
-extern struct static_key base_old_true_key;
-extern struct static_key base_inv_old_true_key;
-extern struct static_key base_old_false_key;
-extern struct static_key base_inv_old_false_key;
-
-/* new api */
-extern struct static_key_true base_true_key;
-extern struct static_key_true base_inv_true_key;
-extern struct static_key_false base_false_key;
-extern struct static_key_false base_inv_false_key;
-
-
-struct test_branch {
-	bool init_state;
-	struct static_key *key;
-	bool (*test_key)(void);
-};
-
-#define test_key_func(key, branch)	\
-({bool func(void) { return branch(key); } func;	})
-
-static void invert_key(struct static_key *key)
-{
-	if (static_key_enabled(key))
-		static_key_disable(key);
-	else
-		static_key_enable(key);
-}
-
-static void invert_keys(struct test_branch *branches, int size)
-{
-	struct static_key *previous = NULL;
-	int i;
-
-	for (i = 0; i < size; i++) {
-		if (previous != branches[i].key) {
-			invert_key(branches[i].key);
-			previous = branches[i].key;
-		}
-	}
-}
-
-int verify_branches(struct test_branch *branches, int size, bool invert)
-{
-	int i;
-	bool ret, init;
-
-	for (i = 0; i < size; i++) {
-		ret = static_key_enabled(branches[i].key);
-		init = branches[i].init_state;
-		if (ret != (invert ? !init : init))
-			return -EINVAL;
-		ret = branches[i].test_key();
-		if (static_key_enabled(branches[i].key)) {
-			if (!ret)
-				return -EINVAL;
-		} else {
-			if (ret)
-				return -EINVAL;
-		}
-	}
-	return 0;
-}
-
-static int __init test_jump_label_init(void)
-{
-	int ret;
-	int size;
-
-	struct test_branch jump_label_tests[] = {
-		/* internal keys - old keys */
-		{
-			.init_state = true,
-			.key = &old_true_key,
-			.test_key = test_key_func(&old_true_key, static_key_true),
-		},
-		{
-			.init_state = false,
-			.key = &old_false_key,
-			.test_key = test_key_func(&old_false_key, static_key_false),
-		},
-		/* internal keys - new keys */
-		{
-			.init_state = true,
-			.key = &true_key.key,
-			.test_key = test_key_func(&true_key, static_branch_likely),
-		},
-		{
-			.init_state = true,
-			.key = &true_key.key,
-			.test_key = test_key_func(&true_key, static_branch_unlikely),
-		},
-		{
-			.init_state = false,
-			.key = &false_key.key,
-			.test_key = test_key_func(&false_key, static_branch_likely),
-		},
-		{
-			.init_state = false,
-			.key = &false_key.key,
-			.test_key = test_key_func(&false_key, static_branch_unlikely),
-		},
-		/* external keys - old keys */
-		{
-			.init_state = true,
-			.key = &base_old_true_key,
-			.test_key = test_key_func(&base_old_true_key, static_key_true),
-		},
-		{
-			.init_state = false,
-			.key = &base_inv_old_true_key,
-			.test_key = test_key_func(&base_inv_old_true_key, static_key_true),
-		},
-		{
-			.init_state = false,
-			.key = &base_old_false_key,
-			.test_key = test_key_func(&base_old_false_key, static_key_false),
-		},
-		{
-			.init_state = true,
-			.key = &base_inv_old_false_key,
-			.test_key = test_key_func(&base_inv_old_false_key, static_key_false),
-		},
-		/* external keys - new keys */
-		{
-			.init_state = true,
-			.key = &base_true_key.key,
-			.test_key = test_key_func(&base_true_key, static_branch_likely),
-		},
-		{
-			.init_state = true,
-			.key = &base_true_key.key,
-			.test_key = test_key_func(&base_true_key, static_branch_unlikely),
-		},
-		{
-			.init_state = false,
-			.key = &base_inv_true_key.key,
-			.test_key = test_key_func(&base_inv_true_key, static_branch_likely),
-		},
-		{
-			.init_state = false,
-			.key = &base_inv_true_key.key,
-			.test_key = test_key_func(&base_inv_true_key, static_branch_unlikely),
-		},
-		{
-			.init_state = false,
-			.key = &base_false_key.key,
-			.test_key = test_key_func(&base_false_key, static_branch_likely),
-		},
-		{
-			.init_state = false,
-			.key = &base_false_key.key,
-			.test_key = test_key_func(&base_false_key, static_branch_unlikely),
-		},
-		{
-			.init_state = true,
-			.key = &base_inv_false_key.key,
-			.test_key = test_key_func(&base_inv_false_key, static_branch_likely),
-		},
-		{
-			.init_state = true,
-			.key = &base_inv_false_key.key,
-			.test_key = test_key_func(&base_inv_false_key, static_branch_unlikely),
-		},
-	};
-
-	size = ARRAY_SIZE(jump_label_tests);
-
-	ret = verify_branches(jump_label_tests, size, false);
-	if (ret)
-		goto out;
-
-	invert_keys(jump_label_tests, size);
-	ret = verify_branches(jump_label_tests, size, true);
-	if (ret)
-		goto out;
-
-	invert_keys(jump_label_tests, size);
-	ret = verify_branches(jump_label_tests, size, false);
-	if (ret)
-		goto out;
-	return 0;
-out:
-	return ret;
-}
-
-static void __exit test_jump_label_exit(void)
-{
-}
-
-module_init(test_jump_label_init);
-module_exit(test_jump_label_exit);
-
-MODULE_AUTHOR("Jason Baron <jbaron@akamai.com>");
-MODULE_LICENSE("GPL");
diff --git a/lib/test_jump_label_base.c b/lib/test_jump_label_base.c
deleted file mode 100644
index 91aed8e..0000000
--- a/lib/test_jump_label_base.c
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Kernel module for testing jump labels.
- *
- * Copyright 2015 Akamai Technologies Inc. All Rights Reserved
- *
- * Authors:
- *      Jason Baron       <jbaron@akamai.com>
- *
- * This software is licensed under the terms of the GNU General Public
- * License version 2, as published by the Free Software Foundation, and
- * may be copied, distributed, and modified under those terms.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#include <linux/module.h>
-#include <linux/jump_label.h>
-
-/* old keys */
-struct static_key base_old_true_key = STATIC_KEY_INIT_TRUE;
-EXPORT_SYMBOL_GPL(base_old_true_key);
-struct static_key base_inv_old_true_key = STATIC_KEY_INIT_TRUE;
-EXPORT_SYMBOL_GPL(base_inv_old_true_key);
-struct static_key base_old_false_key = STATIC_KEY_INIT_FALSE;
-EXPORT_SYMBOL_GPL(base_old_false_key);
-struct static_key base_inv_old_false_key = STATIC_KEY_INIT_FALSE;
-EXPORT_SYMBOL_GPL(base_inv_old_false_key);
-
-/* new keys */
-DEFINE_STATIC_KEY_TRUE(base_true_key);
-EXPORT_SYMBOL_GPL(base_true_key);
-DEFINE_STATIC_KEY_TRUE(base_inv_true_key);
-EXPORT_SYMBOL_GPL(base_inv_true_key);
-DEFINE_STATIC_KEY_FALSE(base_false_key);
-EXPORT_SYMBOL_GPL(base_false_key);
-DEFINE_STATIC_KEY_FALSE(base_inv_false_key);
-EXPORT_SYMBOL_GPL(base_inv_false_key);
-
-static void invert_key(struct static_key *key)
-{
-	if (static_key_enabled(key))
-		static_key_disable(key);
-	else
-		static_key_enable(key);
-}
-
-static int __init test_jump_label_base_init(void)
-{
-	invert_key(&base_inv_old_true_key);
-	invert_key(&base_inv_old_false_key);
-	invert_key(&base_inv_true_key.key);
-	invert_key(&base_inv_false_key.key);
-
-	return 0;
-}
-
-static void __exit test_jump_label_base_exit(void)
-{
-}
-
-module_init(test_jump_label_base_init);
-module_exit(test_jump_label_base_exit);
-
-MODULE_AUTHOR("Jason Baron <jbaron@akamai.com>");
-MODULE_LICENSE("GPL");
diff --git a/lib/test_static_key_base.c b/lib/test_static_key_base.c
new file mode 100644
index 0000000..729447a
--- /dev/null
+++ b/lib/test_static_key_base.c
@@ -0,0 +1,68 @@
+/*
+ * Kernel module for testing static keys.
+ *
+ * Copyright 2015 Akamai Technologies Inc. All Rights Reserved
+ *
+ * Authors:
+ *      Jason Baron       <jbaron@akamai.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/jump_label.h>
+
+/* old keys */
+struct static_key base_old_true_key = STATIC_KEY_INIT_TRUE;
+EXPORT_SYMBOL_GPL(base_old_true_key);
+struct static_key base_inv_old_true_key = STATIC_KEY_INIT_TRUE;
+EXPORT_SYMBOL_GPL(base_inv_old_true_key);
+struct static_key base_old_false_key = STATIC_KEY_INIT_FALSE;
+EXPORT_SYMBOL_GPL(base_old_false_key);
+struct static_key base_inv_old_false_key = STATIC_KEY_INIT_FALSE;
+EXPORT_SYMBOL_GPL(base_inv_old_false_key);
+
+/* new keys */
+DEFINE_STATIC_KEY_TRUE(base_true_key);
+EXPORT_SYMBOL_GPL(base_true_key);
+DEFINE_STATIC_KEY_TRUE(base_inv_true_key);
+EXPORT_SYMBOL_GPL(base_inv_true_key);
+DEFINE_STATIC_KEY_FALSE(base_false_key);
+EXPORT_SYMBOL_GPL(base_false_key);
+DEFINE_STATIC_KEY_FALSE(base_inv_false_key);
+EXPORT_SYMBOL_GPL(base_inv_false_key);
+
+static void invert_key(struct static_key *key)
+{
+	if (static_key_enabled(key))
+		static_key_disable(key);
+	else
+		static_key_enable(key);
+}
+
+static int __init test_static_key_base_init(void)
+{
+	invert_key(&base_inv_old_true_key);
+	invert_key(&base_inv_old_false_key);
+	invert_key(&base_inv_true_key.key);
+	invert_key(&base_inv_false_key.key);
+
+	return 0;
+}
+
+static void __exit test_static_key_base_exit(void)
+{
+}
+
+module_init(test_static_key_base_init);
+module_exit(test_static_key_base_exit);
+
+MODULE_AUTHOR("Jason Baron <jbaron@akamai.com>");
+MODULE_LICENSE("GPL");
diff --git a/lib/test_static_keys.c b/lib/test_static_keys.c
new file mode 100644
index 0000000..81d8105
--- /dev/null
+++ b/lib/test_static_keys.c
@@ -0,0 +1,225 @@
+/*
+ * Kernel module for testing static keys.
+ *
+ * Copyright 2015 Akamai Technologies Inc. All Rights Reserved
+ *
+ * Authors:
+ *      Jason Baron       <jbaron@akamai.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/jump_label.h>
+
+/* old keys */
+struct static_key old_true_key	= STATIC_KEY_INIT_TRUE;
+struct static_key old_false_key	= STATIC_KEY_INIT_FALSE;
+
+/* new api */
+DEFINE_STATIC_KEY_TRUE(true_key);
+DEFINE_STATIC_KEY_FALSE(false_key);
+
+/* external */
+extern struct static_key base_old_true_key;
+extern struct static_key base_inv_old_true_key;
+extern struct static_key base_old_false_key;
+extern struct static_key base_inv_old_false_key;
+
+/* new api */
+extern struct static_key_true base_true_key;
+extern struct static_key_true base_inv_true_key;
+extern struct static_key_false base_false_key;
+extern struct static_key_false base_inv_false_key;
+
+
+struct test_key {
+	bool			init_state;
+	struct static_key	*key;
+	bool			(*test_key)(void);
+};
+
+#define test_key_func(key, branch) \
+	({bool func(void) { return branch(key); } func;	})
+
+static void invert_key(struct static_key *key)
+{
+	if (static_key_enabled(key))
+		static_key_disable(key);
+	else
+		static_key_enable(key);
+}
+
+static void invert_keys(struct test_key *keys, int size)
+{
+	struct static_key *previous = NULL;
+	int i;
+
+	for (i = 0; i < size; i++) {
+		if (previous != keys[i].key) {
+			invert_key(keys[i].key);
+			previous = keys[i].key;
+		}
+	}
+}
+
+int verify_keys(struct test_key *keys, int size, bool invert)
+{
+	int i;
+	bool ret, init;
+
+	for (i = 0; i < size; i++) {
+		ret = static_key_enabled(keys[i].key);
+		init = keys[i].init_state;
+		if (ret != (invert ? !init : init))
+			return -EINVAL;
+		ret = keys[i].test_key();
+		if (static_key_enabled(keys[i].key)) {
+			if (!ret)
+				return -EINVAL;
+		} else {
+			if (ret)
+				return -EINVAL;
+		}
+	}
+	return 0;
+}
+
+static int __init test_static_key_init(void)
+{
+	int ret;
+	int size;
+
+	struct test_key static_key_tests[] = {
+		/* internal keys - old keys */
+		{
+			.init_state	= true,
+			.key		= &old_true_key,
+			.test_key	= test_key_func(&old_true_key, static_key_true),
+		},
+		{
+			.init_state	= false,
+			.key		= &old_false_key,
+			.test_key	= test_key_func(&old_false_key, static_key_false),
+		},
+		/* internal keys - new keys */
+		{
+			.init_state	= true,
+			.key		= &true_key.key,
+			.test_key	= test_key_func(&true_key, static_branch_likely),
+		},
+		{
+			.init_state	= true,
+			.key		= &true_key.key,
+			.test_key	= test_key_func(&true_key, static_branch_unlikely),
+		},
+		{
+			.init_state	= false,
+			.key		= &false_key.key,
+			.test_key	= test_key_func(&false_key, static_branch_likely),
+		},
+		{
+			.init_state	= false,
+			.key		= &false_key.key,
+			.test_key	= test_key_func(&false_key, static_branch_unlikely),
+		},
+		/* external keys - old keys */
+		{
+			.init_state	= true,
+			.key		= &base_old_true_key,
+			.test_key	= test_key_func(&base_old_true_key, static_key_true),
+		},
+		{
+			.init_state	= false,
+			.key		= &base_inv_old_true_key,
+			.test_key	= test_key_func(&base_inv_old_true_key, static_key_true),
+		},
+		{
+			.init_state	= false,
+			.key		= &base_old_false_key,
+			.test_key	= test_key_func(&base_old_false_key, static_key_false),
+		},
+		{
+			.init_state	= true,
+			.key		= &base_inv_old_false_key,
+			.test_key	= test_key_func(&base_inv_old_false_key, static_key_false),
+		},
+		/* external keys - new keys */
+		{
+			.init_state	= true,
+			.key		= &base_true_key.key,
+			.test_key	= test_key_func(&base_true_key, static_branch_likely),
+		},
+		{
+			.init_state	= true,
+			.key		= &base_true_key.key,
+			.test_key	= test_key_func(&base_true_key, static_branch_unlikely),
+		},
+		{
+			.init_state	= false,
+			.key		= &base_inv_true_key.key,
+			.test_key	= test_key_func(&base_inv_true_key, static_branch_likely),
+		},
+		{
+			.init_state	= false,
+			.key		= &base_inv_true_key.key,
+			.test_key	= test_key_func(&base_inv_true_key, static_branch_unlikely),
+		},
+		{
+			.init_state	= false,
+			.key		= &base_false_key.key,
+			.test_key	= test_key_func(&base_false_key, static_branch_likely),
+		},
+		{
+			.init_state	= false,
+			.key		= &base_false_key.key,
+			.test_key	= test_key_func(&base_false_key, static_branch_unlikely),
+		},
+		{
+			.init_state	= true,
+			.key		= &base_inv_false_key.key,
+			.test_key	= test_key_func(&base_inv_false_key, static_branch_likely),
+		},
+		{
+			.init_state	= true,
+			.key		= &base_inv_false_key.key,
+			.test_key	= test_key_func(&base_inv_false_key, static_branch_unlikely),
+		},
+	};
+
+	size = ARRAY_SIZE(static_key_tests);
+
+	ret = verify_keys(static_key_tests, size, false);
+	if (ret)
+		goto out;
+
+	invert_keys(static_key_tests, size);
+	ret = verify_keys(static_key_tests, size, true);
+	if (ret)
+		goto out;
+
+	invert_keys(static_key_tests, size);
+	ret = verify_keys(static_key_tests, size, false);
+	if (ret)
+		goto out;
+	return 0;
+out:
+	return ret;
+}
+
+static void __exit test_static_key_exit(void)
+{
+}
+
+module_init(test_static_key_init);
+module_exit(test_static_key_exit);
+
+MODULE_AUTHOR("Jason Baron <jbaron@akamai.com>");
+MODULE_LICENSE("GPL");
diff --git a/tools/testing/selftests/jumplabel/Makefile b/tools/testing/selftests/jumplabel/Makefile
deleted file mode 100644
index 7526aa5..0000000
--- a/tools/testing/selftests/jumplabel/Makefile
+++ /dev/null
@@ -1,8 +0,0 @@
-# Makefile for jump label selftests
-
-# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
-all:
-
-TEST_PROGS := test_jump_label.sh
-
-include ../lib.mk
diff --git a/tools/testing/selftests/jumplabel/test_jump_label.sh b/tools/testing/selftests/jumplabel/test_jump_label.sh
deleted file mode 100644
index 3457e8a..0000000
--- a/tools/testing/selftests/jumplabel/test_jump_label.sh
+++ /dev/null
@@ -1,16 +0,0 @@
-#!/bin/sh
-# Runs jump label kernel module tests
-
-if /sbin/modprobe -q test_jump_label_base; then
-	if /sbin/modprobe -q test_jump_label; then
-		echo "jump_label: ok"
-		/sbin/modprobe -q -r test_jump_label
-		/sbin/modprobe -q -r test_jump_label_base
-	else
-		echo "jump_label: [FAIL]"
-		/sbin/modprobe -q -r test_jump_label_base
-	fi
-else
-	echo "jump_label: [FAIL]"
-	exit 1
-fi
diff --git a/tools/testing/selftests/static_keys/Makefile b/tools/testing/selftests/static_keys/Makefile
new file mode 100644
index 0000000..9cdadf3
--- /dev/null
+++ b/tools/testing/selftests/static_keys/Makefile
@@ -0,0 +1,8 @@
+# Makefile for static keys selftests
+
+# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
+all:
+
+TEST_PROGS := test_static_keys.sh
+
+include ../lib.mk
diff --git a/tools/testing/selftests/static_keys/test_static_keys.sh b/tools/testing/selftests/static_keys/test_static_keys.sh
new file mode 100644
index 0000000..1261e3f
--- /dev/null
+++ b/tools/testing/selftests/static_keys/test_static_keys.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+# Runs static keys kernel module tests
+
+if /sbin/modprobe -q test_static_key_base; then
+	if /sbin/modprobe -q test_static_keys; then
+		echo "static_key: ok"
+		/sbin/modprobe -q -r test_static_keys
+		/sbin/modprobe -q -r test_static_key_base
+	else
+		echo "static_keys: [FAIL]"
+		/sbin/modprobe -q -r test_static_key_base
+	fi
+else
+	echo "static_key: [FAIL]"
+	exit 1
+fi
-- 
cgit v0.10.2


From 412758cb26704e5087ca2976ec3b28fb2bdbfad4 Mon Sep 17 00:00:00 2001
From: Jason Baron <jbaron@akamai.com>
Date: Thu, 30 Jul 2015 03:59:48 +0000
Subject: jump label, locking/static_keys: Update docs

Signed-off-by: Jason Baron <jbaron@akamai.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: benh@kernel.crashing.org
Cc: bp@alien8.de
Cc: davem@davemloft.net
Cc: ddaney@caviumnetworks.com
Cc: heiko.carstens@de.ibm.com
Cc: linux-kernel@vger.kernel.org
Cc: liuj97@gmail.com
Cc: luto@amacapital.net
Cc: michael@ellerman.id.au
Cc: rabin@rab.in
Cc: ralf@linux-mips.org
Cc: rostedt@goodmis.org
Cc: vbabka@suse.cz
Cc: will.deacon@arm.com
Link: http://lkml.kernel.org/r/6b50f2f6423a2244f37f4b1d2d6c211b9dcdf4f8.1438227999.git.jbaron@akamai.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/Documentation/static-keys.txt b/Documentation/static-keys.txt
index c4407a4..f4cb0b2 100644
--- a/Documentation/static-keys.txt
+++ b/Documentation/static-keys.txt
@@ -1,7 +1,22 @@
 			Static Keys
 			-----------
 
-By: Jason Baron <jbaron@redhat.com>
+DEPRECATED API:
+
+The use of 'struct static_key' directly, is now DEPRECATED. In addition
+static_key_{true,false}() is also DEPRECATED. IE DO NOT use the following:
+
+struct static_key false = STATIC_KEY_INIT_FALSE;
+struct static_key true = STATIC_KEY_INIT_TRUE;
+static_key_true()
+static_key_false()
+
+The updated API replacements are:
+
+DEFINE_STATIC_KEY_TRUE(key);
+DEFINE_STATIC_KEY_FALSE(key);
+static_key_likely()
+statick_key_unlikely()
 
 0) Abstract
 
@@ -9,22 +24,22 @@ Static keys allows the inclusion of seldom used features in
 performance-sensitive fast-path kernel code, via a GCC feature and a code
 patching technique. A quick example:
 
-	struct static_key key = STATIC_KEY_INIT_FALSE;
+	DEFINE_STATIC_KEY_FALSE(key);
 
 	...
 
-        if (static_key_false(&key))
+        if (static_branch_unlikely(&key))
                 do unlikely code
         else
                 do likely code
 
 	...
-	static_key_slow_inc();
+	static_branch_enable(&key);
 	...
-	static_key_slow_inc();
+	static_branch_disable(&key);
 	...
 
-The static_key_false() branch will be generated into the code with as little
+The static_branch_unlikely() branch will be generated into the code with as little
 impact to the likely code path as possible.
 
 
@@ -56,7 +71,7 @@ the branch site to change the branch direction.
 
 For example, if we have a simple branch that is disabled by default:
 
-	if (static_key_false(&key))
+	if (static_branch_unlikely(&key))
 		printk("I am the true branch\n");
 
 Thus, by default the 'printk' will not be emitted. And the code generated will
@@ -75,68 +90,55 @@ the basis for the static keys facility.
 
 In order to make use of this optimization you must first define a key:
 
-	struct static_key key;
-
-Which is initialized as:
-
-	struct static_key key = STATIC_KEY_INIT_TRUE;
+	DEFINE_STATIC_KEY_TRUE(key);
 
 or:
 
-	struct static_key key = STATIC_KEY_INIT_FALSE;
+	DEFINE_STATIC_KEY_FALSE(key);
+
 
-If the key is not initialized, it is default false. The 'struct static_key',
-must be a 'global'. That is, it can't be allocated on the stack or dynamically
+The key must be global, that is, it can't be allocated on the stack or dynamically
 allocated at run-time.
 
 The key is then used in code as:
 
-        if (static_key_false(&key))
+        if (static_branch_unlikely(&key))
                 do unlikely code
         else
                 do likely code
 
 Or:
 
-        if (static_key_true(&key))
+        if (static_branch_likely(&key))
                 do likely code
         else
                 do unlikely code
 
-A key that is initialized via 'STATIC_KEY_INIT_FALSE', must be used in a
-'static_key_false()' construct. Likewise, a key initialized via
-'STATIC_KEY_INIT_TRUE' must be used in a 'static_key_true()' construct. A
-single key can be used in many branches, but all the branches must match the
-way that the key has been initialized.
+Keys defined via DEFINE_STATIC_KEY_TRUE(), or DEFINE_STATIC_KEY_FALSE, may
+be used in either static_branch_likely() or static_branch_unlikely()
+statemnts.
 
-The branch(es) can then be switched via:
+Branch(es) can be set true via:
 
-	static_key_slow_inc(&key);
-	...
-	static_key_slow_dec(&key);
+static_branch_enable(&key);
 
-Thus, 'static_key_slow_inc()' means 'make the branch true', and
-'static_key_slow_dec()' means 'make the branch false' with appropriate
-reference counting. For example, if the key is initialized true, a
-static_key_slow_dec(), will switch the branch to false. And a subsequent
-static_key_slow_inc(), will change the branch back to true. Likewise, if the
-key is initialized false, a 'static_key_slow_inc()', will change the branch to
-true. And then a 'static_key_slow_dec()', will again make the branch false.
+or false via:
+
+static_branch_disable(&key);
 
-An example usage in the kernel is the implementation of tracepoints:
+The branch(es) can then be switched via reference counts:
 
-        static inline void trace_##name(proto)                          \
-        {                                                               \
-                if (static_key_false(&__tracepoint_##name.key))		\
-                        __DO_TRACE(&__tracepoint_##name,                \
-                                TP_PROTO(data_proto),                   \
-                                TP_ARGS(data_args),                     \
-                                TP_CONDITION(cond));                    \
-        }
+	static_branch_inc(&key);
+	...
+	static_branch_dec(&key);
 
-Tracepoints are disabled by default, and can be placed in performance critical
-pieces of the kernel. Thus, by using a static key, the tracepoints can have
-absolutely minimal impact when not in use.
+Thus, 'static_branch_inc()' means 'make the branch true', and
+'static_branch_dec()' means 'make the branch false' with appropriate
+reference counting. For example, if the key is initialized true, a
+static_branch_dec(), will switch the branch to false. And a subsequent
+static_branch_inc(), will change the branch back to true. Likewise, if the
+key is initialized false, a 'static_branch_inc()', will change the branch to
+true. And then a 'static_branch_dec()', will again make the branch false.
 
 
 4) Architecture level code patching interface, 'jump labels'
@@ -150,9 +152,12 @@ simply fall back to a traditional, load, test, and jump sequence.
 
 * #define JUMP_LABEL_NOP_SIZE, see: arch/x86/include/asm/jump_label.h
 
-* __always_inline bool arch_static_branch(struct static_key *key), see:
+* __always_inline bool arch_static_branch(struct static_key *key, bool branch), see:
 					arch/x86/include/asm/jump_label.h
 
+* __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch),
+					see: arch/x86/include/asm/jump_label.h
+
 * void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type),
 					see: arch/x86/kernel/jump_label.c
 
@@ -173,7 +178,7 @@ SYSCALL_DEFINE0(getppid)
 {
         int pid;
 
-+       if (static_key_false(&key))
++       if (static_branch_unlikely(&key))
 +               printk("I am the true branch\n");
 
         rcu_read_lock();
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index e337a19..7f653e8 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -7,17 +7,52 @@
  * Copyright (C) 2009-2012 Jason Baron <jbaron@redhat.com>
  * Copyright (C) 2011-2012 Peter Zijlstra <pzijlstr@redhat.com>
  *
+ * DEPRECATED API:
+ *
+ * The use of 'struct static_key' directly, is now DEPRECATED. In addition
+ * static_key_{true,false}() is also DEPRECATED. IE DO NOT use the following:
+ *
+ * struct static_key false = STATIC_KEY_INIT_FALSE;
+ * struct static_key true = STATIC_KEY_INIT_TRUE;
+ * static_key_true()
+ * static_key_false()
+ *
+ * The updated API replacements are:
+ *
+ * DEFINE_STATIC_KEY_TRUE(key);
+ * DEFINE_STATIC_KEY_FALSE(key);
+ * static_key_likely()
+ * statick_key_unlikely()
+ *
  * Jump labels provide an interface to generate dynamic branches using
- * self-modifying code. Assuming toolchain and architecture support, the result
- * of a "if (static_key_false(&key))" statement is an unconditional branch (which
- * defaults to false - and the true block is placed out of line).
+ * self-modifying code. Assuming toolchain and architecture support, if we
+ * define a "key" that is initially false via "DEFINE_STATIC_KEY_FALSE(key)",
+ * an "if (static_branch_unlikely(&key))" statement is an unconditional branch
+ * (which defaults to false - and the true block is placed out of line).
+ * Similarly, we can define an initially true key via
+ * "DEFINE_STATIC_KEY_TRUE(key)", and use it in the same
+ * "if (static_branch_unlikely(&key))", in which case we will generate an
+ * unconditional branch to the out-of-line true branch. Keys that are
+ * initially true or false can be using in both static_branch_unlikely()
+ * and static_branch_likely() statements.
  *
- * However at runtime we can change the branch target using
- * static_key_slow_{inc,dec}(). These function as a 'reference' count on the key
- * object, and for as long as there are references all branches referring to
- * that particular key will point to the (out of line) true block.
+ * At runtime we can change the branch target by setting the key
+ * to true via a call to static_branch_enable(), or false using
+ * static_branch_disable(). If the direction of the branch is switched by
+ * these calls then we run-time modify the branch target via a
+ * no-op -> jump or jump -> no-op conversion. For example, for an
+ * initially false key that is used in an "if (static_branch_unlikely(&key))"
+ * statement, setting the key to true requires us to patch in a jump
+ * to the out-of-line of true branch.
  *
- * Since this relies on modifying code, the static_key_slow_{inc,dec}() functions
+ * In addtion to static_branch_{enable,disable}, we can also reference count
+ * the key or branch direction via static_branch_{inc,dec}. Thus,
+ * static_branch_inc() can be thought of as a 'make more true' and
+ * static_branch_dec() as a 'make more false'. The inc()/dec()
+ * interface is meant to be used exclusively from the inc()/dec() for a given
+ * key.
+ *
+ * Since this relies on modifying code, the branch modifying functions
  * must be considered absolute slow paths (machine wide synchronization etc.).
  * OTOH, since the affected branches are unconditional, their runtime overhead
  * will be absolutely minimal, esp. in the default (off) case where the total
@@ -29,20 +64,10 @@
  * cause significant performance degradation. Struct static_key_deferred and
  * static_key_slow_dec_deferred() provide for this.
  *
- * Lacking toolchain and or architecture support, jump labels fall back to a simple
- * conditional branch.
- *
- * struct static_key my_key = STATIC_KEY_INIT_TRUE;
- *
- *   if (static_key_true(&my_key)) {
- *   }
- *
- * will result in the true case being in-line and starts the key with a single
- * reference. Mixing static_key_true() and static_key_false() on the same key is not
- * allowed.
+ * Lacking toolchain and or architecture support, static keys fall back to a
+ * simple conditional branch.
  *
- * Not initializing the key (static data is initialized to 0s anyway) is the
- * same as using STATIC_KEY_INIT_FALSE.
+ * Additional babbling in: Documentation/static-keys.txt
  */
 
 #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL)
-- 
cgit v0.10.2


From 20f9ed1568c00bbd9e6af31341d25e06bc3d4a16 Mon Sep 17 00:00:00 2001
From: kbuild test robot <fengguang.wu@intel.com>
Date: Tue, 4 Aug 2015 02:47:48 +0800
Subject: locking/static_keys: Make verify_keys() static

Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
Cc: Jason Baron <jbaron@akamai.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20150803184748.GA80634@lkp-ib04
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/lib/test_static_keys.c b/lib/test_static_keys.c
index 81d8105..c61b299 100644
--- a/lib/test_static_keys.c
+++ b/lib/test_static_keys.c
@@ -70,7 +70,7 @@ static void invert_keys(struct test_key *keys, int size)
 	}
 }
 
-int verify_keys(struct test_key *keys, int size, bool invert)
+static int verify_keys(struct test_key *keys, int size, bool invert)
 {
 	int i;
 	bool ret, init;
-- 
cgit v0.10.2


From ba33034fffc1189d95301bd865f1c799256e72a2 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Tue, 4 Aug 2015 09:55:48 +0200
Subject: locking, compiler.h: Cast away attributes in the WRITE_ONCE() magic

The kernel build bot showed a new warning triggered by commit:

  76695af20c01 ("locking, arch: use WRITE_ONCE()/READ_ONCE() in smp_store_release()/smp_load_acquire()")

because Sparse does not like WRITE_ONCE() accessing elements
from the (sparse) RCU address space:

  fs/afs/inode.c:448:9: sparse: incorrect type in initializer (different address spaces)
  fs/afs/inode.c:448:9:    expected struct afs_permits *__val
  fs/afs/inode.c:448:9:    got void [noderef] <asn:4>*<noident>

Solution is to force cast away the sparse attributes for the initializer
of the union in WRITE_ONCE().

(And as this now gets too long, also split the macro into multiple lines.)

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrey Konovalov <andreyknvl@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1438674948-38310-2-git-send-email-borntraeger@de.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index e08a6ae..c836eb2 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -252,7 +252,12 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
 	({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; })
 
 #define WRITE_ONCE(x, val) \
-	({ union { typeof(x) __val; char __c[1]; } __u = { .__val = (val) }; __write_once_size(&(x), __u.__c, sizeof(x)); __u.__val; })
+({							\
+	union { typeof(x) __val; char __c[1]; } __u =	\
+		{ .__val = (__force typeof(x)) (val) }; \
+	__write_once_size(&(x), __u.__c, sizeof(x));	\
+	__u.__val;					\
+})
 
 /**
  * READ_ONCE_CTRL - Read a value heading a control dependency
-- 
cgit v0.10.2


From 654672d4ba1a6001c365833be895f9477c4d5eab Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 6 Aug 2015 17:54:37 +0100
Subject: locking/atomics: Add _{acquire|release|relaxed}() variants of some
 atomic operations

Whilst porting the generic qrwlock code over to arm64, it became
apparent that any portable locking code needs finer-grained control of
the memory-ordering guarantees provided by our atomic routines.

In particular: xchg, cmpxchg, {add,sub}_return are often used in
situations where full barrier semantics (currently the only option
available) are not required. For example, when a reader increments a
reader count to obtain a lock, checking the old value to see if a writer
was present, only acquire semantics are strictly needed.

This patch introduces three new ordering semantics for these operations:

  - *_relaxed: No ordering guarantees. This is similar to what we have
               already for the non-return atomics (e.g. atomic_add).

  - *_acquire: ACQUIRE semantics, similar to smp_load_acquire.

  - *_release: RELEASE semantics, similar to smp_store_release.

In memory-ordering speak, this means that the acquire/release semantics
are RCpc as opposed to RCsc. Consequently a RELEASE followed by an
ACQUIRE does not imply a full barrier, as already documented in
memory-barriers.txt.

Currently, all the new macros are conditionally mapped to the full-mb
variants, however if the *_relaxed version is provided by the
architecture, then the acquire/release variants are constructed by
supplementing the relaxed routine with an explicit barrier.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman.Long@hp.com
Cc: paulmck@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/1438880084-18856-2-git-send-email-will.deacon@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 8b98b42..00a5763 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -2,6 +2,329 @@
 #ifndef _LINUX_ATOMIC_H
 #define _LINUX_ATOMIC_H
 #include <asm/atomic.h>
+#include <asm/barrier.h>
+
+/*
+ * Relaxed variants of xchg, cmpxchg and some atomic operations.
+ *
+ * We support four variants:
+ *
+ * - Fully ordered: The default implementation, no suffix required.
+ * - Acquire: Provides ACQUIRE semantics, _acquire suffix.
+ * - Release: Provides RELEASE semantics, _release suffix.
+ * - Relaxed: No ordering guarantees, _relaxed suffix.
+ *
+ * For compound atomics performing both a load and a store, ACQUIRE
+ * semantics apply only to the load and RELEASE semantics only to the
+ * store portion of the operation. Note that a failed cmpxchg_acquire
+ * does -not- imply any memory ordering constraints.
+ *
+ * See Documentation/memory-barriers.txt for ACQUIRE/RELEASE definitions.
+ */
+
+#ifndef atomic_read_acquire
+#define  atomic_read_acquire(v)		smp_load_acquire(&(v)->counter)
+#endif
+
+#ifndef atomic_set_release
+#define  atomic_set_release(v, i)	smp_store_release(&(v)->counter, (i))
+#endif
+
+/*
+ * The idea here is to build acquire/release variants by adding explicit
+ * barriers on top of the relaxed variant. In the case where the relaxed
+ * variant is already fully ordered, no additional barriers are needed.
+ */
+#define __atomic_op_acquire(op, args...)				\
+({									\
+	typeof(op##_relaxed(args)) __ret  = op##_relaxed(args);		\
+	smp_mb__after_atomic();						\
+	__ret;								\
+})
+
+#define __atomic_op_release(op, args...)				\
+({									\
+	smp_mb__before_atomic();					\
+	op##_relaxed(args);						\
+})
+
+#define __atomic_op_fence(op, args...)					\
+({									\
+	typeof(op##_relaxed(args)) __ret;				\
+	smp_mb__before_atomic();					\
+	__ret = op##_relaxed(args);					\
+	smp_mb__after_atomic();						\
+	__ret;								\
+})
+
+/* atomic_add_return_relaxed */
+#ifndef atomic_add_return_relaxed
+#define  atomic_add_return_relaxed	atomic_add_return
+#define  atomic_add_return_acquire	atomic_add_return
+#define  atomic_add_return_release	atomic_add_return
+
+#else /* atomic_add_return_relaxed */
+
+#ifndef atomic_add_return_acquire
+#define  atomic_add_return_acquire(...)					\
+	__atomic_op_acquire(atomic_add_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic_add_return_release
+#define  atomic_add_return_release(...)					\
+	__atomic_op_release(atomic_add_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic_add_return
+#define  atomic_add_return(...)						\
+	__atomic_op_fence(atomic_add_return, __VA_ARGS__)
+#endif
+#endif /* atomic_add_return_relaxed */
+
+/* atomic_sub_return_relaxed */
+#ifndef atomic_sub_return_relaxed
+#define  atomic_sub_return_relaxed	atomic_sub_return
+#define  atomic_sub_return_acquire	atomic_sub_return
+#define  atomic_sub_return_release	atomic_sub_return
+
+#else /* atomic_sub_return_relaxed */
+
+#ifndef atomic_sub_return_acquire
+#define  atomic_sub_return_acquire(...)					\
+	__atomic_op_acquire(atomic_sub_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic_sub_return_release
+#define  atomic_sub_return_release(...)					\
+	__atomic_op_release(atomic_sub_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic_sub_return
+#define  atomic_sub_return(...)						\
+	__atomic_op_fence(atomic_sub_return, __VA_ARGS__)
+#endif
+#endif /* atomic_sub_return_relaxed */
+
+/* atomic_xchg_relaxed */
+#ifndef atomic_xchg_relaxed
+#define  atomic_xchg_relaxed		atomic_xchg
+#define  atomic_xchg_acquire		atomic_xchg
+#define  atomic_xchg_release		atomic_xchg
+
+#else /* atomic_xchg_relaxed */
+
+#ifndef atomic_xchg_acquire
+#define  atomic_xchg_acquire(...)					\
+	__atomic_op_acquire(atomic_xchg, __VA_ARGS__)
+#endif
+
+#ifndef atomic_xchg_release
+#define  atomic_xchg_release(...)					\
+	__atomic_op_release(atomic_xchg, __VA_ARGS__)
+#endif
+
+#ifndef atomic_xchg
+#define  atomic_xchg(...)						\
+	__atomic_op_fence(atomic_xchg, __VA_ARGS__)
+#endif
+#endif /* atomic_xchg_relaxed */
+
+/* atomic_cmpxchg_relaxed */
+#ifndef atomic_cmpxchg_relaxed
+#define  atomic_cmpxchg_relaxed		atomic_cmpxchg
+#define  atomic_cmpxchg_acquire		atomic_cmpxchg
+#define  atomic_cmpxchg_release		atomic_cmpxchg
+
+#else /* atomic_cmpxchg_relaxed */
+
+#ifndef atomic_cmpxchg_acquire
+#define  atomic_cmpxchg_acquire(...)					\
+	__atomic_op_acquire(atomic_cmpxchg, __VA_ARGS__)
+#endif
+
+#ifndef atomic_cmpxchg_release
+#define  atomic_cmpxchg_release(...)					\
+	__atomic_op_release(atomic_cmpxchg, __VA_ARGS__)
+#endif
+
+#ifndef atomic_cmpxchg
+#define  atomic_cmpxchg(...)						\
+	__atomic_op_fence(atomic_cmpxchg, __VA_ARGS__)
+#endif
+#endif /* atomic_cmpxchg_relaxed */
+
+#ifndef atomic64_read_acquire
+#define  atomic64_read_acquire(v)	smp_load_acquire(&(v)->counter)
+#endif
+
+#ifndef atomic64_set_release
+#define  atomic64_set_release(v, i)	smp_store_release(&(v)->counter, (i))
+#endif
+
+/* atomic64_add_return_relaxed */
+#ifndef atomic64_add_return_relaxed
+#define  atomic64_add_return_relaxed	atomic64_add_return
+#define  atomic64_add_return_acquire	atomic64_add_return
+#define  atomic64_add_return_release	atomic64_add_return
+
+#else /* atomic64_add_return_relaxed */
+
+#ifndef atomic64_add_return_acquire
+#define  atomic64_add_return_acquire(...)				\
+	__atomic_op_acquire(atomic64_add_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_add_return_release
+#define  atomic64_add_return_release(...)				\
+	__atomic_op_release(atomic64_add_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_add_return
+#define  atomic64_add_return(...)					\
+	__atomic_op_fence(atomic64_add_return, __VA_ARGS__)
+#endif
+#endif /* atomic64_add_return_relaxed */
+
+/* atomic64_sub_return_relaxed */
+#ifndef atomic64_sub_return_relaxed
+#define  atomic64_sub_return_relaxed	atomic64_sub_return
+#define  atomic64_sub_return_acquire	atomic64_sub_return
+#define  atomic64_sub_return_release	atomic64_sub_return
+
+#else /* atomic64_sub_return_relaxed */
+
+#ifndef atomic64_sub_return_acquire
+#define  atomic64_sub_return_acquire(...)				\
+	__atomic_op_acquire(atomic64_sub_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_sub_return_release
+#define  atomic64_sub_return_release(...)				\
+	__atomic_op_release(atomic64_sub_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_sub_return
+#define  atomic64_sub_return(...)					\
+	__atomic_op_fence(atomic64_sub_return, __VA_ARGS__)
+#endif
+#endif /* atomic64_sub_return_relaxed */
+
+/* atomic64_xchg_relaxed */
+#ifndef atomic64_xchg_relaxed
+#define  atomic64_xchg_relaxed		atomic64_xchg
+#define  atomic64_xchg_acquire		atomic64_xchg
+#define  atomic64_xchg_release		atomic64_xchg
+
+#else /* atomic64_xchg_relaxed */
+
+#ifndef atomic64_xchg_acquire
+#define  atomic64_xchg_acquire(...)					\
+	__atomic_op_acquire(atomic64_xchg, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_xchg_release
+#define  atomic64_xchg_release(...)					\
+	__atomic_op_release(atomic64_xchg, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_xchg
+#define  atomic64_xchg(...)						\
+	__atomic_op_fence(atomic64_xchg, __VA_ARGS__)
+#endif
+#endif /* atomic64_xchg_relaxed */
+
+/* atomic64_cmpxchg_relaxed */
+#ifndef atomic64_cmpxchg_relaxed
+#define  atomic64_cmpxchg_relaxed	atomic64_cmpxchg
+#define  atomic64_cmpxchg_acquire	atomic64_cmpxchg
+#define  atomic64_cmpxchg_release	atomic64_cmpxchg
+
+#else /* atomic64_cmpxchg_relaxed */
+
+#ifndef atomic64_cmpxchg_acquire
+#define  atomic64_cmpxchg_acquire(...)					\
+	__atomic_op_acquire(atomic64_cmpxchg, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_cmpxchg_release
+#define  atomic64_cmpxchg_release(...)					\
+	__atomic_op_release(atomic64_cmpxchg, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_cmpxchg
+#define  atomic64_cmpxchg(...)						\
+	__atomic_op_fence(atomic64_cmpxchg, __VA_ARGS__)
+#endif
+#endif /* atomic64_cmpxchg_relaxed */
+
+/* cmpxchg_relaxed */
+#ifndef cmpxchg_relaxed
+#define  cmpxchg_relaxed		cmpxchg
+#define  cmpxchg_acquire		cmpxchg
+#define  cmpxchg_release		cmpxchg
+
+#else /* cmpxchg_relaxed */
+
+#ifndef cmpxchg_acquire
+#define  cmpxchg_acquire(...)						\
+	__atomic_op_acquire(cmpxchg, __VA_ARGS__)
+#endif
+
+#ifndef cmpxchg_release
+#define  cmpxchg_release(...)						\
+	__atomic_op_release(cmpxchg, __VA_ARGS__)
+#endif
+
+#ifndef cmpxchg
+#define  cmpxchg(...)							\
+	__atomic_op_fence(cmpxchg, __VA_ARGS__)
+#endif
+#endif /* cmpxchg_relaxed */
+
+/* cmpxchg64_relaxed */
+#ifndef cmpxchg64_relaxed
+#define  cmpxchg64_relaxed		cmpxchg64
+#define  cmpxchg64_acquire		cmpxchg64
+#define  cmpxchg64_release		cmpxchg64
+
+#else /* cmpxchg64_relaxed */
+
+#ifndef cmpxchg64_acquire
+#define  cmpxchg64_acquire(...)						\
+	__atomic_op_acquire(cmpxchg64, __VA_ARGS__)
+#endif
+
+#ifndef cmpxchg64_release
+#define  cmpxchg64_release(...)						\
+	__atomic_op_release(cmpxchg64, __VA_ARGS__)
+#endif
+
+#ifndef cmpxchg64
+#define  cmpxchg64(...)							\
+	__atomic_op_fence(cmpxchg64, __VA_ARGS__)
+#endif
+#endif /* cmpxchg64_relaxed */
+
+/* xchg_relaxed */
+#ifndef xchg_relaxed
+#define  xchg_relaxed			xchg
+#define  xchg_acquire			xchg
+#define  xchg_release			xchg
+
+#else /* xchg_relaxed */
+
+#ifndef xchg_acquire
+#define  xchg_acquire(...)		__atomic_op_acquire(xchg, __VA_ARGS__)
+#endif
+
+#ifndef xchg_release
+#define  xchg_release(...)		__atomic_op_release(xchg, __VA_ARGS__)
+#endif
+
+#ifndef xchg
+#define  xchg(...)			__atomic_op_fence(xchg, __VA_ARGS__)
+#endif
+#endif /* xchg_relaxed */
 
 /**
  * atomic_add_unless - add unless the number is already a given value
-- 
cgit v0.10.2


From 586b610e43a5ad5096640312fefa6ce931738c7d Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 6 Aug 2015 17:54:38 +0100
Subject: locking, asm-generic: Rework atomic-long.h to avoid bulk code
 duplication

We can use some (admittedly ugly) macros to generate the 32-bit and
64-bit based atomic_long implementations from the same code.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman.Long@hp.com
Cc: paulmck@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/1438880084-18856-3-git-send-email-will.deacon@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/include/asm-generic/atomic-long.h b/include/asm-generic/atomic-long.h
index b7babf0..beaea54 100644
--- a/include/asm-generic/atomic-long.h
+++ b/include/asm-generic/atomic-long.h
@@ -23,236 +23,127 @@
 typedef atomic64_t atomic_long_t;
 
 #define ATOMIC_LONG_INIT(i)	ATOMIC64_INIT(i)
+#define ATOMIC_LONG_PFX(x)	atomic64 ## x
 
-static inline long atomic_long_read(atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return (long)atomic64_read(v);
-}
-
-static inline void atomic_long_set(atomic_long_t *l, long i)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	atomic64_set(v, i);
-}
-
-static inline void atomic_long_inc(atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	atomic64_inc(v);
-}
-
-static inline void atomic_long_dec(atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	atomic64_dec(v);
-}
-
-static inline void atomic_long_add(long i, atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	atomic64_add(i, v);
-}
-
-static inline void atomic_long_sub(long i, atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	atomic64_sub(i, v);
-}
-
-static inline int atomic_long_sub_and_test(long i, atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return atomic64_sub_and_test(i, v);
-}
-
-static inline int atomic_long_dec_and_test(atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return atomic64_dec_and_test(v);
-}
-
-static inline int atomic_long_inc_and_test(atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return atomic64_inc_and_test(v);
-}
-
-static inline int atomic_long_add_negative(long i, atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return atomic64_add_negative(i, v);
-}
-
-static inline long atomic_long_add_return(long i, atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return (long)atomic64_add_return(i, v);
-}
-
-static inline long atomic_long_sub_return(long i, atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return (long)atomic64_sub_return(i, v);
-}
-
-static inline long atomic_long_inc_return(atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return (long)atomic64_inc_return(v);
-}
-
-static inline long atomic_long_dec_return(atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return (long)atomic64_dec_return(v);
-}
-
-static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return (long)atomic64_add_unless(v, a, u);
-}
-
-#define atomic_long_inc_not_zero(l) atomic64_inc_not_zero((atomic64_t *)(l))
-
-#define atomic_long_cmpxchg(l, old, new) \
-	(atomic64_cmpxchg((atomic64_t *)(l), (old), (new)))
-#define atomic_long_xchg(v, new) \
-	(atomic64_xchg((atomic64_t *)(v), (new)))
-
-#else  /*  BITS_PER_LONG == 64  */
+#else
 
 typedef atomic_t atomic_long_t;
 
 #define ATOMIC_LONG_INIT(i)	ATOMIC_INIT(i)
+#define ATOMIC_LONG_PFX(x)	atomic ## x
+
+#endif
+
 static inline long atomic_long_read(atomic_long_t *l)
 {
-	atomic_t *v = (atomic_t *)l;
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
 
-	return (long)atomic_read(v);
+	return (long)ATOMIC_LONG_PFX(_read)(v);
 }
 
 static inline void atomic_long_set(atomic_long_t *l, long i)
 {
-	atomic_t *v = (atomic_t *)l;
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
 
-	atomic_set(v, i);
+	ATOMIC_LONG_PFX(_set)(v, i);
 }
 
 static inline void atomic_long_inc(atomic_long_t *l)
 {
-	atomic_t *v = (atomic_t *)l;
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
 
-	atomic_inc(v);
+	ATOMIC_LONG_PFX(_inc)(v);
 }
 
 static inline void atomic_long_dec(atomic_long_t *l)
 {
-	atomic_t *v = (atomic_t *)l;
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
 
-	atomic_dec(v);
+	ATOMIC_LONG_PFX(_dec)(v);
 }
 
 static inline void atomic_long_add(long i, atomic_long_t *l)
 {
-	atomic_t *v = (atomic_t *)l;
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
 
-	atomic_add(i, v);
+	ATOMIC_LONG_PFX(_add)(i, v);
 }
 
 static inline void atomic_long_sub(long i, atomic_long_t *l)
 {
-	atomic_t *v = (atomic_t *)l;
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
 
-	atomic_sub(i, v);
+	ATOMIC_LONG_PFX(_sub)(i, v);
 }
 
 static inline int atomic_long_sub_and_test(long i, atomic_long_t *l)
 {
-	atomic_t *v = (atomic_t *)l;
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
 
-	return atomic_sub_and_test(i, v);
+	return ATOMIC_LONG_PFX(_sub_and_test)(i, v);
 }
 
 static inline int atomic_long_dec_and_test(atomic_long_t *l)
 {
-	atomic_t *v = (atomic_t *)l;
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
 
-	return atomic_dec_and_test(v);
+	return ATOMIC_LONG_PFX(_dec_and_test)(v);
 }
 
 static inline int atomic_long_inc_and_test(atomic_long_t *l)
 {
-	atomic_t *v = (atomic_t *)l;
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
 
-	return atomic_inc_and_test(v);
+	return ATOMIC_LONG_PFX(_inc_and_test)(v);
 }
 
 static inline int atomic_long_add_negative(long i, atomic_long_t *l)
 {
-	atomic_t *v = (atomic_t *)l;
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
 
-	return atomic_add_negative(i, v);
+	return ATOMIC_LONG_PFX(_add_negative)(i, v);
 }
 
 static inline long atomic_long_add_return(long i, atomic_long_t *l)
 {
-	atomic_t *v = (atomic_t *)l;
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
 
-	return (long)atomic_add_return(i, v);
+	return (long)ATOMIC_LONG_PFX(_add_return)(i, v);
 }
 
 static inline long atomic_long_sub_return(long i, atomic_long_t *l)
 {
-	atomic_t *v = (atomic_t *)l;
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
 
-	return (long)atomic_sub_return(i, v);
+	return (long)ATOMIC_LONG_PFX(_sub_return)(i, v);
 }
 
 static inline long atomic_long_inc_return(atomic_long_t *l)
 {
-	atomic_t *v = (atomic_t *)l;
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
 
-	return (long)atomic_inc_return(v);
+	return (long)ATOMIC_LONG_PFX(_inc_return)(v);
 }
 
 static inline long atomic_long_dec_return(atomic_long_t *l)
 {
-	atomic_t *v = (atomic_t *)l;
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
 
-	return (long)atomic_dec_return(v);
+	return (long)ATOMIC_LONG_PFX(_dec_return)(v);
 }
 
 static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u)
 {
-	atomic_t *v = (atomic_t *)l;
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
 
-	return (long)atomic_add_unless(v, a, u);
+	return (long)ATOMIC_LONG_PFX(_add_unless)(v, a, u);
 }
 
-#define atomic_long_inc_not_zero(l) atomic_inc_not_zero((atomic_t *)(l))
-
+#define atomic_long_inc_not_zero(l) \
+	ATOMIC_LONG_PFX(_inc_not_zero)((ATOMIC_LONG_PFX(_t) *)(l))
 #define atomic_long_cmpxchg(l, old, new) \
-	(atomic_cmpxchg((atomic_t *)(l), (old), (new)))
+	(ATOMIC_LONG_PFX(_cmpxchg)((ATOMIC_LONG_PFX(_t) *)(l), (old), (new)))
 #define atomic_long_xchg(v, new) \
-	(atomic_xchg((atomic_t *)(v), (new)))
-
-#endif  /*  BITS_PER_LONG == 64  */
+	(ATOMIC_LONG_PFX(_xchg)((ATOMIC_LONG_PFX(_t) *)(v), (new)))
 
 #endif  /*  _ASM_GENERIC_ATOMIC_LONG_H  */
-- 
cgit v0.10.2


From 6d79ef2d30ee5af7315535d1e7bf6fce0008f815 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 6 Aug 2015 17:54:39 +0100
Subject: locking, asm-generic: Add _{relaxed|acquire|release}() variants for
 'atomic_long_t'

This patch adds 'atomic_long_t' wrappers for the new relaxed atomic operations.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman.Long@hp.com
Cc: paulmck@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/1438880084-18856-4-git-send-email-will.deacon@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/include/asm-generic/atomic-long.h b/include/asm-generic/atomic-long.h
index beaea54..a94cbeb 100644
--- a/include/asm-generic/atomic-long.h
+++ b/include/asm-generic/atomic-long.h
@@ -34,19 +34,69 @@ typedef atomic_t atomic_long_t;
 
 #endif
 
-static inline long atomic_long_read(atomic_long_t *l)
-{
-	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
-
-	return (long)ATOMIC_LONG_PFX(_read)(v);
+#define ATOMIC_LONG_READ_OP(mo)						\
+static inline long atomic_long_read##mo(atomic_long_t *l)		\
+{									\
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;		\
+									\
+	return (long)ATOMIC_LONG_PFX(_read##mo)(v);			\
 }
+ATOMIC_LONG_READ_OP()
+ATOMIC_LONG_READ_OP(_acquire)
 
-static inline void atomic_long_set(atomic_long_t *l, long i)
-{
-	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
+#undef ATOMIC_LONG_READ_OP
 
-	ATOMIC_LONG_PFX(_set)(v, i);
+#define ATOMIC_LONG_SET_OP(mo)						\
+static inline void atomic_long_set##mo(atomic_long_t *l, long i)	\
+{									\
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;		\
+									\
+	ATOMIC_LONG_PFX(_set##mo)(v, i);				\
+}
+ATOMIC_LONG_SET_OP()
+ATOMIC_LONG_SET_OP(_release)
+
+#undef ATOMIC_LONG_SET_OP
+
+#define ATOMIC_LONG_ADD_SUB_OP(op, mo)					\
+static inline long							\
+atomic_long_##op##_return##mo(long i, atomic_long_t *l)			\
+{									\
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;		\
+									\
+	return (long)ATOMIC_LONG_PFX(_##op##_return##mo)(i, v);		\
 }
+ATOMIC_LONG_ADD_SUB_OP(add,)
+ATOMIC_LONG_ADD_SUB_OP(add, _relaxed)
+ATOMIC_LONG_ADD_SUB_OP(add, _acquire)
+ATOMIC_LONG_ADD_SUB_OP(add, _release)
+ATOMIC_LONG_ADD_SUB_OP(sub,)
+ATOMIC_LONG_ADD_SUB_OP(sub, _relaxed)
+ATOMIC_LONG_ADD_SUB_OP(sub, _acquire)
+ATOMIC_LONG_ADD_SUB_OP(sub, _release)
+
+#undef ATOMIC_LONG_ADD_SUB_OP
+
+#define atomic_long_cmpxchg_relaxed(l, old, new) \
+	(ATOMIC_LONG_PFX(_cmpxchg_relaxed)((ATOMIC_LONG_PFX(_t) *)(l), \
+					   (old), (new)))
+#define atomic_long_cmpxchg_acquire(l, old, new) \
+	(ATOMIC_LONG_PFX(_cmpxchg_acquire)((ATOMIC_LONG_PFX(_t) *)(l), \
+					   (old), (new)))
+#define atomic_long_cmpxchg_release(l, old, new) \
+	(ATOMIC_LONG_PFX(_cmpxchg_release)((ATOMIC_LONG_PFX(_t) *)(l), \
+					   (old), (new)))
+#define atomic_long_cmpxchg(l, old, new) \
+	(ATOMIC_LONG_PFX(_cmpxchg)((ATOMIC_LONG_PFX(_t) *)(l), (old), (new)))
+
+#define atomic_long_xchg_relaxed(v, new) \
+	(ATOMIC_LONG_PFX(_xchg_relaxed)((ATOMIC_LONG_PFX(_t) *)(v), (new)))
+#define atomic_long_xchg_acquire(v, new) \
+	(ATOMIC_LONG_PFX(_xchg_acquire)((ATOMIC_LONG_PFX(_t) *)(v), (new)))
+#define atomic_long_xchg_release(v, new) \
+	(ATOMIC_LONG_PFX(_xchg_release)((ATOMIC_LONG_PFX(_t) *)(v), (new)))
+#define atomic_long_xchg(v, new) \
+	(ATOMIC_LONG_PFX(_xchg)((ATOMIC_LONG_PFX(_t) *)(v), (new)))
 
 static inline void atomic_long_inc(atomic_long_t *l)
 {
@@ -104,20 +154,6 @@ static inline int atomic_long_add_negative(long i, atomic_long_t *l)
 	return ATOMIC_LONG_PFX(_add_negative)(i, v);
 }
 
-static inline long atomic_long_add_return(long i, atomic_long_t *l)
-{
-	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
-
-	return (long)ATOMIC_LONG_PFX(_add_return)(i, v);
-}
-
-static inline long atomic_long_sub_return(long i, atomic_long_t *l)
-{
-	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
-
-	return (long)ATOMIC_LONG_PFX(_sub_return)(i, v);
-}
-
 static inline long atomic_long_inc_return(atomic_long_t *l)
 {
 	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
@@ -141,9 +177,5 @@ static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u)
 
 #define atomic_long_inc_not_zero(l) \
 	ATOMIC_LONG_PFX(_inc_not_zero)((ATOMIC_LONG_PFX(_t) *)(l))
-#define atomic_long_cmpxchg(l, old, new) \
-	(ATOMIC_LONG_PFX(_cmpxchg)((ATOMIC_LONG_PFX(_t) *)(l), (old), (new)))
-#define atomic_long_xchg(v, new) \
-	(ATOMIC_LONG_PFX(_xchg)((ATOMIC_LONG_PFX(_t) *)(v), (new)))
 
 #endif  /*  _ASM_GENERIC_ATOMIC_LONG_H  */
-- 
cgit v0.10.2


From f5468ffde13fc991bd4d6bdec507ffd5777865bd Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 6 Aug 2015 17:54:40 +0100
Subject: locking/lockref: Remove homebrew cmpxchg64_relaxed() macro definition

cmpxchg64_relaxed() is now defined by linux/atomic.h, so we can
remove our local definition from the lockref code.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman.Long@hp.com
Cc: paulmck@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/1438880084-18856-5-git-send-email-will.deacon@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/lib/lockref.c b/lib/lockref.c
index 494994b..5a92189 100644
--- a/lib/lockref.c
+++ b/lib/lockref.c
@@ -4,14 +4,6 @@
 #if USE_CMPXCHG_LOCKREF
 
 /*
- * Allow weakly-ordered memory architectures to provide barrier-less
- * cmpxchg semantics for lockref updates.
- */
-#ifndef cmpxchg64_relaxed
-# define cmpxchg64_relaxed cmpxchg64
-#endif
-
-/*
  * Note that the "cmpxchg()" reloads the "old" value for the
  * failure case.
  */
-- 
cgit v0.10.2


From 2b2a85a4d3534b8884fcfa5bb52837f0e1c672bc Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 6 Aug 2015 17:54:41 +0100
Subject: locking/qrwlock: Implement queue_write_unlock() using
 smp_store_release()

Since the following commit:

  536fa402221f ("compiler: Allow 1- and 2-byte smp_load_acquire() and smp_store_release()")

smp_store_release() supports byte accesses, so use that in writer unlock
and remove the conditional macro override.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Waiman Long <Waiman.Long@hp.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: paulmck@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/1438880084-18856-6-git-send-email-will.deacon@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/arch/x86/include/asm/qrwlock.h b/arch/x86/include/asm/qrwlock.h
index a8810bf..c537cbb 100644
--- a/arch/x86/include/asm/qrwlock.h
+++ b/arch/x86/include/asm/qrwlock.h
@@ -2,16 +2,6 @@
 #define _ASM_X86_QRWLOCK_H
 
 #include <asm-generic/qrwlock_types.h>
-
-#ifndef CONFIG_X86_PPRO_FENCE
-#define queued_write_unlock queued_write_unlock
-static inline void queued_write_unlock(struct qrwlock *lock)
-{
-        barrier();
-        ACCESS_ONCE(*(u8 *)&lock->cnts) = 0;
-}
-#endif
-
 #include <asm-generic/qrwlock.h>
 
 #endif /* _ASM_X86_QRWLOCK_H */
diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h
index deb9e8b..eb673dd 100644
--- a/include/asm-generic/qrwlock.h
+++ b/include/asm-generic/qrwlock.h
@@ -134,21 +134,14 @@ static inline void queued_read_unlock(struct qrwlock *lock)
 	atomic_sub(_QR_BIAS, &lock->cnts);
 }
 
-#ifndef queued_write_unlock
 /**
  * queued_write_unlock - release write lock of a queue rwlock
  * @lock : Pointer to queue rwlock structure
  */
 static inline void queued_write_unlock(struct qrwlock *lock)
 {
-	/*
-	 * If the writer field is atomic, it can be cleared directly.
-	 * Otherwise, an atomic subtraction will be used to clear it.
-	 */
-	smp_mb__before_atomic();
-	atomic_sub(_QW_LOCKED, &lock->cnts);
+	smp_store_release((u8 *)&lock->cnts, 0);
 }
-#endif
 
 /*
  * Remapping rwlock architecture specific functions to the corresponding
-- 
cgit v0.10.2


From 77e430e3e45662b696dc49aa53ea0f7ac63f2574 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 6 Aug 2015 17:54:42 +0100
Subject: locking/qrwlock: Make use of _{acquire|release|relaxed}() atomics

The qrwlock implementation is slightly heavy in its use of memory
barriers, mainly through the use of _cmpxchg() and _return() atomics, which
imply full barrier semantics.

This patch modifies the qrwlock code to use the more relaxed atomic
routines so that we can reduce the unnecessary barrier overhead on
weakly-ordered architectures.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman.Long@hp.com
Cc: paulmck@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/1438880084-18856-7-git-send-email-will.deacon@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h
index eb673dd..54a8e65 100644
--- a/include/asm-generic/qrwlock.h
+++ b/include/asm-generic/qrwlock.h
@@ -68,7 +68,7 @@ static inline int queued_read_trylock(struct qrwlock *lock)
 
 	cnts = atomic_read(&lock->cnts);
 	if (likely(!(cnts & _QW_WMASK))) {
-		cnts = (u32)atomic_add_return(_QR_BIAS, &lock->cnts);
+		cnts = (u32)atomic_add_return_acquire(_QR_BIAS, &lock->cnts);
 		if (likely(!(cnts & _QW_WMASK)))
 			return 1;
 		atomic_sub(_QR_BIAS, &lock->cnts);
@@ -89,8 +89,8 @@ static inline int queued_write_trylock(struct qrwlock *lock)
 	if (unlikely(cnts))
 		return 0;
 
-	return likely(atomic_cmpxchg(&lock->cnts,
-				     cnts, cnts | _QW_LOCKED) == cnts);
+	return likely(atomic_cmpxchg_acquire(&lock->cnts,
+					     cnts, cnts | _QW_LOCKED) == cnts);
 }
 /**
  * queued_read_lock - acquire read lock of a queue rwlock
@@ -100,7 +100,7 @@ static inline void queued_read_lock(struct qrwlock *lock)
 {
 	u32 cnts;
 
-	cnts = atomic_add_return(_QR_BIAS, &lock->cnts);
+	cnts = atomic_add_return_acquire(_QR_BIAS, &lock->cnts);
 	if (likely(!(cnts & _QW_WMASK)))
 		return;
 
@@ -115,7 +115,7 @@ static inline void queued_read_lock(struct qrwlock *lock)
 static inline void queued_write_lock(struct qrwlock *lock)
 {
 	/* Optimize for the unfair lock case where the fair flag is 0. */
-	if (atomic_cmpxchg(&lock->cnts, 0, _QW_LOCKED) == 0)
+	if (atomic_cmpxchg_acquire(&lock->cnts, 0, _QW_LOCKED) == 0)
 		return;
 
 	queued_write_lock_slowpath(lock);
@@ -130,8 +130,7 @@ static inline void queued_read_unlock(struct qrwlock *lock)
 	/*
 	 * Atomically decrement the reader count
 	 */
-	smp_mb__before_atomic();
-	atomic_sub(_QR_BIAS, &lock->cnts);
+	(void)atomic_sub_return_release(_QR_BIAS, &lock->cnts);
 }
 
 /**
diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c
index 6a7a3b8..f17a3e3 100644
--- a/kernel/locking/qrwlock.c
+++ b/kernel/locking/qrwlock.c
@@ -55,7 +55,7 @@ rspin_until_writer_unlock(struct qrwlock *lock, u32 cnts)
 {
 	while ((cnts & _QW_WMASK) == _QW_LOCKED) {
 		cpu_relax_lowlatency();
-		cnts = smp_load_acquire((u32 *)&lock->cnts);
+		cnts = atomic_read_acquire(&lock->cnts);
 	}
 }
 
@@ -74,8 +74,9 @@ void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts)
 		 * Readers in interrupt context will get the lock immediately
 		 * if the writer is just waiting (not holding the lock yet).
 		 * The rspin_until_writer_unlock() function returns immediately
-		 * in this case. Otherwise, they will spin until the lock
-		 * is available without waiting in the queue.
+		 * in this case. Otherwise, they will spin (with ACQUIRE
+		 * semantics) until the lock is available without waiting in
+		 * the queue.
 		 */
 		rspin_until_writer_unlock(lock, cnts);
 		return;
@@ -88,12 +89,11 @@ void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts)
 	arch_spin_lock(&lock->lock);
 
 	/*
-	 * At the head of the wait queue now, increment the reader count
-	 * and wait until the writer, if it has the lock, has gone away.
-	 * At ths stage, it is not possible for a writer to remain in the
-	 * waiting state (_QW_WAITING). So there won't be any deadlock.
+	 * The ACQUIRE semantics of the following spinning code ensure
+	 * that accesses can't leak upwards out of our subsequent critical
+	 * section in the case that the lock is currently held for write.
 	 */
-	cnts = atomic_add_return(_QR_BIAS, &lock->cnts) - _QR_BIAS;
+	cnts = atomic_add_return_acquire(_QR_BIAS, &lock->cnts) - _QR_BIAS;
 	rspin_until_writer_unlock(lock, cnts);
 
 	/*
@@ -116,7 +116,7 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
 
 	/* Try to acquire the lock directly if no reader is present */
 	if (!atomic_read(&lock->cnts) &&
-	    (atomic_cmpxchg(&lock->cnts, 0, _QW_LOCKED) == 0))
+	    (atomic_cmpxchg_acquire(&lock->cnts, 0, _QW_LOCKED) == 0))
 		goto unlock;
 
 	/*
@@ -127,7 +127,7 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
 		struct __qrwlock *l = (struct __qrwlock *)lock;
 
 		if (!READ_ONCE(l->wmode) &&
-		   (cmpxchg(&l->wmode, 0, _QW_WAITING) == 0))
+		   (cmpxchg_relaxed(&l->wmode, 0, _QW_WAITING) == 0))
 			break;
 
 		cpu_relax_lowlatency();
@@ -137,8 +137,8 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
 	for (;;) {
 		cnts = atomic_read(&lock->cnts);
 		if ((cnts == _QW_WAITING) &&
-		    (atomic_cmpxchg(&lock->cnts, _QW_WAITING,
-				    _QW_LOCKED) == _QW_WAITING))
+		    (atomic_cmpxchg_acquire(&lock->cnts, _QW_WAITING,
+					    _QW_LOCKED) == _QW_WAITING))
 			break;
 
 		cpu_relax_lowlatency();
-- 
cgit v0.10.2


From cd074aea9261784e44f292e1132830ec221802c6 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 6 Aug 2015 17:54:43 +0100
Subject: locking, include/llist: Use linux/atomic.h instead of asm/cmpxchg.h

Including an asm/ header directly is best avoided, so use linux/atomic.h
instead of asm/cmpxchg.h in linux/llist.h.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman.Long@hp.com
Cc: paulmck@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/1438880084-18856-8-git-send-email-will.deacon@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/include/linux/llist.h b/include/linux/llist.h
index fbf10a0..fd4ca0b 100644
--- a/include/linux/llist.h
+++ b/include/linux/llist.h
@@ -55,8 +55,8 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
+#include <linux/atomic.h>
 #include <linux/kernel.h>
-#include <asm/cmpxchg.h>
 
 struct llist_head {
 	struct llist_node *first;
-- 
cgit v0.10.2


From 0ca326de7aa9cb253db9c1a3eb3f0487c8dbf912 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 6 Aug 2015 17:54:44 +0100
Subject: locking, ARM, atomics: Define our SMP atomics in terms of _relaxed()
 operations

By defining our SMP atomics in terms of relaxed operations, we gain
a small reduction in code size and have acquire/release/fence variants
generated automatically by the core code.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman.Long@hp.com
Cc: paulmck@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/1438880084-18856-9-git-send-email-will.deacon@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index 82b75a7..fe3ef39 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -57,12 +57,11 @@ static inline void atomic_##op(int i, atomic_t *v)			\
 }									\
 
 #define ATOMIC_OP_RETURN(op, c_op, asm_op)				\
-static inline int atomic_##op##_return(int i, atomic_t *v)		\
+static inline int atomic_##op##_return_relaxed(int i, atomic_t *v)	\
 {									\
 	unsigned long tmp;						\
 	int result;							\
 									\
-	smp_mb();							\
 	prefetchw(&v->counter);						\
 									\
 	__asm__ __volatile__("@ atomic_" #op "_return\n"		\
@@ -75,17 +74,17 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 	: "r" (&v->counter), "Ir" (i)					\
 	: "cc");							\
 									\
-	smp_mb();							\
-									\
 	return result;							\
 }
 
-static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new)
+#define atomic_add_return_relaxed	atomic_add_return_relaxed
+#define atomic_sub_return_relaxed	atomic_sub_return_relaxed
+
+static inline int atomic_cmpxchg_relaxed(atomic_t *ptr, int old, int new)
 {
 	int oldval;
 	unsigned long res;
 
-	smp_mb();
 	prefetchw(&ptr->counter);
 
 	do {
@@ -99,10 +98,9 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new)
 		    : "cc");
 	} while (res);
 
-	smp_mb();
-
 	return oldval;
 }
+#define atomic_cmpxchg_relaxed		atomic_cmpxchg_relaxed
 
 static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 {
@@ -297,12 +295,12 @@ static inline void atomic64_##op(long long i, atomic64_t *v)		\
 }									\
 
 #define ATOMIC64_OP_RETURN(op, op1, op2)				\
-static inline long long atomic64_##op##_return(long long i, atomic64_t *v) \
+static inline long long							\
+atomic64_##op##_return_relaxed(long long i, atomic64_t *v)		\
 {									\
 	long long result;						\
 	unsigned long tmp;						\
 									\
-	smp_mb();							\
 	prefetchw(&v->counter);						\
 									\
 	__asm__ __volatile__("@ atomic64_" #op "_return\n"		\
@@ -316,8 +314,6 @@ static inline long long atomic64_##op##_return(long long i, atomic64_t *v) \
 	: "r" (&v->counter), "r" (i)					\
 	: "cc");							\
 									\
-	smp_mb();							\
-									\
 	return result;							\
 }
 
@@ -328,6 +324,9 @@ static inline long long atomic64_##op##_return(long long i, atomic64_t *v) \
 ATOMIC64_OPS(add, adds, adc)
 ATOMIC64_OPS(sub, subs, sbc)
 
+#define atomic64_add_return_relaxed	atomic64_add_return_relaxed
+#define atomic64_sub_return_relaxed	atomic64_sub_return_relaxed
+
 #define atomic64_andnot atomic64_andnot
 
 ATOMIC64_OP(and, and, and)
@@ -339,13 +338,12 @@ ATOMIC64_OP(xor, eor, eor)
 #undef ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
 
-static inline long long atomic64_cmpxchg(atomic64_t *ptr, long long old,
-					long long new)
+static inline long long
+atomic64_cmpxchg_relaxed(atomic64_t *ptr, long long old, long long new)
 {
 	long long oldval;
 	unsigned long res;
 
-	smp_mb();
 	prefetchw(&ptr->counter);
 
 	do {
@@ -360,17 +358,15 @@ static inline long long atomic64_cmpxchg(atomic64_t *ptr, long long old,
 		: "cc");
 	} while (res);
 
-	smp_mb();
-
 	return oldval;
 }
+#define atomic64_cmpxchg_relaxed	atomic64_cmpxchg_relaxed
 
-static inline long long atomic64_xchg(atomic64_t *ptr, long long new)
+static inline long long atomic64_xchg_relaxed(atomic64_t *ptr, long long new)
 {
 	long long result;
 	unsigned long tmp;
 
-	smp_mb();
 	prefetchw(&ptr->counter);
 
 	__asm__ __volatile__("@ atomic64_xchg\n"
@@ -382,10 +378,9 @@ static inline long long atomic64_xchg(atomic64_t *ptr, long long new)
 	: "r" (&ptr->counter), "r" (new)
 	: "cc");
 
-	smp_mb();
-
 	return result;
 }
+#define atomic64_xchg_relaxed		atomic64_xchg_relaxed
 
 static inline long long atomic64_dec_if_positive(atomic64_t *v)
 {
diff --git a/arch/arm/include/asm/cmpxchg.h b/arch/arm/include/asm/cmpxchg.h
index 1692a05..916a274 100644
--- a/arch/arm/include/asm/cmpxchg.h
+++ b/arch/arm/include/asm/cmpxchg.h
@@ -35,7 +35,6 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
 	unsigned int tmp;
 #endif
 
-	smp_mb();
 	prefetchw((const void *)ptr);
 
 	switch (size) {
@@ -98,12 +97,11 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
 		__bad_xchg(ptr, size), ret = 0;
 		break;
 	}
-	smp_mb();
 
 	return ret;
 }
 
-#define xchg(ptr, x) ({							\
+#define xchg_relaxed(ptr, x) ({						\
 	(__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr),		\
 				   sizeof(*(ptr)));			\
 })
@@ -117,6 +115,8 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
 #error "SMP is not supported on this platform"
 #endif
 
+#define xchg xchg_relaxed
+
 /*
  * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make
  * them available.
@@ -194,23 +194,11 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
 	return oldval;
 }
 
-static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
-					 unsigned long new, int size)
-{
-	unsigned long ret;
-
-	smp_mb();
-	ret = __cmpxchg(ptr, old, new, size);
-	smp_mb();
-
-	return ret;
-}
-
-#define cmpxchg(ptr,o,n) ({						\
-	(__typeof__(*(ptr)))__cmpxchg_mb((ptr),				\
-					 (unsigned long)(o),		\
-					 (unsigned long)(n),		\
-					 sizeof(*(ptr)));		\
+#define cmpxchg_relaxed(ptr,o,n) ({					\
+	(__typeof__(*(ptr)))__cmpxchg((ptr),				\
+				      (unsigned long)(o),		\
+				      (unsigned long)(n),		\
+				      sizeof(*(ptr)));			\
 })
 
 static inline unsigned long __cmpxchg_local(volatile void *ptr,
@@ -273,25 +261,6 @@ static inline unsigned long long __cmpxchg64(unsigned long long *ptr,
 
 #define cmpxchg64_local(ptr, o, n) cmpxchg64_relaxed((ptr), (o), (n))
 
-static inline unsigned long long __cmpxchg64_mb(unsigned long long *ptr,
-						unsigned long long old,
-						unsigned long long new)
-{
-	unsigned long long ret;
-
-	smp_mb();
-	ret = __cmpxchg64(ptr, old, new);
-	smp_mb();
-
-	return ret;
-}
-
-#define cmpxchg64(ptr, o, n) ({						\
-	(__typeof__(*(ptr)))__cmpxchg64_mb((ptr),			\
-					   (unsigned long long)(o),	\
-					   (unsigned long long)(n));	\
-})
-
 #endif	/* __LINUX_ARM_ARCH__ >= 6 */
 
 #endif /* __ASM_ARM_CMPXCHG_H */
-- 
cgit v0.10.2


From d420acd816c07c7be31bd19d09cbcb16e5572fa6 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 12 Aug 2015 21:04:22 +0200
Subject: jump_label/x86: Work around asm build bug on older/backported GCCs

Boris reported that gcc version 4.4.4 20100503 (Red Hat
4.4.4-2) fails to build linux-next kernels that have
this fresh commit via the locking tree:

  11276d5306b8 ("locking/static_keys: Add a new static_key interface")

The problem appears to be that even though @key and @branch are
compile time constants, it doesn't see the following expression
as an immediate value:

   &((char *)key)[branch]

More recent GCCs don't appear to have this problem.

In particular, Red Hat backported the 'asm goto' feature into 4.4,
'normal' 4.4 compilers will not have this feature and thus not
run into this asm.

The workaround is to supply both values to the asm as immediates
and do the addition in asm.

Suggested-by: H. Peter Anvin <hpa@zytor.com>
Reported-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Tested-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 28d7a85..5daeca3 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -22,9 +22,9 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool bran
 		".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t"
 		".pushsection __jump_table,  \"aw\" \n\t"
 		_ASM_ALIGN "\n\t"
-		_ASM_PTR "1b, %l[l_yes], %c0 \n\t"
+		_ASM_PTR "1b, %l[l_yes], %c0 + %c1 \n\t"
 		".popsection \n\t"
-		: :  "i" (&((char *)key)[branch]) : : l_yes);
+		: :  "i" (key), "i" (branch) : : l_yes);
 
 	return false;
 l_yes:
@@ -38,9 +38,9 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key, bool
 		"2:\n\t"
 		".pushsection __jump_table,  \"aw\" \n\t"
 		_ASM_ALIGN "\n\t"
-		_ASM_PTR "1b, %l[l_yes], %c0 \n\t"
+		_ASM_PTR "1b, %l[l_yes], %c0 + %c1 \n\t"
 		".popsection \n\t"
-		: :  "i" (&((char *)key)[branch]) : : l_yes);
+		: :  "i" (key), "i" (branch) : : l_yes);
 
 	return false;
 l_yes:
-- 
cgit v0.10.2