From 21b05de4c8fac08fff08cf84ef1d4fe5786f9608 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 14 May 2015 17:29:51 -0700
Subject: documentation: Bring rcutorture parameters up to date

This commit changes the documentation of the rcutorture parameters to
better match reality.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 1d6f045..01b5b68 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -3135,22 +3135,35 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			in a given burst of a callback-flood test.
 
 	rcutorture.fqs_duration= [KNL]
-			Set duration of force_quiescent_state bursts.
+			Set duration of force_quiescent_state bursts
+			in microseconds.
 
 	rcutorture.fqs_holdoff= [KNL]
-			Set holdoff time within force_quiescent_state bursts.
+			Set holdoff time within force_quiescent_state bursts
+			in microseconds.
 
 	rcutorture.fqs_stutter= [KNL]
-			Set wait time between force_quiescent_state bursts.
+			Set wait time between force_quiescent_state bursts
+			in seconds.
+
+	rcutorture.gp_cond= [KNL]
+			Use conditional/asynchronous update-side
+			primitives, if available.
 
 	rcutorture.gp_exp= [KNL]
-			Use expedited update-side primitives.
+			Use expedited update-side primitives, if available.
 
 	rcutorture.gp_normal= [KNL]
-			Use normal (non-expedited) update-side primitives.
-			If both gp_exp and gp_normal are set, do both.
-			If neither gp_exp nor gp_normal are set, still
-			do both.
+			Use normal (non-expedited) asynchronous
+			update-side primitives, if available.
+
+	rcutorture.gp_sync= [KNL]
+			Use normal (non-expedited) synchronous
+			update-side primitives, if available.  If all
+			of rcutorture.gp_cond=, rcutorture.gp_exp=,
+			rcutorture.gp_normal=, and rcutorture.gp_sync=
+			are zero, rcutorture acts as if they are all
+			non-zero.
 
 	rcutorture.n_barrier_cbs= [KNL]
 			Set callbacks/threads for rcu_barrier() testing.
@@ -3177,9 +3190,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			Set time (s) between CPU-hotplug operations, or
 			zero to disable CPU-hotplug testing.
 
-	rcutorture.torture_runnable= [BOOT]
-			Start rcutorture running at boot time.
-
 	rcutorture.shuffle_interval= [KNL]
 			Set task-shuffle interval (s).  Shuffling tasks
 			allows some CPUs to go into dyntick-idle mode
@@ -3220,6 +3230,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			Test RCU's dyntick-idle handling.  See also the
 			rcutorture.shuffle_interval parameter.
 
+	rcutorture.torture_runnable= [BOOT]
+			Start rcutorture running at boot time.
+
 	rcutorture.torture_type= [KNL]
 			Specify the RCU implementation to test.
 
-- 
cgit v0.10.2


From 297f739938908a4262603314576e32ee7375296c Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 14 May 2015 17:31:07 -0700
Subject: documentation: Fix spelling of "operators"

Reported-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

diff --git a/Documentation/RCU/rcu_dereference.txt b/Documentation/RCU/rcu_dereference.txt
index 1e6c0da..c0bf244 100644
--- a/Documentation/RCU/rcu_dereference.txt
+++ b/Documentation/RCU/rcu_dereference.txt
@@ -28,7 +28,7 @@ o	You must use one of the rcu_dereference() family of primitives
 o	Avoid cancellation when using the "+" and "-" infix arithmetic
 	operators.  For example, for a given variable "x", avoid
 	"(x-x)".  There are similar arithmetic pitfalls from other
-	arithmetic operatiors, such as "(x*0)", "(x/(x+1))" or "(x%1)".
+	arithmetic operators, such as "(x*0)", "(x/(x+1))" or "(x%1)".
 	The compiler is within its rights to substitute zero for all of
 	these expressions, so that subsequent accesses no longer depend
 	on the rcu_dereference(), again possibly resulting in bugs due
-- 
cgit v0.10.2


From 57aecae950c55ef50934640794160cd118e73256 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 18 May 2015 18:27:42 -0700
Subject: documentation: Fix variable-name typo in memory-barriers.txt

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index 13feb69..3d06f98 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -746,7 +746,7 @@ You must also be careful not to rely too much on boolean short-circuit
 evaluation.  Consider this example:
 
 	q = READ_ONCE_CTRL(a);
-	if (a || 1 > 0)
+	if (q || 1 > 0)
 		ACCESS_ONCE(b) = 1;
 
 Because the first condition cannot fault and the second condition is
-- 
cgit v0.10.2


From 9af194cefc3c40e75a59df4cbb06e1c1064bee7f Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 18 Jun 2015 14:33:24 -0700
Subject: documentation: Replace ACCESS_ONCE() by READ_ONCE() and WRITE_ONCE()

Reported-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index 3d06f98..470c07c 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -194,22 +194,22 @@ There are some minimal guarantees that may be expected of a CPU:
  (*) On any given CPU, dependent memory accesses will be issued in order, with
      respect to itself.  This means that for:
 
-	ACCESS_ONCE(Q) = P; smp_read_barrier_depends(); D = ACCESS_ONCE(*Q);
+	WRITE_ONCE(Q, P); smp_read_barrier_depends(); D = READ_ONCE(*Q);
 
      the CPU will issue the following memory operations:
 
 	Q = LOAD P, D = LOAD *Q
 
      and always in that order.  On most systems, smp_read_barrier_depends()
-     does nothing, but it is required for DEC Alpha.  The ACCESS_ONCE()
-     is required to prevent compiler mischief.  Please note that you
-     should normally use something like rcu_dereference() instead of
-     open-coding smp_read_barrier_depends().
+     does nothing, but it is required for DEC Alpha.  The READ_ONCE()
+     and WRITE_ONCE() are required to prevent compiler mischief.  Please
+     note that you should normally use something like rcu_dereference()
+     instead of open-coding smp_read_barrier_depends().
 
  (*) Overlapping loads and stores within a particular CPU will appear to be
      ordered within that CPU.  This means that for:
 
-	a = ACCESS_ONCE(*X); ACCESS_ONCE(*X) = b;
+	a = READ_ONCE(*X); WRITE_ONCE(*X, b);
 
      the CPU will only issue the following sequence of memory operations:
 
@@ -217,7 +217,7 @@ There are some minimal guarantees that may be expected of a CPU:
 
      And for:
 
-	ACCESS_ONCE(*X) = c; d = ACCESS_ONCE(*X);
+	WRITE_ONCE(*X, c); d = READ_ONCE(*X);
 
      the CPU will only issue:
 
@@ -228,11 +228,11 @@ There are some minimal guarantees that may be expected of a CPU:
 
 And there are a number of things that _must_ or _must_not_ be assumed:
 
- (*) It _must_not_ be assumed that the compiler will do what you want with
-     memory references that are not protected by ACCESS_ONCE().  Without
-     ACCESS_ONCE(), the compiler is within its rights to do all sorts
-     of "creative" transformations, which are covered in the Compiler
-     Barrier section.
+ (*) It _must_not_ be assumed that the compiler will do what you want
+     with memory references that are not protected by READ_ONCE() and
+     WRITE_ONCE().  Without them, the compiler is within its rights to
+     do all sorts of "creative" transformations, which are covered in
+     the Compiler Barrier section.
 
  (*) It _must_not_ be assumed that independent loads and stores will be issued
      in the order given.  This means that for:
@@ -520,8 +520,8 @@ following sequence of events:
 	{ A == 1, B == 2, C = 3, P == &A, Q == &C }
 	B = 4;
 	<write barrier>
-	ACCESS_ONCE(P) = &B
-			      Q = ACCESS_ONCE(P);
+	WRITE_ONCE(P, &B);
+			      Q = READ_ONCE(P);
 			      D = *Q;
 
 There's a clear data dependency here, and it would seem that by the end of the
@@ -547,8 +547,8 @@ between the address load and the data load:
 	{ A == 1, B == 2, C = 3, P == &A, Q == &C }
 	B = 4;
 	<write barrier>
-	ACCESS_ONCE(P) = &B
-			      Q = ACCESS_ONCE(P);
+	WRITE_ONCE(P, &B);
+			      Q = READ_ONCE(P);
 			      <data dependency barrier>
 			      D = *Q;
 
@@ -574,8 +574,8 @@ access:
 	{ M[0] == 1, M[1] == 2, M[3] = 3, P == 0, Q == 3 }
 	M[1] = 4;
 	<write barrier>
-	ACCESS_ONCE(P) = 1
-			      Q = ACCESS_ONCE(P);
+	WRITE_ONCE(P, 1);
+			      Q = READ_ONCE(P);
 			      <data dependency barrier>
 			      D = M[Q];
 
@@ -596,10 +596,10 @@ A load-load control dependency requires a full read memory barrier, not
 simply a data dependency barrier to make it work correctly.  Consider the
 following bit of code:
 
-	q = ACCESS_ONCE(a);
+	q = READ_ONCE(a);
 	if (q) {
 		<data dependency barrier>  /* BUG: No data dependency!!! */
-		p = ACCESS_ONCE(b);
+		p = READ_ONCE(b);
 	}
 
 This will not have the desired effect because there is no actual data
@@ -608,10 +608,10 @@ by attempting to predict the outcome in advance, so that other CPUs see
 the load from b as having happened before the load from a.  In such a
 case what's actually required is:
 
-	q = ACCESS_ONCE(a);
+	q = READ_ONCE(a);
 	if (q) {
 		<read barrier>
-		p = ACCESS_ONCE(b);
+		p = READ_ONCE(b);
 	}
 
 However, stores are not speculated.  This means that ordering -is- provided
@@ -619,7 +619,7 @@ for load-store control dependencies, as in the following example:
 
 	q = READ_ONCE_CTRL(a);
 	if (q) {
-		ACCESS_ONCE(b) = p;
+		WRITE_ONCE(b, p);
 	}
 
 Control dependencies pair normally with other types of barriers.  That
@@ -647,11 +647,11 @@ branches of the "if" statement as follows:
 	q = READ_ONCE_CTRL(a);
 	if (q) {
 		barrier();
-		ACCESS_ONCE(b) = p;
+		WRITE_ONCE(b, p);
 		do_something();
 	} else {
 		barrier();
-		ACCESS_ONCE(b) = p;
+		WRITE_ONCE(b, p);
 		do_something_else();
 	}
 
@@ -660,12 +660,12 @@ optimization levels:
 
 	q = READ_ONCE_CTRL(a);
 	barrier();
-	ACCESS_ONCE(b) = p;  /* BUG: No ordering vs. load from a!!! */
+	WRITE_ONCE(b, p);  /* BUG: No ordering vs. load from a!!! */
 	if (q) {
-		/* ACCESS_ONCE(b) = p; -- moved up, BUG!!! */
+		/* WRITE_ONCE(b, p); -- moved up, BUG!!! */
 		do_something();
 	} else {
-		/* ACCESS_ONCE(b) = p; -- moved up, BUG!!! */
+		/* WRITE_ONCE(b, p); -- moved up, BUG!!! */
 		do_something_else();
 	}
 
@@ -676,7 +676,7 @@ assembly code even after all compiler optimizations have been applied.
 Therefore, if you need ordering in this example, you need explicit
 memory barriers, for example, smp_store_release():
 
-	q = ACCESS_ONCE(a);
+	q = READ_ONCE(a);
 	if (q) {
 		smp_store_release(&b, p);
 		do_something();
@@ -690,10 +690,10 @@ ordering is guaranteed only when the stores differ, for example:
 
 	q = READ_ONCE_CTRL(a);
 	if (q) {
-		ACCESS_ONCE(b) = p;
+		WRITE_ONCE(b, p);
 		do_something();
 	} else {
-		ACCESS_ONCE(b) = r;
+		WRITE_ONCE(b, r);
 		do_something_else();
 	}
 
@@ -706,10 +706,10 @@ the needed conditional.  For example:
 
 	q = READ_ONCE_CTRL(a);
 	if (q % MAX) {
-		ACCESS_ONCE(b) = p;
+		WRITE_ONCE(b, p);
 		do_something();
 	} else {
-		ACCESS_ONCE(b) = r;
+		WRITE_ONCE(b, r);
 		do_something_else();
 	}
 
@@ -718,7 +718,7 @@ equal to zero, in which case the compiler is within its rights to
 transform the above code into the following:
 
 	q = READ_ONCE_CTRL(a);
-	ACCESS_ONCE(b) = p;
+	WRITE_ONCE(b, p);
 	do_something_else();
 
 Given this transformation, the CPU is not required to respect the ordering
@@ -731,10 +731,10 @@ one, perhaps as follows:
 	q = READ_ONCE_CTRL(a);
 	BUILD_BUG_ON(MAX <= 1); /* Order load from a with store to b. */
 	if (q % MAX) {
-		ACCESS_ONCE(b) = p;
+		WRITE_ONCE(b, p);
 		do_something();
 	} else {
-		ACCESS_ONCE(b) = r;
+		WRITE_ONCE(b, r);
 		do_something_else();
 	}
 
@@ -747,17 +747,17 @@ evaluation.  Consider this example:
 
 	q = READ_ONCE_CTRL(a);
 	if (q || 1 > 0)
-		ACCESS_ONCE(b) = 1;
+		WRITE_ONCE(b, 1);
 
 Because the first condition cannot fault and the second condition is
 always true, the compiler can transform this example as following,
 defeating control dependency:
 
 	q = READ_ONCE_CTRL(a);
-	ACCESS_ONCE(b) = 1;
+	WRITE_ONCE(b, 1);
 
 This example underscores the need to ensure that the compiler cannot
-out-guess your code.  More generally, although ACCESS_ONCE() does force
+out-guess your code.  More generally, although READ_ONCE() does force
 the compiler to actually emit code for a given load, it does not force
 the compiler to use the results.
 
@@ -769,7 +769,7 @@ x and y both being zero:
 	=======================   =======================
 	r1 = READ_ONCE_CTRL(x);   r2 = READ_ONCE_CTRL(y);
 	if (r1 > 0)               if (r2 > 0)
-	  ACCESS_ONCE(y) = 1;       ACCESS_ONCE(x) = 1;
+	  WRITE_ONCE(y, 1);         WRITE_ONCE(x, 1);
 
 	assert(!(r1 == 1 && r2 == 1));
 
@@ -779,7 +779,7 @@ then adding the following CPU would guarantee a related assertion:
 
 	CPU 2
 	=====================
-	ACCESS_ONCE(x) = 2;
+	WRITE_ONCE(x, 2);
 
 	assert(!(r1 == 2 && r2 == 1 && x == 2)); /* FAILS!!! */
 
@@ -798,8 +798,7 @@ In summary:
 
   (*) Control dependencies must be headed by READ_ONCE_CTRL().
       Or, as a much less preferable alternative, interpose
-      be headed by READ_ONCE() or an ACCESS_ONCE() read and must
-      have smp_read_barrier_depends() between this read and the
+      smp_read_barrier_depends() between a READ_ONCE() and the
       control-dependent write.
 
   (*) Control dependencies can order prior loads against later stores.
@@ -815,15 +814,16 @@ In summary:
 
   (*) Control dependencies require at least one run-time conditional
       between the prior load and the subsequent store, and this
-      conditional must involve the prior load.  If the compiler
-      is able to optimize the conditional away, it will have also
-      optimized away the ordering.  Careful use of ACCESS_ONCE() can
-      help to preserve the needed conditional.
+      conditional must involve the prior load.  If the compiler is able
+      to optimize the conditional away, it will have also optimized
+      away the ordering.  Careful use of READ_ONCE_CTRL(), READ_ONCE(),
+      and WRITE_ONCE() can help to preserve the needed conditional.
 
   (*) Control dependencies require that the compiler avoid reordering the
-      dependency into nonexistence.  Careful use of ACCESS_ONCE() or
-      barrier() can help to preserve your control dependency.  Please
-      see the Compiler Barrier section for more information.
+      dependency into nonexistence.  Careful use of READ_ONCE_CTRL()
+      or smp_read_barrier_depends() can help to preserve your control
+      dependency.  Please see the Compiler Barrier section for more
+      information.
 
   (*) Control dependencies pair normally with other types of barriers.
 
@@ -848,11 +848,11 @@ barrier, an acquire barrier, a release barrier, or a general barrier:
 
 	CPU 1		      CPU 2
 	===============	      ===============
-	ACCESS_ONCE(a) = 1;
+	WRITE_ONCE(a, 1);
 	<write barrier>
-	ACCESS_ONCE(b) = 2;   x = ACCESS_ONCE(b);
+	WRITE_ONCE(b, 2);     x = READ_ONCE(b);
 			      <read barrier>
-			      y = ACCESS_ONCE(a);
+			      y = READ_ONCE(a);
 
 Or:
 
@@ -860,7 +860,7 @@ Or:
 	===============	      ===============================
 	a = 1;
 	<write barrier>
-	ACCESS_ONCE(b) = &a;  x = ACCESS_ONCE(b);
+	WRITE_ONCE(b, &a);    x = READ_ONCE(b);
 			      <data dependency barrier>
 			      y = *x;
 
@@ -868,11 +868,11 @@ Or even:
 
 	CPU 1		      CPU 2
 	===============	      ===============================
-	r1 = ACCESS_ONCE(y);
+	r1 = READ_ONCE(y);
 	<general barrier>
-	ACCESS_ONCE(y) = 1;   if (r2 = ACCESS_ONCE(x)) {
+	WRITE_ONCE(y, 1);     if (r2 = READ_ONCE(x)) {
 			         <implicit control dependency>
-			         ACCESS_ONCE(y) = 1;
+			         WRITE_ONCE(y, 1);
 			      }
 
 	assert(r1 == 0 || r2 == 0);
@@ -886,11 +886,11 @@ versa:
 
 	CPU 1                               CPU 2
 	===================                 ===================
-	ACCESS_ONCE(a) = 1;  }----   --->{  v = ACCESS_ONCE(c);
-	ACCESS_ONCE(b) = 2;  }    \ /    {  w = ACCESS_ONCE(d);
+	WRITE_ONCE(a, 1);    }----   --->{  v = READ_ONCE(c);
+	WRITE_ONCE(b, 2);    }    \ /    {  w = READ_ONCE(d);
 	<write barrier>            \        <read barrier>
-	ACCESS_ONCE(c) = 3;  }    / \    {  x = ACCESS_ONCE(a);
-	ACCESS_ONCE(d) = 4;  }----   --->{  y = ACCESS_ONCE(b);
+	WRITE_ONCE(c, 3);    }    / \    {  x = READ_ONCE(a);
+	WRITE_ONCE(d, 4);    }----   --->{  y = READ_ONCE(b);
 
 
 EXAMPLES OF MEMORY BARRIER SEQUENCES
@@ -1340,10 +1340,10 @@ compiler from moving the memory accesses either side of it to the other side:
 
 	barrier();
 
-This is a general barrier -- there are no read-read or write-write variants
-of barrier().  However, ACCESS_ONCE() can be thought of as a weak form
-for barrier() that affects only the specific accesses flagged by the
-ACCESS_ONCE().
+This is a general barrier -- there are no read-read or write-write
+variants of barrier().  However, READ_ONCE() and WRITE_ONCE() can be
+thought of as weak forms of barrier() that affect only the specific
+accesses flagged by the READ_ONCE() or WRITE_ONCE().
 
 The barrier() function has the following effects:
 
@@ -1355,9 +1355,10 @@ The barrier() function has the following effects:
  (*) Within a loop, forces the compiler to load the variables used
      in that loop's conditional on each pass through that loop.
 
-The ACCESS_ONCE() function can prevent any number of optimizations that,
-while perfectly safe in single-threaded code, can be fatal in concurrent
-code.  Here are some examples of these sorts of optimizations:
+The READ_ONCE() and WRITE_ONCE() functions can prevent any number of
+optimizations that, while perfectly safe in single-threaded code, can
+be fatal in concurrent code.  Here are some examples of these sorts
+of optimizations:
 
  (*) The compiler is within its rights to reorder loads and stores
      to the same variable, and in some cases, the CPU is within its
@@ -1370,11 +1371,11 @@ code.  Here are some examples of these sorts of optimizations:
      Might result in an older value of x stored in a[1] than in a[0].
      Prevent both the compiler and the CPU from doing this as follows:
 
-	a[0] = ACCESS_ONCE(x);
-	a[1] = ACCESS_ONCE(x);
+	a[0] = READ_ONCE(x);
+	a[1] = READ_ONCE(x);
 
-     In short, ACCESS_ONCE() provides cache coherence for accesses from
-     multiple CPUs to a single variable.
+     In short, READ_ONCE() and WRITE_ONCE() provide cache coherence for
+     accesses from multiple CPUs to a single variable.
 
  (*) The compiler is within its rights to merge successive loads from
      the same variable.  Such merging can cause the compiler to "optimize"
@@ -1391,9 +1392,9 @@ code.  Here are some examples of these sorts of optimizations:
 		for (;;)
 			do_something_with(tmp);
 
-     Use ACCESS_ONCE() to prevent the compiler from doing this to you:
+     Use READ_ONCE() to prevent the compiler from doing this to you:
 
-	while (tmp = ACCESS_ONCE(a))
+	while (tmp = READ_ONCE(a))
 		do_something_with(tmp);
 
  (*) The compiler is within its rights to reload a variable, for example,
@@ -1415,9 +1416,9 @@ code.  Here are some examples of these sorts of optimizations:
      a was modified by some other CPU between the "while" statement and
      the call to do_something_with().
 
-     Again, use ACCESS_ONCE() to prevent the compiler from doing this:
+     Again, use READ_ONCE() to prevent the compiler from doing this:
 
-	while (tmp = ACCESS_ONCE(a))
+	while (tmp = READ_ONCE(a))
 		do_something_with(tmp);
 
      Note that if the compiler runs short of registers, it might save
@@ -1437,21 +1438,21 @@ code.  Here are some examples of these sorts of optimizations:
 
 	do { } while (0);
 
-     This transformation is a win for single-threaded code because it gets
-     rid of a load and a branch.  The problem is that the compiler will
-     carry out its proof assuming that the current CPU is the only one
-     updating variable 'a'.  If variable 'a' is shared, then the compiler's
-     proof will be erroneous.  Use ACCESS_ONCE() to tell the compiler
-     that it doesn't know as much as it thinks it does:
+     This transformation is a win for single-threaded code because it
+     gets rid of a load and a branch.  The problem is that the compiler
+     will carry out its proof assuming that the current CPU is the only
+     one updating variable 'a'.  If variable 'a' is shared, then the
+     compiler's proof will be erroneous.  Use READ_ONCE() to tell the
+     compiler that it doesn't know as much as it thinks it does:
 
-	while (tmp = ACCESS_ONCE(a))
+	while (tmp = READ_ONCE(a))
 		do_something_with(tmp);
 
      But please note that the compiler is also closely watching what you
-     do with the value after the ACCESS_ONCE().  For example, suppose you
+     do with the value after the READ_ONCE().  For example, suppose you
      do the following and MAX is a preprocessor macro with the value 1:
 
-	while ((tmp = ACCESS_ONCE(a)) % MAX)
+	while ((tmp = READ_ONCE(a)) % MAX)
 		do_something_with(tmp);
 
      Then the compiler knows that the result of the "%" operator applied
@@ -1475,12 +1476,12 @@ code.  Here are some examples of these sorts of optimizations:
      surprise if some other CPU might have stored to variable 'a' in the
      meantime.
 
-     Use ACCESS_ONCE() to prevent the compiler from making this sort of
+     Use WRITE_ONCE() to prevent the compiler from making this sort of
      wrong guess:
 
-	ACCESS_ONCE(a) = 0;
+	WRITE_ONCE(a, 0);
 	/* Code that does not store to variable a. */
-	ACCESS_ONCE(a) = 0;
+	WRITE_ONCE(a, 0);
 
  (*) The compiler is within its rights to reorder memory accesses unless
      you tell it not to.  For example, consider the following interaction
@@ -1509,40 +1510,43 @@ code.  Here are some examples of these sorts of optimizations:
 	}
 
      If the interrupt occurs between these two statement, then
-     interrupt_handler() might be passed a garbled msg.  Use ACCESS_ONCE()
+     interrupt_handler() might be passed a garbled msg.  Use WRITE_ONCE()
      to prevent this as follows:
 
 	void process_level(void)
 	{
-		ACCESS_ONCE(msg) = get_message();
-		ACCESS_ONCE(flag) = true;
+		WRITE_ONCE(msg, get_message());
+		WRITE_ONCE(flag, true);
 	}
 
 	void interrupt_handler(void)
 	{
-		if (ACCESS_ONCE(flag))
-			process_message(ACCESS_ONCE(msg));
+		if (READ_ONCE(flag))
+			process_message(READ_ONCE(msg));
 	}
 
-     Note that the ACCESS_ONCE() wrappers in interrupt_handler()
-     are needed if this interrupt handler can itself be interrupted
-     by something that also accesses 'flag' and 'msg', for example,
-     a nested interrupt or an NMI.  Otherwise, ACCESS_ONCE() is not
-     needed in interrupt_handler() other than for documentation purposes.
-     (Note also that nested interrupts do not typically occur in modern
-     Linux kernels, in fact, if an interrupt handler returns with
-     interrupts enabled, you will get a WARN_ONCE() splat.)
-
-     You should assume that the compiler can move ACCESS_ONCE() past
-     code not containing ACCESS_ONCE(), barrier(), or similar primitives.
-
-     This effect could also be achieved using barrier(), but ACCESS_ONCE()
-     is more selective:  With ACCESS_ONCE(), the compiler need only forget
-     the contents of the indicated memory locations, while with barrier()
-     the compiler must discard the value of all memory locations that
-     it has currented cached in any machine registers.  Of course,
-     the compiler must also respect the order in which the ACCESS_ONCE()s
-     occur, though the CPU of course need not do so.
+     Note that the READ_ONCE() and WRITE_ONCE() wrappers in
+     interrupt_handler() are needed if this interrupt handler can itself
+     be interrupted by something that also accesses 'flag' and 'msg',
+     for example, a nested interrupt or an NMI.  Otherwise, READ_ONCE()
+     and WRITE_ONCE() are not needed in interrupt_handler() other than
+     for documentation purposes.  (Note also that nested interrupts
+     do not typically occur in modern Linux kernels, in fact, if an
+     interrupt handler returns with interrupts enabled, you will get a
+     WARN_ONCE() splat.)
+
+     You should assume that the compiler can move READ_ONCE() and
+     WRITE_ONCE() past code not containing READ_ONCE(), WRITE_ONCE(),
+     barrier(), or similar primitives.
+
+     This effect could also be achieved using barrier(), but READ_ONCE()
+     and WRITE_ONCE() are more selective:  With READ_ONCE() and
+     WRITE_ONCE(), the compiler need only forget the contents of the
+     indicated memory locations, while with barrier() the compiler must
+     discard the value of all memory locations that it has currently
+     cached in any machine registers.  Of course, the compiler must also
+     respect the order in which the READ_ONCE()s and WRITE_ONCE()s occur,
+     though the CPU of course need not do so.
 
  (*) The compiler is within its rights to invent stores to a variable,
      as in the following example:
@@ -1562,16 +1566,16 @@ code.  Here are some examples of these sorts of optimizations:
      a branch.  Unfortunately, in concurrent code, this optimization
      could cause some other CPU to see a spurious value of 42 -- even
      if variable 'a' was never zero -- when loading variable 'b'.
-     Use ACCESS_ONCE() to prevent this as follows:
+     Use WRITE_ONCE() to prevent this as follows:
 
 	if (a)
-		ACCESS_ONCE(b) = a;
+		WRITE_ONCE(b, a);
 	else
-		ACCESS_ONCE(b) = 42;
+		WRITE_ONCE(b, 42);
 
      The compiler can also invent loads.  These are usually less
      damaging, but they can result in cache-line bouncing and thus in
-     poor performance and scalability.  Use ACCESS_ONCE() to prevent
+     poor performance and scalability.  Use READ_ONCE() to prevent
      invented loads.
 
  (*) For aligned memory locations whose size allows them to be accessed
@@ -1590,9 +1594,9 @@ code.  Here are some examples of these sorts of optimizations:
      This optimization can therefore be a win in single-threaded code.
      In fact, a recent bug (since fixed) caused GCC to incorrectly use
      this optimization in a volatile store.  In the absence of such bugs,
-     use of ACCESS_ONCE() prevents store tearing in the following example:
+     use of WRITE_ONCE() prevents store tearing in the following example:
 
-	ACCESS_ONCE(p) = 0x00010002;
+	WRITE_ONCE(p, 0x00010002);
 
      Use of packed structures can also result in load and store tearing,
      as in this example:
@@ -1609,22 +1613,23 @@ code.  Here are some examples of these sorts of optimizations:
 	foo2.b = foo1.b;
 	foo2.c = foo1.c;
 
-     Because there are no ACCESS_ONCE() wrappers and no volatile markings,
-     the compiler would be well within its rights to implement these three
-     assignment statements as a pair of 32-bit loads followed by a pair
-     of 32-bit stores.  This would result in load tearing on 'foo1.b'
-     and store tearing on 'foo2.b'.  ACCESS_ONCE() again prevents tearing
-     in this example:
+     Because there are no READ_ONCE() or WRITE_ONCE() wrappers and no
+     volatile markings, the compiler would be well within its rights to
+     implement these three assignment statements as a pair of 32-bit
+     loads followed by a pair of 32-bit stores.  This would result in
+     load tearing on 'foo1.b' and store tearing on 'foo2.b'.  READ_ONCE()
+     and WRITE_ONCE() again prevent tearing in this example:
 
 	foo2.a = foo1.a;
-	ACCESS_ONCE(foo2.b) = ACCESS_ONCE(foo1.b);
+	WRITE_ONCE(foo2.b, READ_ONCE(foo1.b));
 	foo2.c = foo1.c;
 
-All that aside, it is never necessary to use ACCESS_ONCE() on a variable
-that has been marked volatile.  For example, because 'jiffies' is marked
-volatile, it is never necessary to say ACCESS_ONCE(jiffies).  The reason
-for this is that ACCESS_ONCE() is implemented as a volatile cast, which
-has no effect when its argument is already marked volatile.
+All that aside, it is never necessary to use READ_ONCE() and
+WRITE_ONCE() on a variable that has been marked volatile.  For example,
+because 'jiffies' is marked volatile, it is never necessary to
+say READ_ONCE(jiffies).  The reason for this is that READ_ONCE() and
+WRITE_ONCE() are implemented as volatile casts, which have no effect when
+their argument is already marked volatile.
 
 Please note that these compiler barriers have no direct effect on the CPU,
 which may then reorder things however it wishes.
@@ -1646,14 +1651,15 @@ The Linux kernel has eight basic CPU memory barriers:
 All memory barriers except the data dependency barriers imply a compiler
 barrier. Data dependencies do not impose any additional compiler ordering.
 
-Aside: In the case of data dependencies, the compiler would be expected to
-issue the loads in the correct order (eg. `a[b]` would have to load the value
-of b before loading a[b]), however there is no guarantee in the C specification
-that the compiler may not speculate the value of b (eg. is equal to 1) and load
-a before b (eg. tmp = a[1]; if (b != 1) tmp = a[b]; ). There is also the
-problem of a compiler reloading b after having loaded a[b], thus having a newer
-copy of b than a[b]. A consensus has not yet been reached about these problems,
-however the ACCESS_ONCE macro is a good place to start looking.
+Aside: In the case of data dependencies, the compiler would be expected
+to issue the loads in the correct order (eg. `a[b]` would have to load
+the value of b before loading a[b]), however there is no guarantee in
+the C specification that the compiler may not speculate the value of b
+(eg. is equal to 1) and load a before b (eg. tmp = a[1]; if (b != 1)
+tmp = a[b]; ). There is also the problem of a compiler reloading b after
+having loaded a[b], thus having a newer copy of b than a[b]. A consensus
+has not yet been reached about these problems, however the READ_ONCE()
+macro is a good place to start looking.
 
 SMP memory barriers are reduced to compiler barriers on uniprocessor compiled
 systems because it is assumed that a CPU will appear to be self-consistent,
@@ -2126,12 +2132,12 @@ three CPUs; then should the following sequence of events occur:
 
 	CPU 1				CPU 2
 	===============================	===============================
-	ACCESS_ONCE(*A) = a;		ACCESS_ONCE(*E) = e;
+	WRITE_ONCE(*A, a);		WRITE_ONCE(*E, e);
 	ACQUIRE M			ACQUIRE Q
-	ACCESS_ONCE(*B) = b;		ACCESS_ONCE(*F) = f;
-	ACCESS_ONCE(*C) = c;		ACCESS_ONCE(*G) = g;
+	WRITE_ONCE(*B, b);		WRITE_ONCE(*F, f);
+	WRITE_ONCE(*C, c);		WRITE_ONCE(*G, g);
 	RELEASE M			RELEASE Q
-	ACCESS_ONCE(*D) = d;		ACCESS_ONCE(*H) = h;
+	WRITE_ONCE(*D, d);		WRITE_ONCE(*H, h);
 
 Then there is no guarantee as to what order CPU 3 will see the accesses to *A
 through *H occur in, other than the constraints imposed by the separate locks
@@ -2151,18 +2157,18 @@ However, if the following occurs:
 
 	CPU 1				CPU 2
 	===============================	===============================
-	ACCESS_ONCE(*A) = a;
+	WRITE_ONCE(*A, a);
 	ACQUIRE M		     [1]
-	ACCESS_ONCE(*B) = b;
-	ACCESS_ONCE(*C) = c;
+	WRITE_ONCE(*B, b);
+	WRITE_ONCE(*C, c);
 	RELEASE M	     [1]
-	ACCESS_ONCE(*D) = d;		ACCESS_ONCE(*E) = e;
+	WRITE_ONCE(*D, d);		WRITE_ONCE(*E, e);
 					ACQUIRE M		     [2]
 					smp_mb__after_unlock_lock();
-					ACCESS_ONCE(*F) = f;
-					ACCESS_ONCE(*G) = g;
+					WRITE_ONCE(*F, f);
+					WRITE_ONCE(*G, g);
 					RELEASE M	     [2]
-					ACCESS_ONCE(*H) = h;
+					WRITE_ONCE(*H, h);
 
 CPU 3 might see:
 
@@ -2881,11 +2887,11 @@ A programmer might take it for granted that the CPU will perform memory
 operations in exactly the order specified, so that if the CPU is, for example,
 given the following piece of code to execute:
 
-	a = ACCESS_ONCE(*A);
-	ACCESS_ONCE(*B) = b;
-	c = ACCESS_ONCE(*C);
-	d = ACCESS_ONCE(*D);
-	ACCESS_ONCE(*E) = e;
+	a = READ_ONCE(*A);
+	WRITE_ONCE(*B, b);
+	c = READ_ONCE(*C);
+	d = READ_ONCE(*D);
+	WRITE_ONCE(*E, e);
 
 they would then expect that the CPU will complete the memory operation for each
 instruction before moving on to the next one, leading to a definite sequence of
@@ -2932,12 +2938,12 @@ However, it is guaranteed that a CPU will be self-consistent: it will see its
 _own_ accesses appear to be correctly ordered, without the need for a memory
 barrier.  For instance with the following code:
 
-	U = ACCESS_ONCE(*A);
-	ACCESS_ONCE(*A) = V;
-	ACCESS_ONCE(*A) = W;
-	X = ACCESS_ONCE(*A);
-	ACCESS_ONCE(*A) = Y;
-	Z = ACCESS_ONCE(*A);
+	U = READ_ONCE(*A);
+	WRITE_ONCE(*A, V);
+	WRITE_ONCE(*A, W);
+	X = READ_ONCE(*A);
+	WRITE_ONCE(*A, Y);
+	Z = READ_ONCE(*A);
 
 and assuming no intervention by an external influence, it can be assumed that
 the final result will appear to be:
@@ -2953,13 +2959,14 @@ accesses:
 	U=LOAD *A, STORE *A=V, STORE *A=W, X=LOAD *A, STORE *A=Y, Z=LOAD *A
 
 in that order, but, without intervention, the sequence may have almost any
-combination of elements combined or discarded, provided the program's view of
-the world remains consistent.  Note that ACCESS_ONCE() is -not- optional
-in the above example, as there are architectures where a given CPU might
-reorder successive loads to the same location.  On such architectures,
-ACCESS_ONCE() does whatever is necessary to prevent this, for example, on
-Itanium the volatile casts used by ACCESS_ONCE() cause GCC to emit the
-special ld.acq and st.rel instructions that prevent such reordering.
+combination of elements combined or discarded, provided the program's view
+of the world remains consistent.  Note that READ_ONCE() and WRITE_ONCE()
+are -not- optional in the above example, as there are architectures
+where a given CPU might reorder successive loads to the same location.
+On such architectures, READ_ONCE() and WRITE_ONCE() do whatever is
+necessary to prevent this, for example, on Itanium the volatile casts
+used by READ_ONCE() and WRITE_ONCE() cause GCC to emit the special ld.acq
+and st.rel instructions (respectively) that prevent such reordering.
 
 The compiler may also combine, discard or defer elements of the sequence before
 the CPU even sees them.
@@ -2973,13 +2980,14 @@ may be reduced to:
 
 	*A = W;
 
-since, without either a write barrier or an ACCESS_ONCE(), it can be
+since, without either a write barrier or a WRITE_ONCE(), it can be
 assumed that the effect of the storage of V to *A is lost.  Similarly:
 
 	*A = Y;
 	Z = *A;
 
-may, without a memory barrier or an ACCESS_ONCE(), be reduced to:
+may, without a memory barrier or a READ_ONCE() and WRITE_ONCE(), be
+reduced to:
 
 	*A = Y;
 	Z = Y;
-- 
cgit v0.10.2


From 96d7744e0a5631a1b5fef2a97658150b165f02b6 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 13 Jul 2015 15:55:52 -0700
Subject: doc: Call out smp_mb__after_unlock_lock() transitivity

Although "full barrier" should be interpreted as providing transitivity,
it is worth eliminating any possible confusion.  This commit therefore
adds "(including transitivity)" to the smp_mb__after_unlock_lock() text.

Reported-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index 470c07c..3185238 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -1858,11 +1858,12 @@ Similarly, the reverse case of a RELEASE followed by an ACQUIRE does not
 imply a full memory barrier.  If it is necessary for a RELEASE-ACQUIRE
 pair to produce a full barrier, the ACQUIRE can be followed by an
 smp_mb__after_unlock_lock() invocation.  This will produce a full barrier
-if either (a) the RELEASE and the ACQUIRE are executed by the same
-CPU or task, or (b) the RELEASE and ACQUIRE act on the same variable.
-The smp_mb__after_unlock_lock() primitive is free on many architectures.
-Without smp_mb__after_unlock_lock(), the CPU's execution of the critical
-sections corresponding to the RELEASE and the ACQUIRE can cross, so that:
+(including transitivity) if either (a) the RELEASE and the ACQUIRE are
+executed by the same CPU or task, or (b) the RELEASE and ACQUIRE act on
+the same variable.  The smp_mb__after_unlock_lock() primitive is free
+on many architectures.  Without smp_mb__after_unlock_lock(), the CPU's
+execution of the critical sections corresponding to the RELEASE and the
+ACQUIRE can cross, so that:
 
 	*A = a;
 	RELEASE M
-- 
cgit v0.10.2


From d9eba768839ac24e47606af36e50c14f10c2211c Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 14 May 2015 15:35:43 -0700
Subject: rcutorture: Better bounds checking for n_barrier_cbs

A negative value for rcutorture.n_barrier_cbs can pass a negative value
to the memory allocator, so this commit instead causes rcu_barrier()
testing to be disabled in this case.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 59e3268..7e29a32 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -1507,7 +1507,7 @@ static int rcu_torture_barrier_init(void)
 	int i;
 	int ret;
 
-	if (n_barrier_cbs == 0)
+	if (n_barrier_cbs <= 0)
 		return 0;
 	if (cur_ops->call == NULL || cur_ops->cb_barrier == NULL) {
 		pr_alert("%s" TORTURE_FLAG
-- 
cgit v0.10.2


From 4444d852a99b8f0310f369da8473ec3639e380a7 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 14 May 2015 15:42:40 -0700
Subject: rcutorture: Check nfakewriters parameter

Currently, a negative value for rcutorture.nfakewriters= can cause
rcutorture to pass a negative size to the memory allocator, which
is not really a particularly good thing to do.  This commit therefore
adds bounds checking to this parameter, so that values that are less
than or equal to zero disable fake writing.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 7e29a32..2cbe569 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -1786,12 +1786,15 @@ rcu_torture_init(void)
 					  writer_task);
 	if (firsterr)
 		goto unwind;
-	fakewriter_tasks = kzalloc(nfakewriters * sizeof(fakewriter_tasks[0]),
-				   GFP_KERNEL);
-	if (fakewriter_tasks == NULL) {
-		VERBOSE_TOROUT_ERRSTRING("out of memory");
-		firsterr = -ENOMEM;
-		goto unwind;
+	if (nfakewriters > 0) {
+		fakewriter_tasks = kzalloc(nfakewriters *
+					   sizeof(fakewriter_tasks[0]),
+					   GFP_KERNEL);
+		if (fakewriter_tasks == NULL) {
+			VERBOSE_TOROUT_ERRSTRING("out of memory");
+			firsterr = -ENOMEM;
+			goto unwind;
+		}
 	}
 	for (i = 0; i < nfakewriters; i++) {
 		firsterr = torture_create_kthread(rcu_torture_fakewriter,
-- 
cgit v0.10.2


From e8e255f7191fb6491dd1d96cfbbe19981f6eb3dd Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 14 May 2015 16:55:45 -0700
Subject: rcutorture: Bounds-check rcutorture.shuffle_interval

Specifying a negative rcutorture.shuffle_interval value will cause a
negative value to be used as a sleep time.  This commit therefore
refuses to start shuffling unless the rcutorture.shuffle_interval
value is greater than zero.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 2cbe569..1cead78 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -1821,7 +1821,7 @@ rcu_torture_init(void)
 		if (firsterr)
 			goto unwind;
 	}
-	if (test_no_idle_hz) {
+	if (test_no_idle_hz && shuffle_interval > 0) {
 		firsterr = torture_shuffle_init(shuffle_interval * HZ);
 		if (firsterr)
 			goto unwind;
-- 
cgit v0.10.2


From 3a0af333415830d2a0ca77de832336af5aadced4 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 22 Jun 2015 18:11:31 -0700
Subject: rcutorture: Fix rcu_torture_cbflood() for callback-free RCU

The rcu_torture_cbflood() function correctly checks for flavors of
RCU that lack analogs to call_rcu() and rcu_barrier(), but in that
case it fails to terminate correctly.  In fact, it terminates so
incorrectly that segfaults can result.  This commit therefore causes
rcu_torture_cbflood() to do the proper wait-for-stop procedure.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 1cead78..e0eda3c 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -823,9 +823,7 @@ rcu_torture_cbflood(void *arg)
 	}
 	if (err) {
 		VERBOSE_TOROUT_STRING("rcu_torture_cbflood disabled: Bad args or OOM");
-		while (!torture_must_stop())
-			schedule_timeout_interruptible(HZ);
-		return 0;
+		goto wait_for_stop;
 	}
 	VERBOSE_TOROUT_STRING("rcu_torture_cbflood task started");
 	do {
@@ -844,6 +842,7 @@ rcu_torture_cbflood(void *arg)
 		stutter_wait("rcu_torture_cbflood");
 	} while (!torture_must_stop());
 	vfree(rhp);
+wait_for_stop:
 	torture_kthread_stopping("rcu_torture_cbflood");
 	return 0;
 }
-- 
cgit v0.10.2


From 5be5d1a11775fadc6104789fad72fae46dff348e Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 30 Jun 2015 08:57:57 -0700
Subject: rcutorture: Add RCU-tasks qualifier to dereference

Although RCU-tasks isn't really designed to support rcu_dereference()
and list manipulation, that is how rcutorture tests it.  Which means
that lockdep-RCU complains about the rcu_dereference_check() invocations
because RCU-tasks doesn't have read-side markers.  This commit therefore
creates a torturing_tasks() function to silence the lockdep-RCU complaints
from rcu_dereference_check() when RCU-tasks is being tortured.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index e0eda3c..67b3f26 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -684,10 +684,20 @@ static struct rcu_torture_ops tasks_ops = {
 
 #define RCUTORTURE_TASKS_OPS &tasks_ops,
 
+static bool __maybe_unused torturing_tasks(void)
+{
+	return cur_ops == &tasks_ops;
+}
+
 #else /* #ifdef CONFIG_TASKS_RCU */
 
 #define RCUTORTURE_TASKS_OPS
 
+static bool torturing_tasks(void)
+{
+	return false;
+}
+
 #endif /* #else #ifdef CONFIG_TASKS_RCU */
 
 /*
@@ -1087,7 +1097,8 @@ static void rcu_torture_timer(unsigned long unused)
 	p = rcu_dereference_check(rcu_torture_current,
 				  rcu_read_lock_bh_held() ||
 				  rcu_read_lock_sched_held() ||
-				  srcu_read_lock_held(srcu_ctlp));
+				  srcu_read_lock_held(srcu_ctlp) ||
+				  torturing_tasks());
 	if (p == NULL) {
 		/* Leave because rcu_torture_writer is not yet underway */
 		cur_ops->readunlock(idx);
@@ -1161,7 +1172,8 @@ rcu_torture_reader(void *arg)
 		p = rcu_dereference_check(rcu_torture_current,
 					  rcu_read_lock_bh_held() ||
 					  rcu_read_lock_sched_held() ||
-					  srcu_read_lock_held(srcu_ctlp));
+					  srcu_read_lock_held(srcu_ctlp) ||
+					  torturing_tasks());
 		if (p == NULL) {
 			/* Wait for rcu_torture_writer to get underway */
 			cur_ops->readunlock(idx);
-- 
cgit v0.10.2


From d6a8c6d34222f7d9de4c819fbcfbaebb1e40e8a2 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 30 Jun 2015 09:14:01 -0700
Subject: rcutorture: Enable lockdep-RCU on TASKS01

Currently none of the RCU-tasks scenarios enables lockdep-RCU, which
causes bugs to be missed.  This commit therefore enables lockdep-RCU
on TASKS01.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
index 2cc0e60..bafe94c 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
@@ -5,6 +5,6 @@ CONFIG_PREEMPT_NONE=n
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=y
 CONFIG_DEBUG_LOCK_ALLOC=y
-CONFIG_PROVE_LOCKING=n
-#CHECK#CONFIG_PROVE_RCU=n
+CONFIG_PROVE_LOCKING=y
+#CHECK#CONFIG_PROVE_RCU=y
 CONFIG_RCU_EXPERT=y
-- 
cgit v0.10.2