From 21b05de4c8fac08fff08cf84ef1d4fe5786f9608 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 14 May 2015 17:29:51 -0700 Subject: documentation: Bring rcutorture parameters up to date This commit changes the documentation of the rcutorture parameters to better match reality. Signed-off-by: Paul E. McKenney --- Documentation/kernel-parameters.txt | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) (limited to 'Documentation') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 1d6f0459cd7b..01b5b68a237a 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -3135,22 +3135,35 @@ bytes respectively. Such letter suffixes can also be entirely omitted. in a given burst of a callback-flood test. rcutorture.fqs_duration= [KNL] - Set duration of force_quiescent_state bursts. + Set duration of force_quiescent_state bursts + in microseconds. rcutorture.fqs_holdoff= [KNL] - Set holdoff time within force_quiescent_state bursts. + Set holdoff time within force_quiescent_state bursts + in microseconds. rcutorture.fqs_stutter= [KNL] - Set wait time between force_quiescent_state bursts. + Set wait time between force_quiescent_state bursts + in seconds. + + rcutorture.gp_cond= [KNL] + Use conditional/asynchronous update-side + primitives, if available. rcutorture.gp_exp= [KNL] - Use expedited update-side primitives. + Use expedited update-side primitives, if available. rcutorture.gp_normal= [KNL] - Use normal (non-expedited) update-side primitives. - If both gp_exp and gp_normal are set, do both. - If neither gp_exp nor gp_normal are set, still - do both. + Use normal (non-expedited) asynchronous + update-side primitives, if available. + + rcutorture.gp_sync= [KNL] + Use normal (non-expedited) synchronous + update-side primitives, if available. If all + of rcutorture.gp_cond=, rcutorture.gp_exp=, + rcutorture.gp_normal=, and rcutorture.gp_sync= + are zero, rcutorture acts as if is interpreted + they are all non-zero. rcutorture.n_barrier_cbs= [KNL] Set callbacks/threads for rcu_barrier() testing. @@ -3177,9 +3190,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Set time (s) between CPU-hotplug operations, or zero to disable CPU-hotplug testing. - rcutorture.torture_runnable= [BOOT] - Start rcutorture running at boot time. - rcutorture.shuffle_interval= [KNL] Set task-shuffle interval (s). Shuffling tasks allows some CPUs to go into dyntick-idle mode @@ -3220,6 +3230,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Test RCU's dyntick-idle handling. See also the rcutorture.shuffle_interval parameter. + rcutorture.torture_runnable= [BOOT] + Start rcutorture running at boot time. + rcutorture.torture_type= [KNL] Specify the RCU implementation to test. -- cgit v1.2.3 From 297f739938908a4262603314576e32ee7375296c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 14 May 2015 17:31:07 -0700 Subject: documentation: Fix spelling of "operators" Reported-by: Mathieu Desnoyers Signed-off-by: Paul E. McKenney --- Documentation/RCU/rcu_dereference.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/RCU/rcu_dereference.txt b/Documentation/RCU/rcu_dereference.txt index 1e6c0da994f5..c0bf2441a2ba 100644 --- a/Documentation/RCU/rcu_dereference.txt +++ b/Documentation/RCU/rcu_dereference.txt @@ -28,7 +28,7 @@ o You must use one of the rcu_dereference() family of primitives o Avoid cancellation when using the "+" and "-" infix arithmetic operators. For example, for a given variable "x", avoid "(x-x)". There are similar arithmetic pitfalls from other - arithmetic operatiors, such as "(x*0)", "(x/(x+1))" or "(x%1)". + arithmetic operators, such as "(x*0)", "(x/(x+1))" or "(x%1)". The compiler is within its rights to substitute zero for all of these expressions, so that subsequent accesses no longer depend on the rcu_dereference(), again possibly resulting in bugs due -- cgit v1.2.3 From 57aecae950c55ef50934640794160cd118e73256 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 18 May 2015 18:27:42 -0700 Subject: documentation: Fix variable-name typo in memory-barriers.txt Signed-off-by: Paul E. McKenney --- Documentation/memory-barriers.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index 13feb697271f..3d06f98b2ff2 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -746,7 +746,7 @@ You must also be careful not to rely too much on boolean short-circuit evaluation. Consider this example: q = READ_ONCE_CTRL(a); - if (a || 1 > 0) + if (q || 1 > 0) ACCESS_ONCE(b) = 1; Because the first condition cannot fault and the second condition is -- cgit v1.2.3 From 9af194cefc3c40e75a59df4cbb06e1c1064bee7f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 18 Jun 2015 14:33:24 -0700 Subject: documentation: Replace ACCESS_ONCE() by READ_ONCE() and WRITE_ONCE() Reported-by: Peter Zijlstra Signed-off-by: Paul E. McKenney --- Documentation/memory-barriers.txt | 346 +++++++++++++++++++------------------- 1 file changed, 177 insertions(+), 169 deletions(-) (limited to 'Documentation') diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index 3d06f98b2ff2..470c07c868e4 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -194,22 +194,22 @@ There are some minimal guarantees that may be expected of a CPU: (*) On any given CPU, dependent memory accesses will be issued in order, with respect to itself. This means that for: - ACCESS_ONCE(Q) = P; smp_read_barrier_depends(); D = ACCESS_ONCE(*Q); + WRITE_ONCE(Q, P); smp_read_barrier_depends(); D = READ_ONCE(*Q); the CPU will issue the following memory operations: Q = LOAD P, D = LOAD *Q and always in that order. On most systems, smp_read_barrier_depends() - does nothing, but it is required for DEC Alpha. The ACCESS_ONCE() - is required to prevent compiler mischief. Please note that you - should normally use something like rcu_dereference() instead of - open-coding smp_read_barrier_depends(). + does nothing, but it is required for DEC Alpha. The READ_ONCE() + and WRITE_ONCE() are required to prevent compiler mischief. Please + note that you should normally use something like rcu_dereference() + instead of open-coding smp_read_barrier_depends(). (*) Overlapping loads and stores within a particular CPU will appear to be ordered within that CPU. This means that for: - a = ACCESS_ONCE(*X); ACCESS_ONCE(*X) = b; + a = READ_ONCE(*X); WRITE_ONCE(*X, b); the CPU will only issue the following sequence of memory operations: @@ -217,7 +217,7 @@ There are some minimal guarantees that may be expected of a CPU: And for: - ACCESS_ONCE(*X) = c; d = ACCESS_ONCE(*X); + WRITE_ONCE(*X, c); d = READ_ONCE(*X); the CPU will only issue: @@ -228,11 +228,11 @@ There are some minimal guarantees that may be expected of a CPU: And there are a number of things that _must_ or _must_not_ be assumed: - (*) It _must_not_ be assumed that the compiler will do what you want with - memory references that are not protected by ACCESS_ONCE(). Without - ACCESS_ONCE(), the compiler is within its rights to do all sorts - of "creative" transformations, which are covered in the Compiler - Barrier section. + (*) It _must_not_ be assumed that the compiler will do what you want + with memory references that are not protected by READ_ONCE() and + WRITE_ONCE(). Without them, the compiler is within its rights to + do all sorts of "creative" transformations, which are covered in + the Compiler Barrier section. (*) It _must_not_ be assumed that independent loads and stores will be issued in the order given. This means that for: @@ -520,8 +520,8 @@ following sequence of events: { A == 1, B == 2, C = 3, P == &A, Q == &C } B = 4; - ACCESS_ONCE(P) = &B - Q = ACCESS_ONCE(P); + WRITE_ONCE(P, &B) + Q = READ_ONCE(P); D = *Q; There's a clear data dependency here, and it would seem that by the end of the @@ -547,8 +547,8 @@ between the address load and the data load: { A == 1, B == 2, C = 3, P == &A, Q == &C } B = 4; - ACCESS_ONCE(P) = &B - Q = ACCESS_ONCE(P); + WRITE_ONCE(P, &B); + Q = READ_ONCE(P); D = *Q; @@ -574,8 +574,8 @@ access: { M[0] == 1, M[1] == 2, M[3] = 3, P == 0, Q == 3 } M[1] = 4; - ACCESS_ONCE(P) = 1 - Q = ACCESS_ONCE(P); + WRITE_ONCE(P, 1); + Q = READ_ONCE(P); D = M[Q]; @@ -596,10 +596,10 @@ A load-load control dependency requires a full read memory barrier, not simply a data dependency barrier to make it work correctly. Consider the following bit of code: - q = ACCESS_ONCE(a); + q = READ_ONCE(a); if (q) { /* BUG: No data dependency!!! */ - p = ACCESS_ONCE(b); + p = READ_ONCE(b); } This will not have the desired effect because there is no actual data @@ -608,10 +608,10 @@ by attempting to predict the outcome in advance, so that other CPUs see the load from b as having happened before the load from a. In such a case what's actually required is: - q = ACCESS_ONCE(a); + q = READ_ONCE(a); if (q) { - p = ACCESS_ONCE(b); + p = READ_ONCE(b); } However, stores are not speculated. This means that ordering -is- provided @@ -619,7 +619,7 @@ for load-store control dependencies, as in the following example: q = READ_ONCE_CTRL(a); if (q) { - ACCESS_ONCE(b) = p; + WRITE_ONCE(b, p); } Control dependencies pair normally with other types of barriers. That @@ -647,11 +647,11 @@ branches of the "if" statement as follows: q = READ_ONCE_CTRL(a); if (q) { barrier(); - ACCESS_ONCE(b) = p; + WRITE_ONCE(b, p); do_something(); } else { barrier(); - ACCESS_ONCE(b) = p; + WRITE_ONCE(b, p); do_something_else(); } @@ -660,12 +660,12 @@ optimization levels: q = READ_ONCE_CTRL(a); barrier(); - ACCESS_ONCE(b) = p; /* BUG: No ordering vs. load from a!!! */ + WRITE_ONCE(b, p); /* BUG: No ordering vs. load from a!!! */ if (q) { - /* ACCESS_ONCE(b) = p; -- moved up, BUG!!! */ + /* WRITE_ONCE(b, p); -- moved up, BUG!!! */ do_something(); } else { - /* ACCESS_ONCE(b) = p; -- moved up, BUG!!! */ + /* WRITE_ONCE(b, p); -- moved up, BUG!!! */ do_something_else(); } @@ -676,7 +676,7 @@ assembly code even after all compiler optimizations have been applied. Therefore, if you need ordering in this example, you need explicit memory barriers, for example, smp_store_release(): - q = ACCESS_ONCE(a); + q = READ_ONCE(a); if (q) { smp_store_release(&b, p); do_something(); @@ -690,10 +690,10 @@ ordering is guaranteed only when the stores differ, for example: q = READ_ONCE_CTRL(a); if (q) { - ACCESS_ONCE(b) = p; + WRITE_ONCE(b, p); do_something(); } else { - ACCESS_ONCE(b) = r; + WRITE_ONCE(b, r); do_something_else(); } @@ -706,10 +706,10 @@ the needed conditional. For example: q = READ_ONCE_CTRL(a); if (q % MAX) { - ACCESS_ONCE(b) = p; + WRITE_ONCE(b, p); do_something(); } else { - ACCESS_ONCE(b) = r; + WRITE_ONCE(b, r); do_something_else(); } @@ -718,7 +718,7 @@ equal to zero, in which case the compiler is within its rights to transform the above code into the following: q = READ_ONCE_CTRL(a); - ACCESS_ONCE(b) = p; + WRITE_ONCE(b, p); do_something_else(); Given this transformation, the CPU is not required to respect the ordering @@ -731,10 +731,10 @@ one, perhaps as follows: q = READ_ONCE_CTRL(a); BUILD_BUG_ON(MAX <= 1); /* Order load from a with store to b. */ if (q % MAX) { - ACCESS_ONCE(b) = p; + WRITE_ONCE(b, p); do_something(); } else { - ACCESS_ONCE(b) = r; + WRITE_ONCE(b, r); do_something_else(); } @@ -747,17 +747,17 @@ evaluation. Consider this example: q = READ_ONCE_CTRL(a); if (q || 1 > 0) - ACCESS_ONCE(b) = 1; + WRITE_ONCE(b, 1); Because the first condition cannot fault and the second condition is always true, the compiler can transform this example as following, defeating control dependency: q = READ_ONCE_CTRL(a); - ACCESS_ONCE(b) = 1; + WRITE_ONCE(b, 1); This example underscores the need to ensure that the compiler cannot -out-guess your code. More generally, although ACCESS_ONCE() does force +out-guess your code. More generally, although READ_ONCE() does force the compiler to actually emit code for a given load, it does not force the compiler to use the results. @@ -769,7 +769,7 @@ x and y both being zero: ======================= ======================= r1 = READ_ONCE_CTRL(x); r2 = READ_ONCE_CTRL(y); if (r1 > 0) if (r2 > 0) - ACCESS_ONCE(y) = 1; ACCESS_ONCE(x) = 1; + WRITE_ONCE(y, 1); WRITE_ONCE(x, 1); assert(!(r1 == 1 && r2 == 1)); @@ -779,7 +779,7 @@ then adding the following CPU would guarantee a related assertion: CPU 2 ===================== - ACCESS_ONCE(x) = 2; + WRITE_ONCE(x, 2); assert(!(r1 == 2 && r2 == 1 && x == 2)); /* FAILS!!! */ @@ -798,8 +798,7 @@ In summary: (*) Control dependencies must be headed by READ_ONCE_CTRL(). Or, as a much less preferable alternative, interpose - be headed by READ_ONCE() or an ACCESS_ONCE() read and must - have smp_read_barrier_depends() between this read and the + smp_read_barrier_depends() between a READ_ONCE() and the control-dependent write. (*) Control dependencies can order prior loads against later stores. @@ -815,15 +814,16 @@ In summary: (*) Control dependencies require at least one run-time conditional between the prior load and the subsequent store, and this - conditional must involve the prior load. If the compiler - is able to optimize the conditional away, it will have also - optimized away the ordering. Careful use of ACCESS_ONCE() can - help to preserve the needed conditional. + conditional must involve the prior load. If the compiler is able + to optimize the conditional away, it will have also optimized + away the ordering. Careful use of READ_ONCE_CTRL() READ_ONCE(), + and WRITE_ONCE() can help to preserve the needed conditional. (*) Control dependencies require that the compiler avoid reordering the - dependency into nonexistence. Careful use of ACCESS_ONCE() or - barrier() can help to preserve your control dependency. Please - see the Compiler Barrier section for more information. + dependency into nonexistence. Careful use of READ_ONCE_CTRL() + or smp_read_barrier_depends() can help to preserve your control + dependency. Please see the Compiler Barrier section for more + information. (*) Control dependencies pair normally with other types of barriers. @@ -848,11 +848,11 @@ barrier, an acquire barrier, a release barrier, or a general barrier: CPU 1 CPU 2 =============== =============== - ACCESS_ONCE(a) = 1; + WRITE_ONCE(a, 1); - ACCESS_ONCE(b) = 2; x = ACCESS_ONCE(b); + WRITE_ONCE(b, 2); x = READ_ONCE(b); - y = ACCESS_ONCE(a); + y = READ_ONCE(a); Or: @@ -860,7 +860,7 @@ Or: =============== =============================== a = 1; - ACCESS_ONCE(b) = &a; x = ACCESS_ONCE(b); + WRITE_ONCE(b, &a); x = READ_ONCE(b); y = *x; @@ -868,11 +868,11 @@ Or even: CPU 1 CPU 2 =============== =============================== - r1 = ACCESS_ONCE(y); + r1 = READ_ONCE(y); - ACCESS_ONCE(y) = 1; if (r2 = ACCESS_ONCE(x)) { + WRITE_ONCE(y, 1); if (r2 = READ_ONCE(x)) { - ACCESS_ONCE(y) = 1; + WRITE_ONCE(y, 1); } assert(r1 == 0 || r2 == 0); @@ -886,11 +886,11 @@ versa: CPU 1 CPU 2 =================== =================== - ACCESS_ONCE(a) = 1; }---- --->{ v = ACCESS_ONCE(c); - ACCESS_ONCE(b) = 2; } \ / { w = ACCESS_ONCE(d); + WRITE_ONCE(a, 1); }---- --->{ v = READ_ONCE(c); + WRITE_ONCE(b, 2); } \ / { w = READ_ONCE(d); \ - ACCESS_ONCE(c) = 3; } / \ { x = ACCESS_ONCE(a); - ACCESS_ONCE(d) = 4; }---- --->{ y = ACCESS_ONCE(b); + WRITE_ONCE(c, 3); } / \ { x = READ_ONCE(a); + WRITE_ONCE(d, 4); }---- --->{ y = READ_ONCE(b); EXAMPLES OF MEMORY BARRIER SEQUENCES @@ -1340,10 +1340,10 @@ compiler from moving the memory accesses either side of it to the other side: barrier(); -This is a general barrier -- there are no read-read or write-write variants -of barrier(). However, ACCESS_ONCE() can be thought of as a weak form -for barrier() that affects only the specific accesses flagged by the -ACCESS_ONCE(). +This is a general barrier -- there are no read-read or write-write +variants of barrier(). However, READ_ONCE() and WRITE_ONCE() can be +thought of as weak forms of barrier() that affect only the specific +accesses flagged by the READ_ONCE() or WRITE_ONCE(). The barrier() function has the following effects: @@ -1355,9 +1355,10 @@ The barrier() function has the following effects: (*) Within a loop, forces the compiler to load the variables used in that loop's conditional on each pass through that loop. -The ACCESS_ONCE() function can prevent any number of optimizations that, -while perfectly safe in single-threaded code, can be fatal in concurrent -code. Here are some examples of these sorts of optimizations: +The READ_ONCE() and WRITE_ONCE() functions can prevent any number of +optimizations that, while perfectly safe in single-threaded code, can +be fatal in concurrent code. Here are some examples of these sorts +of optimizations: (*) The compiler is within its rights to reorder loads and stores to the same variable, and in some cases, the CPU is within its @@ -1370,11 +1371,11 @@ code. Here are some examples of these sorts of optimizations: Might result in an older value of x stored in a[1] than in a[0]. Prevent both the compiler and the CPU from doing this as follows: - a[0] = ACCESS_ONCE(x); - a[1] = ACCESS_ONCE(x); + a[0] = READ_ONCE(x); + a[1] = READ_ONCE(x); - In short, ACCESS_ONCE() provides cache coherence for accesses from - multiple CPUs to a single variable. + In short, READ_ONCE() and WRITE_ONCE() provide cache coherence for + accesses from multiple CPUs to a single variable. (*) The compiler is within its rights to merge successive loads from the same variable. Such merging can cause the compiler to "optimize" @@ -1391,9 +1392,9 @@ code. Here are some examples of these sorts of optimizations: for (;;) do_something_with(tmp); - Use ACCESS_ONCE() to prevent the compiler from doing this to you: + Use READ_ONCE() to prevent the compiler from doing this to you: - while (tmp = ACCESS_ONCE(a)) + while (tmp = READ_ONCE(a)) do_something_with(tmp); (*) The compiler is within its rights to reload a variable, for example, @@ -1415,9 +1416,9 @@ code. Here are some examples of these sorts of optimizations: a was modified by some other CPU between the "while" statement and the call to do_something_with(). - Again, use ACCESS_ONCE() to prevent the compiler from doing this: + Again, use READ_ONCE() to prevent the compiler from doing this: - while (tmp = ACCESS_ONCE(a)) + while (tmp = READ_ONCE(a)) do_something_with(tmp); Note that if the compiler runs short of registers, it might save @@ -1437,21 +1438,21 @@ code. Here are some examples of these sorts of optimizations: do { } while (0); - This transformation is a win for single-threaded code because it gets - rid of a load and a branch. The problem is that the compiler will - carry out its proof assuming that the current CPU is the only one - updating variable 'a'. If variable 'a' is shared, then the compiler's - proof will be erroneous. Use ACCESS_ONCE() to tell the compiler - that it doesn't know as much as it thinks it does: + This transformation is a win for single-threaded code because it + gets rid of a load and a branch. The problem is that the compiler + will carry out its proof assuming that the current CPU is the only + one updating variable 'a'. If variable 'a' is shared, then the + compiler's proof will be erroneous. Use READ_ONCE() to tell the + compiler that it doesn't know as much as it thinks it does: - while (tmp = ACCESS_ONCE(a)) + while (tmp = READ_ONCE(a)) do_something_with(tmp); But please note that the compiler is also closely watching what you - do with the value after the ACCESS_ONCE(). For example, suppose you + do with the value after the READ_ONCE(). For example, suppose you do the following and MAX is a preprocessor macro with the value 1: - while ((tmp = ACCESS_ONCE(a)) % MAX) + while ((tmp = READ_ONCE(a)) % MAX) do_something_with(tmp); Then the compiler knows that the result of the "%" operator applied @@ -1475,12 +1476,12 @@ code. Here are some examples of these sorts of optimizations: surprise if some other CPU might have stored to variable 'a' in the meantime. - Use ACCESS_ONCE() to prevent the compiler from making this sort of + Use WRITE_ONCE() to prevent the compiler from making this sort of wrong guess: - ACCESS_ONCE(a) = 0; + WRITE_ONCE(a, 0); /* Code that does not store to variable a. */ - ACCESS_ONCE(a) = 0; + WRITE_ONCE(a, 0); (*) The compiler is within its rights to reorder memory accesses unless you tell it not to. For example, consider the following interaction @@ -1509,40 +1510,43 @@ code. Here are some examples of these sorts of optimizations: } If the interrupt occurs between these two statement, then - interrupt_handler() might be passed a garbled msg. Use ACCESS_ONCE() + interrupt_handler() might be passed a garbled msg. Use WRITE_ONCE() to prevent this as follows: void process_level(void) { - ACCESS_ONCE(msg) = get_message(); - ACCESS_ONCE(flag) = true; + WRITE_ONCE(msg, get_message()); + WRITE_ONCE(flag, true); } void interrupt_handler(void) { - if (ACCESS_ONCE(flag)) - process_message(ACCESS_ONCE(msg)); + if (READ_ONCE(flag)) + process_message(READ_ONCE(msg)); } - Note that the ACCESS_ONCE() wrappers in interrupt_handler() - are needed if this interrupt handler can itself be interrupted - by something that also accesses 'flag' and 'msg', for example, - a nested interrupt or an NMI. Otherwise, ACCESS_ONCE() is not - needed in interrupt_handler() other than for documentation purposes. - (Note also that nested interrupts do not typically occur in modern - Linux kernels, in fact, if an interrupt handler returns with - interrupts enabled, you will get a WARN_ONCE() splat.) - - You should assume that the compiler can move ACCESS_ONCE() past - code not containing ACCESS_ONCE(), barrier(), or similar primitives. - - This effect could also be achieved using barrier(), but ACCESS_ONCE() - is more selective: With ACCESS_ONCE(), the compiler need only forget - the contents of the indicated memory locations, while with barrier() - the compiler must discard the value of all memory locations that - it has currented cached in any machine registers. Of course, - the compiler must also respect the order in which the ACCESS_ONCE()s - occur, though the CPU of course need not do so. + Note that the READ_ONCE() and WRITE_ONCE() wrappers in + interrupt_handler() are needed if this interrupt handler can itself + be interrupted by something that also accesses 'flag' and 'msg', + for example, a nested interrupt or an NMI. Otherwise, READ_ONCE() + and WRITE_ONCE() are not needed in interrupt_handler() other than + for documentation purposes. (Note also that nested interrupts + do not typically occur in modern Linux kernels, in fact, if an + interrupt handler returns with interrupts enabled, you will get a + WARN_ONCE() splat.) + + You should assume that the compiler can move READ_ONCE() and + WRITE_ONCE() past code not containing READ_ONCE(), WRITE_ONCE(), + barrier(), or similar primitives. + + This effect could also be achieved using barrier(), but READ_ONCE() + and WRITE_ONCE() are more selective: With READ_ONCE() and + WRITE_ONCE(), the compiler need only forget the contents of the + indicated memory locations, while with barrier() the compiler must + discard the value of all memory locations that it has currented + cached in any machine registers. Of course, the compiler must also + respect the order in which the READ_ONCE()s and WRITE_ONCE()s occur, + though the CPU of course need not do so. (*) The compiler is within its rights to invent stores to a variable, as in the following example: @@ -1562,16 +1566,16 @@ code. Here are some examples of these sorts of optimizations: a branch. Unfortunately, in concurrent code, this optimization could cause some other CPU to see a spurious value of 42 -- even if variable 'a' was never zero -- when loading variable 'b'. - Use ACCESS_ONCE() to prevent this as follows: + Use WRITE_ONCE() to prevent this as follows: if (a) - ACCESS_ONCE(b) = a; + WRITE_ONCE(b, a); else - ACCESS_ONCE(b) = 42; + WRITE_ONCE(b, 42); The compiler can also invent loads. These are usually less damaging, but they can result in cache-line bouncing and thus in - poor performance and scalability. Use ACCESS_ONCE() to prevent + poor performance and scalability. Use READ_ONCE() to prevent invented loads. (*) For aligned memory locations whose size allows them to be accessed @@ -1590,9 +1594,9 @@ code. Here are some examples of these sorts of optimizations: This optimization can therefore be a win in single-threaded code. In fact, a recent bug (since fixed) caused GCC to incorrectly use this optimization in a volatile store. In the absence of such bugs, - use of ACCESS_ONCE() prevents store tearing in the following example: + use of WRITE_ONCE() prevents store tearing in the following example: - ACCESS_ONCE(p) = 0x00010002; + WRITE_ONCE(p, 0x00010002); Use of packed structures can also result in load and store tearing, as in this example: @@ -1609,22 +1613,23 @@ code. Here are some examples of these sorts of optimizations: foo2.b = foo1.b; foo2.c = foo1.c; - Because there are no ACCESS_ONCE() wrappers and no volatile markings, - the compiler would be well within its rights to implement these three - assignment statements as a pair of 32-bit loads followed by a pair - of 32-bit stores. This would result in load tearing on 'foo1.b' - and store tearing on 'foo2.b'. ACCESS_ONCE() again prevents tearing - in this example: + Because there are no READ_ONCE() or WRITE_ONCE() wrappers and no + volatile markings, the compiler would be well within its rights to + implement these three assignment statements as a pair of 32-bit + loads followed by a pair of 32-bit stores. This would result in + load tearing on 'foo1.b' and store tearing on 'foo2.b'. READ_ONCE() + and WRITE_ONCE() again prevent tearing in this example: foo2.a = foo1.a; - ACCESS_ONCE(foo2.b) = ACCESS_ONCE(foo1.b); + WRITE_ONCE(foo2.b, READ_ONCE(foo1.b)); foo2.c = foo1.c; -All that aside, it is never necessary to use ACCESS_ONCE() on a variable -that has been marked volatile. For example, because 'jiffies' is marked -volatile, it is never necessary to say ACCESS_ONCE(jiffies). The reason -for this is that ACCESS_ONCE() is implemented as a volatile cast, which -has no effect when its argument is already marked volatile. +All that aside, it is never necessary to use READ_ONCE() and +WRITE_ONCE() on a variable that has been marked volatile. For example, +because 'jiffies' is marked volatile, it is never necessary to +say READ_ONCE(jiffies). The reason for this is that READ_ONCE() and +WRITE_ONCE() are implemented as volatile casts, which has no effect when +its argument is already marked volatile. Please note that these compiler barriers have no direct effect on the CPU, which may then reorder things however it wishes. @@ -1646,14 +1651,15 @@ The Linux kernel has eight basic CPU memory barriers: All memory barriers except the data dependency barriers imply a compiler barrier. Data dependencies do not impose any additional compiler ordering. -Aside: In the case of data dependencies, the compiler would be expected to -issue the loads in the correct order (eg. `a[b]` would have to load the value -of b before loading a[b]), however there is no guarantee in the C specification -that the compiler may not speculate the value of b (eg. is equal to 1) and load -a before b (eg. tmp = a[1]; if (b != 1) tmp = a[b]; ). There is also the -problem of a compiler reloading b after having loaded a[b], thus having a newer -copy of b than a[b]. A consensus has not yet been reached about these problems, -however the ACCESS_ONCE macro is a good place to start looking. +Aside: In the case of data dependencies, the compiler would be expected +to issue the loads in the correct order (eg. `a[b]` would have to load +the value of b before loading a[b]), however there is no guarantee in +the C specification that the compiler may not speculate the value of b +(eg. is equal to 1) and load a before b (eg. tmp = a[1]; if (b != 1) +tmp = a[b]; ). There is also the problem of a compiler reloading b after +having loaded a[b], thus having a newer copy of b than a[b]. A consensus +has not yet been reached about these problems, however the READ_ONCE() +macro is a good place to start looking. SMP memory barriers are reduced to compiler barriers on uniprocessor compiled systems because it is assumed that a CPU will appear to be self-consistent, @@ -2126,12 +2132,12 @@ three CPUs; then should the following sequence of events occur: CPU 1 CPU 2 =============================== =============================== - ACCESS_ONCE(*A) = a; ACCESS_ONCE(*E) = e; + WRITE_ONCE(*A, a); WRITE_ONCE(*E, e); ACQUIRE M ACQUIRE Q - ACCESS_ONCE(*B) = b; ACCESS_ONCE(*F) = f; - ACCESS_ONCE(*C) = c; ACCESS_ONCE(*G) = g; + WRITE_ONCE(*B, b); WRITE_ONCE(*F, f); + WRITE_ONCE(*C, c); WRITE_ONCE(*G, g); RELEASE M RELEASE Q - ACCESS_ONCE(*D) = d; ACCESS_ONCE(*H) = h; + WRITE_ONCE(*D, d); WRITE_ONCE(*H, h); Then there is no guarantee as to what order CPU 3 will see the accesses to *A through *H occur in, other than the constraints imposed by the separate locks @@ -2151,18 +2157,18 @@ However, if the following occurs: CPU 1 CPU 2 =============================== =============================== - ACCESS_ONCE(*A) = a; + WRITE_ONCE(*A, a); ACQUIRE M [1] - ACCESS_ONCE(*B) = b; - ACCESS_ONCE(*C) = c; + WRITE_ONCE(*B, b); + WRITE_ONCE(*C, c); RELEASE M [1] - ACCESS_ONCE(*D) = d; ACCESS_ONCE(*E) = e; + WRITE_ONCE(*D, d); WRITE_ONCE(*E, e); ACQUIRE M [2] smp_mb__after_unlock_lock(); - ACCESS_ONCE(*F) = f; - ACCESS_ONCE(*G) = g; + WRITE_ONCE(*F, f); + WRITE_ONCE(*G, g); RELEASE M [2] - ACCESS_ONCE(*H) = h; + WRITE_ONCE(*H, h); CPU 3 might see: @@ -2881,11 +2887,11 @@ A programmer might take it for granted that the CPU will perform memory operations in exactly the order specified, so that if the CPU is, for example, given the following piece of code to execute: - a = ACCESS_ONCE(*A); - ACCESS_ONCE(*B) = b; - c = ACCESS_ONCE(*C); - d = ACCESS_ONCE(*D); - ACCESS_ONCE(*E) = e; + a = READ_ONCE(*A); + WRITE_ONCE(*B, b); + c = READ_ONCE(*C); + d = READ_ONCE(*D); + WRITE_ONCE(*E, e); they would then expect that the CPU will complete the memory operation for each instruction before moving on to the next one, leading to a definite sequence of @@ -2932,12 +2938,12 @@ However, it is guaranteed that a CPU will be self-consistent: it will see its _own_ accesses appear to be correctly ordered, without the need for a memory barrier. For instance with the following code: - U = ACCESS_ONCE(*A); - ACCESS_ONCE(*A) = V; - ACCESS_ONCE(*A) = W; - X = ACCESS_ONCE(*A); - ACCESS_ONCE(*A) = Y; - Z = ACCESS_ONCE(*A); + U = READ_ONCE(*A); + WRITE_ONCE(*A, V); + WRITE_ONCE(*A, W); + X = READ_ONCE(*A); + WRITE_ONCE(*A, Y); + Z = READ_ONCE(*A); and assuming no intervention by an external influence, it can be assumed that the final result will appear to be: @@ -2953,13 +2959,14 @@ accesses: U=LOAD *A, STORE *A=V, STORE *A=W, X=LOAD *A, STORE *A=Y, Z=LOAD *A in that order, but, without intervention, the sequence may have almost any -combination of elements combined or discarded, provided the program's view of -the world remains consistent. Note that ACCESS_ONCE() is -not- optional -in the above example, as there are architectures where a given CPU might -reorder successive loads to the same location. On such architectures, -ACCESS_ONCE() does whatever is necessary to prevent this, for example, on -Itanium the volatile casts used by ACCESS_ONCE() cause GCC to emit the -special ld.acq and st.rel instructions that prevent such reordering. +combination of elements combined or discarded, provided the program's view +of the world remains consistent. Note that READ_ONCE() and WRITE_ONCE() +are -not- optional in the above example, as there are architectures +where a given CPU might reorder successive loads to the same location. +On such architectures, READ_ONCE() and WRITE_ONCE() do whatever is +necessary to prevent this, for example, on Itanium the volatile casts +used by READ_ONCE() and WRITE_ONCE() cause GCC to emit the special ld.acq +and st.rel instructions (respectively) that prevent such reordering. The compiler may also combine, discard or defer elements of the sequence before the CPU even sees them. @@ -2973,13 +2980,14 @@ may be reduced to: *A = W; -since, without either a write barrier or an ACCESS_ONCE(), it can be +since, without either a write barrier or an WRITE_ONCE(), it can be assumed that the effect of the storage of V to *A is lost. Similarly: *A = Y; Z = *A; -may, without a memory barrier or an ACCESS_ONCE(), be reduced to: +may, without a memory barrier or an READ_ONCE() and WRITE_ONCE(), be +reduced to: *A = Y; Z = Y; -- cgit v1.2.3 From 96d7744e0a5631a1b5fef2a97658150b165f02b6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 13 Jul 2015 15:55:52 -0700 Subject: doc: Call out smp_mb__after_unlock_lock() transitivity Although "full barrier" should be interpreted as providing transitivity, it is worth eliminating any possible confusion. This commit therefore adds "(including transitivity)" to eliminate any possible confusion. Reported-by: Peter Zijlstra Signed-off-by: Paul E. McKenney --- Documentation/memory-barriers.txt | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'Documentation') diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index 470c07c868e4..318523872db5 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -1858,11 +1858,12 @@ Similarly, the reverse case of a RELEASE followed by an ACQUIRE does not imply a full memory barrier. If it is necessary for a RELEASE-ACQUIRE pair to produce a full barrier, the ACQUIRE can be followed by an smp_mb__after_unlock_lock() invocation. This will produce a full barrier -if either (a) the RELEASE and the ACQUIRE are executed by the same -CPU or task, or (b) the RELEASE and ACQUIRE act on the same variable. -The smp_mb__after_unlock_lock() primitive is free on many architectures. -Without smp_mb__after_unlock_lock(), the CPU's execution of the critical -sections corresponding to the RELEASE and the ACQUIRE can cross, so that: +(including transitivity) if either (a) the RELEASE and the ACQUIRE are +executed by the same CPU or task, or (b) the RELEASE and ACQUIRE act on +the same variable. The smp_mb__after_unlock_lock() primitive is free +on many architectures. Without smp_mb__after_unlock_lock(), the CPU's +execution of the critical sections corresponding to the RELEASE and the +ACQUIRE can cross, so that: *A = a; RELEASE M -- cgit v1.2.3 From 75c27f119b6475d95374bdad872c6938b5c26196 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 11 Jun 2015 15:22:43 -0700 Subject: rcu: Remove CONFIG_RCU_CPU_STALL_INFO The CONFIG_RCU_CPU_STALL_INFO has been default-y for a couple of releases with no complaints, so it is time to eliminate this Kconfig option entirely, so that the long-form RCU CPU stall warnings cannot be disabled. This commit does just that. Signed-off-by: Paul E. McKenney --- Documentation/RCU/stallwarn.txt | 12 +----- kernel/rcu/tree.h | 4 -- kernel/rcu/tree_plugin.h | 45 ---------------------- lib/Kconfig.debug | 14 ------- .../selftests/rcutorture/configs/rcu/TREE01 | 1 - .../selftests/rcutorture/configs/rcu/TREE02 | 1 - .../selftests/rcutorture/configs/rcu/TREE02-T | 1 - .../selftests/rcutorture/configs/rcu/TREE03 | 1 - .../selftests/rcutorture/configs/rcu/TREE04 | 1 - .../selftests/rcutorture/configs/rcu/TREE05 | 1 - .../selftests/rcutorture/configs/rcu/TREE06 | 1 - .../selftests/rcutorture/configs/rcu/TREE07 | 1 - .../selftests/rcutorture/configs/rcu/TREE08 | 1 - .../selftests/rcutorture/configs/rcu/TREE08-T | 1 - .../selftests/rcutorture/configs/rcu/TREE09 | 1 - .../selftests/rcutorture/doc/TREE_RCU-kconfig.txt | 1 - 16 files changed, 2 insertions(+), 85 deletions(-) (limited to 'Documentation') diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt index b57c0c1cdac6..046f32637b95 100644 --- a/Documentation/RCU/stallwarn.txt +++ b/Documentation/RCU/stallwarn.txt @@ -26,12 +26,6 @@ CONFIG_RCU_CPU_STALL_TIMEOUT Stall-warning messages may be enabled and disabled completely via /sys/module/rcupdate/parameters/rcu_cpu_stall_suppress. -CONFIG_RCU_CPU_STALL_INFO - - This kernel configuration parameter causes the stall warning to - print out additional per-CPU diagnostic information, including - information on scheduling-clock ticks and RCU's idle-CPU tracking. - RCU_STALL_DELAY_DELTA Although the lockdep facility is extremely useful, it does add @@ -101,15 +95,13 @@ interact. Please note that it is not possible to entirely eliminate this sort of false positive without resorting to things like stop_machine(), which is overkill for this sort of problem. -If the CONFIG_RCU_CPU_STALL_INFO kernel configuration parameter is set, -more information is printed with the stall-warning message, for example: +Recent kernels will print a long form of the stall-warning message: INFO: rcu_preempt detected stall on CPU 0: (63959 ticks this GP) idle=241/3fffffffffffffff/0 softirq=82/543 (t=65000 jiffies) -In kernels with CONFIG_RCU_FAST_NO_HZ, even more information is -printed: +In kernels with CONFIG_RCU_FAST_NO_HZ, more information is printed: INFO: rcu_preempt detected stall on CPU 0: (64628 ticks this GP) idle=dd5/3fffffffffffffff/0 softirq=82/543 last_accelerate: a345/d342 nonlazy_posted: 25 .D diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index faee5242d6ff..7c0b09d754a1 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -288,12 +288,10 @@ struct rcu_data { bool gpwrap; /* Possible gpnum/completed wrap. */ struct rcu_node *mynode; /* This CPU's leaf of hierarchy */ unsigned long grpmask; /* Mask to apply to leaf qsmask. */ -#ifdef CONFIG_RCU_CPU_STALL_INFO unsigned long ticks_this_gp; /* The number of scheduling-clock */ /* ticks this CPU has handled */ /* during and after the last grace */ /* period it is aware of. */ -#endif /* #ifdef CONFIG_RCU_CPU_STALL_INFO */ /* 2) batch handling */ /* @@ -388,9 +386,7 @@ struct rcu_data { #endif /* #ifdef CONFIG_RCU_NOCB_CPU */ /* 8) RCU CPU stall data. */ -#ifdef CONFIG_RCU_CPU_STALL_INFO unsigned int softirq_snap; /* Snapshot of softirq activity. */ -#endif /* #ifdef CONFIG_RCU_CPU_STALL_INFO */ int cpu; struct rcu_state *rsp; diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 7234f03e0aa2..ef41c1b04ba6 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -82,8 +82,6 @@ static void __init rcu_bootup_announce_oddness(void) pr_info("\tRCU lockdep checking is enabled.\n"); if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_RUNNABLE)) pr_info("\tRCU torture testing starts during boot.\n"); - if (IS_ENABLED(CONFIG_RCU_CPU_STALL_INFO)) - pr_info("\tAdditional per-CPU info printed with stalls.\n"); if (RCU_NUM_LVLS >= 4) pr_info("\tFour(or more)-level hierarchy is enabled.\n"); if (RCU_FANOUT_LEAF != 16) @@ -418,8 +416,6 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp) rcu_print_detail_task_stall_rnp(rnp); } -#ifdef CONFIG_RCU_CPU_STALL_INFO - static void rcu_print_task_stall_begin(struct rcu_node *rnp) { pr_err("\tTasks blocked on level-%d rcu_node (CPUs %d-%d):", @@ -431,18 +427,6 @@ static void rcu_print_task_stall_end(void) pr_cont("\n"); } -#else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */ - -static void rcu_print_task_stall_begin(struct rcu_node *rnp) -{ -} - -static void rcu_print_task_stall_end(void) -{ -} - -#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_INFO */ - /* * Scan the current list of tasks blocked within RCU read-side critical * sections, printing out the tid of each. @@ -1685,8 +1669,6 @@ early_initcall(rcu_register_oom_notifier); #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ -#ifdef CONFIG_RCU_CPU_STALL_INFO - #ifdef CONFIG_RCU_FAST_NO_HZ static void print_cpu_stall_fast_no_hz(char *cp, int cpu) @@ -1775,33 +1757,6 @@ static void increment_cpu_stall_ticks(void) raw_cpu_inc(rsp->rda->ticks_this_gp); } -#else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */ - -static void print_cpu_stall_info_begin(void) -{ - pr_cont(" {"); -} - -static void print_cpu_stall_info(struct rcu_state *rsp, int cpu) -{ - pr_cont(" %d", cpu); -} - -static void print_cpu_stall_info_end(void) -{ - pr_cont("} "); -} - -static void zero_cpu_stall_ticks(struct rcu_data *rdp) -{ -} - -static void increment_cpu_stall_ticks(void) -{ -} - -#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_INFO */ - #ifdef CONFIG_RCU_NOCB_CPU /* diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index e2894b23efb6..8a34205d6922 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1353,20 +1353,6 @@ config RCU_CPU_STALL_TIMEOUT RCU grace period persists, additional CPU stall warnings are printed at more widely spaced intervals. -config RCU_CPU_STALL_INFO - bool "Print additional diagnostics on RCU CPU stall" - depends on (TREE_RCU || PREEMPT_RCU) && DEBUG_KERNEL - default y - help - For each stalled CPU that is aware of the current RCU grace - period, print out additional per-CPU diagnostic information - regarding scheduling-clock ticks, idle state, and, - for RCU_FAST_NO_HZ kernels, idle-entry state. - - Say N if you are unsure. - - Say Y if you want to enable such diagnostics. - config RCU_TRACE bool "Enable tracing for RCU" depends on DEBUG_KERNEL diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 b/tools/testing/selftests/rcutorture/configs/rcu/TREE01 index 8e9137f66831..f572b873c620 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01 @@ -13,7 +13,6 @@ CONFIG_MAXSMP=y CONFIG_RCU_NOCB_CPU=y CONFIG_RCU_NOCB_CPU_ZERO=y CONFIG_DEBUG_LOCK_ALLOC=n -CONFIG_RCU_CPU_STALL_INFO=n CONFIG_RCU_BOOST=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_RCU_EXPERT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE02 b/tools/testing/selftests/rcutorture/configs/rcu/TREE02 index aeea6a204d14..ef6a22c44dea 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE02 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE02 @@ -17,7 +17,6 @@ CONFIG_RCU_FANOUT_LEAF=3 CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=n -CONFIG_RCU_CPU_STALL_INFO=n CONFIG_RCU_BOOST=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_RCU_EXPERT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE02-T b/tools/testing/selftests/rcutorture/configs/rcu/TREE02-T index 2ac9e68ea3d1..917d2517b5b5 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE02-T +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE02-T @@ -17,6 +17,5 @@ CONFIG_RCU_FANOUT_LEAF=3 CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=n -CONFIG_RCU_CPU_STALL_INFO=n CONFIG_RCU_BOOST=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03 b/tools/testing/selftests/rcutorture/configs/rcu/TREE03 index 72aa7d87ea99..7a17c503b382 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE03 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03 @@ -13,7 +13,6 @@ CONFIG_RCU_FANOUT=2 CONFIG_RCU_FANOUT_LEAF=2 CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=n -CONFIG_RCU_CPU_STALL_INFO=n CONFIG_RCU_BOOST=y CONFIG_RCU_KTHREAD_PRIO=2 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 b/tools/testing/selftests/rcutorture/configs/rcu/TREE04 index 3f5112751cda..39a2c6d7d7ec 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04 @@ -17,6 +17,5 @@ CONFIG_RCU_FANOUT=4 CONFIG_RCU_FANOUT_LEAF=4 CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=n -CONFIG_RCU_CPU_STALL_INFO=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_RCU_EXPERT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 b/tools/testing/selftests/rcutorture/configs/rcu/TREE05 index c04dfea6fd21..1257d3227b1e 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05 @@ -17,6 +17,5 @@ CONFIG_RCU_NOCB_CPU_NONE=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y #CHECK#CONFIG_PROVE_RCU=y -CONFIG_RCU_CPU_STALL_INFO=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_RCU_EXPERT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE06 b/tools/testing/selftests/rcutorture/configs/rcu/TREE06 index f51d2c73a68e..d3e456b74cbe 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE06 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE06 @@ -18,6 +18,5 @@ CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y #CHECK#CONFIG_PROVE_RCU=y -CONFIG_RCU_CPU_STALL_INFO=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=y CONFIG_RCU_EXPERT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 b/tools/testing/selftests/rcutorture/configs/rcu/TREE07 index f422af4ff5a3..3956b4131f72 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07 @@ -17,6 +17,5 @@ CONFIG_RCU_FANOUT=2 CONFIG_RCU_FANOUT_LEAF=2 CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=n -CONFIG_RCU_CPU_STALL_INFO=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_RCU_EXPERT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08 b/tools/testing/selftests/rcutorture/configs/rcu/TREE08 index a24d2ca30646..bb9b0c1a23c2 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08 @@ -19,7 +19,6 @@ CONFIG_RCU_NOCB_CPU_ALL=y CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_PROVE_LOCKING=y #CHECK#CONFIG_PROVE_RCU=y -CONFIG_RCU_CPU_STALL_INFO=n CONFIG_RCU_BOOST=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_RCU_EXPERT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T b/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T index b2b8cea69dc9..2ad13f0d29cc 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T @@ -17,6 +17,5 @@ CONFIG_RCU_FANOUT_LEAF=2 CONFIG_RCU_NOCB_CPU=y CONFIG_RCU_NOCB_CPU_ALL=y CONFIG_DEBUG_LOCK_ALLOC=n -CONFIG_RCU_CPU_STALL_INFO=n CONFIG_RCU_BOOST=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE09 b/tools/testing/selftests/rcutorture/configs/rcu/TREE09 index aa4ed08d999d..6710e749d9de 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE09 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE09 @@ -13,7 +13,6 @@ CONFIG_SUSPEND=n CONFIG_HIBERNATION=n CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=n -CONFIG_RCU_CPU_STALL_INFO=n CONFIG_RCU_BOOST=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n #CHECK#CONFIG_RCU_EXPERT=n diff --git a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt index b24c0004fc49..657f3a035488 100644 --- a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt +++ b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt @@ -16,7 +16,6 @@ CONFIG_PROVE_LOCKING -- Do several, covering CONFIG_DEBUG_LOCK_ALLOC=y and not. CONFIG_PROVE_RCU -- Hardwired to CONFIG_PROVE_LOCKING. CONFIG_RCU_BOOST -- one of PREEMPT_RCU. CONFIG_RCU_KTHREAD_PRIO -- set to 2 for _BOOST testing. -CONFIG_RCU_CPU_STALL_INFO -- Now default, avoid at least twice. CONFIG_RCU_FANOUT -- Cover hierarchy, but overlap with others. CONFIG_RCU_FANOUT_LEAF -- Do one non-default. CONFIG_RCU_FAST_NO_HZ -- Do one, but not with CONFIG_RCU_NOCB_CPU_ALL. -- cgit v1.2.3 From 99a930b0d2b018f31474d69137f311ce3581a4c2 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 30 Jun 2015 14:54:09 -0700 Subject: documentation: Describe new expedited stall warnings Signed-off-by: Paul E. McKenney --- Documentation/RCU/stallwarn.txt | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'Documentation') diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt index 046f32637b95..efb9454875ab 100644 --- a/Documentation/RCU/stallwarn.txt +++ b/Documentation/RCU/stallwarn.txt @@ -163,6 +163,23 @@ message will be about three times the interval between the beginning of the stall and the first message. +Stall Warnings for Expedited Grace Periods + +If an expedited grace period detects a stall, it will place a message +like the following in dmesg: + + INFO: rcu_sched detected expedited stalls on CPUs: { 1 2 6 } 26009 jiffies s: 1043 + +This indicates that CPUs 1, 2, and 6 have failed to respond to a +reschedule IPI, that the expedited grace period has been going on for +26,009 jiffies, and that the expedited grace-period sequence counter is +1043. The fact that this last value is odd indicates that an expedited +grace period is in flight. + +It is entirely possible to see stall warnings from normal and from +expedited grace periods at about the same time from the same run. + + What Causes RCU CPU Stall Warnings? So your kernel printed an RCU CPU stall warning. The next question is -- cgit v1.2.3 From cdacbe1f91264687af956e810278030f2ab5a3d0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 11 Jul 2015 16:24:45 -0700 Subject: rcu: Add fastpath bypassing funnel locking In the common case, there will be only one expedited grace period in the system at a given time, in which case it is not helpful to use funnel locking. This commit therefore adds a fastpath that bypasses funnel locking when the root ->exp_funnel_mutex is not held. Signed-off-by: Paul E. McKenney --- Documentation/RCU/trace.txt | 36 ++++++++++-------------------------- kernel/rcu/tree.c | 16 ++++++++++++++++ kernel/rcu/tree.h | 2 +- kernel/rcu/tree_trace.c | 4 ++-- 4 files changed, 29 insertions(+), 29 deletions(-) (limited to 'Documentation') diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt index 08651da15448..97f17e9decda 100644 --- a/Documentation/RCU/trace.txt +++ b/Documentation/RCU/trace.txt @@ -237,42 +237,26 @@ o "ktl" is the low-order 16 bits (in hexadecimal) of the count of The output of "cat rcu/rcu_preempt/rcuexp" looks as follows: -s=21872 d=21872 w=0 tf=0 wd1=0 wd2=0 n=0 sc=21872 dt=21872 dl=0 dx=21872 +s=21872 wd0=0 wd1=0 wd2=0 wd3=5 n=0 enq=0 sc=21872 These fields are as follows: -o "s" is the starting sequence number. +o "s" is the sequence number, with an odd number indicating that + an expedited grace period is in progress. -o "d" is the ending sequence number. When the starting and ending - numbers differ, there is an expedited grace period in progress. - -o "w" is the number of times that the sequence numbers have been - in danger of wrapping. - -o "tf" is the number of times that contention has resulted in a - failure to begin an expedited grace period. - -o "wd1" and "wd2" are the number of times that an attempt to - start an expedited grace period found that someone else had - completed an expedited grace period that satisfies the +o "wd0", "wd1", "wd2", and "wd3" are the number of times that an + attempt to start an expedited grace period found that someone + else had completed an expedited grace period that satisfies the attempted request. "Our work is done." -o "n" is number of times that contention was so great that - the request was demoted from an expedited grace period to - a normal grace period. +o "n" is number of times that a concurrent CPU-hotplug operation + forced a fallback to a normal grace period. + +o "enq" is the number of quiescent states still outstanding. o "sc" is the number of times that the attempt to start a new expedited grace period succeeded. -o "dt" is the number of times that we attempted to update - the "d" counter. - -o "dl" is the number of times that we failed to update the "d" - counter. - -o "dx" is the number of times that we succeeded in updating - the "d" counter. - The output of "cat rcu/rcu_preempt/rcugp" looks as follows: diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index f66f6e7730bc..3af0dee2d045 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3355,6 +3355,22 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s) struct rcu_node *rnp0; struct rcu_node *rnp1 = NULL; + /* + * First try directly acquiring the root lock in order to reduce + * latency in the common case where expedited grace periods are + * rare. We check mutex_is_locked() to avoid pathological levels of + * memory contention on ->exp_funnel_mutex in the heavy-load case. + */ + rnp0 = rcu_get_root(rsp); + if (!mutex_is_locked(&rnp0->exp_funnel_mutex)) { + if (mutex_trylock(&rnp0->exp_funnel_mutex)) { + if (sync_exp_work_done(rsp, rnp0, NULL, + &rsp->expedited_workdone0, s)) + return NULL; + return rnp0; + } + } + /* * Each pass through the following loop works its way * up the rcu_node tree, returning if others have done the diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 543ba726396c..80d974df0ea0 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -493,7 +493,7 @@ struct rcu_state { /* End of fields guarded by barrier_mutex. */ unsigned long expedited_sequence; /* Take a ticket. */ - atomic_long_t expedited_tryfail; /* # acquisition failures. */ + atomic_long_t expedited_workdone0; /* # done by others #0. */ atomic_long_t expedited_workdone1; /* # done by others #1. */ atomic_long_t expedited_workdone2; /* # done by others #2. */ atomic_long_t expedited_workdone3; /* # done by others #3. */ diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c index ec62369f1b02..6fc4c5ff3bb5 100644 --- a/kernel/rcu/tree_trace.c +++ b/kernel/rcu/tree_trace.c @@ -185,9 +185,9 @@ static int show_rcuexp(struct seq_file *m, void *v) { struct rcu_state *rsp = (struct rcu_state *)m->private; - seq_printf(m, "t=%lu tf=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n", + seq_printf(m, "s=%lu wd0=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n", rsp->expedited_sequence, - atomic_long_read(&rsp->expedited_tryfail), + atomic_long_read(&rsp->expedited_workdone0), atomic_long_read(&rsp->expedited_workdone1), atomic_long_read(&rsp->expedited_workdone2), atomic_long_read(&rsp->expedited_workdone3), -- cgit v1.2.3 From f78f5b90c4ffa559e400c3919a02236101f29f3f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 18 Jun 2015 15:50:02 -0700 Subject: rcu: Rename rcu_lockdep_assert() to RCU_LOCKDEP_WARN() This commit renames rcu_lockdep_assert() to RCU_LOCKDEP_WARN() for consistency with the WARN() series of macros. This also requires inverting the sense of the conditional, which this commit also does. Reported-by: Ingo Molnar Signed-off-by: Paul E. McKenney Reviewed-by: Ingo Molnar --- Documentation/RCU/whatisRCU.txt | 2 +- arch/x86/kernel/cpu/mcheck/mce.c | 6 ++-- arch/x86/kernel/traps.c | 2 +- drivers/base/power/opp.c | 4 +-- include/linux/fdtable.h | 4 +-- include/linux/rcupdate.h | 63 ++++++++++++++++++++++++++-------------- kernel/cgroup.c | 4 +-- kernel/pid.c | 5 ++-- kernel/rcu/srcu.c | 10 +++---- kernel/rcu/tiny.c | 8 ++--- kernel/rcu/tree.c | 28 +++++++++--------- kernel/rcu/tree_plugin.h | 8 ++--- kernel/rcu/update.c | 4 +-- kernel/sched/core.c | 8 ++--- kernel/workqueue.c | 20 ++++++------- security/device_cgroup.c | 6 ++-- 16 files changed, 101 insertions(+), 81 deletions(-) (limited to 'Documentation') diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index 5746b0c77f3e..adc2184009c5 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt @@ -883,7 +883,7 @@ All: lockdep-checked RCU-protected pointer access rcu_access_pointer rcu_dereference_raw - rcu_lockdep_assert + RCU_LOCKDEP_WARN rcu_sleep_check RCU_NONIDLE diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index df919ff103c3..3d6b5269fb2e 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -54,9 +54,9 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex); #define rcu_dereference_check_mce(p) \ ({ \ - rcu_lockdep_assert(rcu_read_lock_sched_held() || \ - lockdep_is_held(&mce_chrdev_read_mutex), \ - "suspicious rcu_dereference_check_mce() usage"); \ + RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \ + !lockdep_is_held(&mce_chrdev_read_mutex), \ + "suspicious rcu_dereference_check_mce() usage"); \ smp_load_acquire(&(p)); \ }) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index f5791927aa64..c5a5231d1d11 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -136,7 +136,7 @@ enum ctx_state ist_enter(struct pt_regs *regs) preempt_count_add(HARDIRQ_OFFSET); /* This code is a bit fragile. Test it. */ - rcu_lockdep_assert(rcu_is_watching(), "ist_enter didn't work"); + RCU_LOCKDEP_WARN(!rcu_is_watching(), "ist_enter didn't work"); return prev_state; } diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c index 677fb2843553..3b188f20b43f 100644 --- a/drivers/base/power/opp.c +++ b/drivers/base/power/opp.c @@ -110,8 +110,8 @@ static DEFINE_MUTEX(dev_opp_list_lock); #define opp_rcu_lockdep_assert() \ do { \ - rcu_lockdep_assert(rcu_read_lock_held() || \ - lockdep_is_held(&dev_opp_list_lock), \ + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ + !lockdep_is_held(&dev_opp_list_lock), \ "Missing rcu_read_lock() or " \ "dev_opp_list_lock protection"); \ } while (0) diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index fbb88740634a..674e3e226465 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -86,8 +86,8 @@ static inline struct file *__fcheck_files(struct files_struct *files, unsigned i static inline struct file *fcheck_files(struct files_struct *files, unsigned int fd) { - rcu_lockdep_assert(rcu_read_lock_held() || - lockdep_is_held(&files->file_lock), + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && + !lockdep_is_held(&files->file_lock), "suspicious rcu_dereference_check() usage"); return __fcheck_files(files, fd); } diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 33ec16b9c2ee..ff476515f716 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -536,6 +536,11 @@ static inline int rcu_read_lock_sched_held(void) #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +/* Deprecate rcu_lockdep_assert(): Use RCU_LOCKDEP_WARN() instead. */ +static inline void __attribute((deprecated)) deprecate_rcu_lockdep_assert(void) +{ +} + #ifdef CONFIG_PROVE_RCU /** @@ -546,17 +551,32 @@ static inline int rcu_read_lock_sched_held(void) #define rcu_lockdep_assert(c, s) \ do { \ static bool __section(.data.unlikely) __warned; \ + deprecate_rcu_lockdep_assert(); \ if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \ __warned = true; \ lockdep_rcu_suspicious(__FILE__, __LINE__, s); \ } \ } while (0) +/** + * RCU_LOCKDEP_WARN - emit lockdep splat if specified condition is met + * @c: condition to check + * @s: informative message + */ +#define RCU_LOCKDEP_WARN(c, s) \ + do { \ + static bool __section(.data.unlikely) __warned; \ + if (debug_lockdep_rcu_enabled() && !__warned && (c)) { \ + __warned = true; \ + lockdep_rcu_suspicious(__FILE__, __LINE__, s); \ + } \ + } while (0) + #if defined(CONFIG_PROVE_RCU) && !defined(CONFIG_PREEMPT_RCU) static inline void rcu_preempt_sleep_check(void) { - rcu_lockdep_assert(!lock_is_held(&rcu_lock_map), - "Illegal context switch in RCU read-side critical section"); + RCU_LOCKDEP_WARN(lock_is_held(&rcu_lock_map), + "Illegal context switch in RCU read-side critical section"); } #else /* #ifdef CONFIG_PROVE_RCU */ static inline void rcu_preempt_sleep_check(void) @@ -567,15 +587,16 @@ static inline void rcu_preempt_sleep_check(void) #define rcu_sleep_check() \ do { \ rcu_preempt_sleep_check(); \ - rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map), \ - "Illegal context switch in RCU-bh read-side critical section"); \ - rcu_lockdep_assert(!lock_is_held(&rcu_sched_lock_map), \ - "Illegal context switch in RCU-sched read-side critical section"); \ + RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map), \ + "Illegal context switch in RCU-bh read-side critical section"); \ + RCU_LOCKDEP_WARN(lock_is_held(&rcu_sched_lock_map), \ + "Illegal context switch in RCU-sched read-side critical section"); \ } while (0) #else /* #ifdef CONFIG_PROVE_RCU */ -#define rcu_lockdep_assert(c, s) do { } while (0) +#define rcu_lockdep_assert(c, s) deprecate_rcu_lockdep_assert() +#define RCU_LOCKDEP_WARN(c, s) do { } while (0) #define rcu_sleep_check() do { } while (0) #endif /* #else #ifdef CONFIG_PROVE_RCU */ @@ -606,13 +627,13 @@ static inline void rcu_preempt_sleep_check(void) ({ \ /* Dependency order vs. p above. */ \ typeof(*p) *________p1 = (typeof(*p) *__force)lockless_dereference(p); \ - rcu_lockdep_assert(c, "suspicious rcu_dereference_check() usage"); \ + RCU_LOCKDEP_WARN(!(c), "suspicious rcu_dereference_check() usage"); \ rcu_dereference_sparse(p, space); \ ((typeof(*p) __force __kernel *)(________p1)); \ }) #define __rcu_dereference_protected(p, c, space) \ ({ \ - rcu_lockdep_assert(c, "suspicious rcu_dereference_protected() usage"); \ + RCU_LOCKDEP_WARN(!(c), "suspicious rcu_dereference_protected() usage"); \ rcu_dereference_sparse(p, space); \ ((typeof(*p) __force __kernel *)(p)); \ }) @@ -836,8 +857,8 @@ static inline void rcu_read_lock(void) __rcu_read_lock(); __acquire(RCU); rcu_lock_acquire(&rcu_lock_map); - rcu_lockdep_assert(rcu_is_watching(), - "rcu_read_lock() used illegally while idle"); + RCU_LOCKDEP_WARN(!rcu_is_watching(), + "rcu_read_lock() used illegally while idle"); } /* @@ -887,8 +908,8 @@ static inline void rcu_read_lock(void) */ static inline void rcu_read_unlock(void) { - rcu_lockdep_assert(rcu_is_watching(), - "rcu_read_unlock() used illegally while idle"); + RCU_LOCKDEP_WARN(!rcu_is_watching(), + "rcu_read_unlock() used illegally while idle"); __release(RCU); __rcu_read_unlock(); rcu_lock_release(&rcu_lock_map); /* Keep acq info for rls diags. */ @@ -916,8 +937,8 @@ static inline void rcu_read_lock_bh(void) local_bh_disable(); __acquire(RCU_BH); rcu_lock_acquire(&rcu_bh_lock_map); - rcu_lockdep_assert(rcu_is_watching(), - "rcu_read_lock_bh() used illegally while idle"); + RCU_LOCKDEP_WARN(!rcu_is_watching(), + "rcu_read_lock_bh() used illegally while idle"); } /* @@ -927,8 +948,8 @@ static inline void rcu_read_lock_bh(void) */ static inline void rcu_read_unlock_bh(void) { - rcu_lockdep_assert(rcu_is_watching(), - "rcu_read_unlock_bh() used illegally while idle"); + RCU_LOCKDEP_WARN(!rcu_is_watching(), + "rcu_read_unlock_bh() used illegally while idle"); rcu_lock_release(&rcu_bh_lock_map); __release(RCU_BH); local_bh_enable(); @@ -952,8 +973,8 @@ static inline void rcu_read_lock_sched(void) preempt_disable(); __acquire(RCU_SCHED); rcu_lock_acquire(&rcu_sched_lock_map); - rcu_lockdep_assert(rcu_is_watching(), - "rcu_read_lock_sched() used illegally while idle"); + RCU_LOCKDEP_WARN(!rcu_is_watching(), + "rcu_read_lock_sched() used illegally while idle"); } /* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */ @@ -970,8 +991,8 @@ static inline notrace void rcu_read_lock_sched_notrace(void) */ static inline void rcu_read_unlock_sched(void) { - rcu_lockdep_assert(rcu_is_watching(), - "rcu_read_unlock_sched() used illegally while idle"); + RCU_LOCKDEP_WARN(!rcu_is_watching(), + "rcu_read_unlock_sched() used illegally while idle"); rcu_lock_release(&rcu_sched_lock_map); __release(RCU_SCHED); preempt_enable(); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index f89d9292eee6..b89f3168411b 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -107,8 +107,8 @@ static DEFINE_SPINLOCK(release_agent_path_lock); struct percpu_rw_semaphore cgroup_threadgroup_rwsem; #define cgroup_assert_mutex_or_rcu_locked() \ - rcu_lockdep_assert(rcu_read_lock_held() || \ - lockdep_is_held(&cgroup_mutex), \ + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ + !lockdep_is_held(&cgroup_mutex), \ "cgroup_mutex or RCU read lock required"); /* diff --git a/kernel/pid.c b/kernel/pid.c index 4fd07d5b7baf..ca368793808e 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -451,9 +451,8 @@ EXPORT_SYMBOL(pid_task); */ struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns) { - rcu_lockdep_assert(rcu_read_lock_held(), - "find_task_by_pid_ns() needs rcu_read_lock()" - " protection"); + RCU_LOCKDEP_WARN(!rcu_read_lock_held(), + "find_task_by_pid_ns() needs rcu_read_lock() protection"); return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID); } diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c index de35087c92a5..d3fcb2ec8536 100644 --- a/kernel/rcu/srcu.c +++ b/kernel/rcu/srcu.c @@ -415,11 +415,11 @@ static void __synchronize_srcu(struct srcu_struct *sp, int trycount) struct rcu_head *head = &rcu.head; bool done = false; - rcu_lockdep_assert(!lock_is_held(&sp->dep_map) && - !lock_is_held(&rcu_bh_lock_map) && - !lock_is_held(&rcu_lock_map) && - !lock_is_held(&rcu_sched_lock_map), - "Illegal synchronize_srcu() in same-type SRCU (or RCU) read-side critical section"); + RCU_LOCKDEP_WARN(lock_is_held(&sp->dep_map) || + lock_is_held(&rcu_bh_lock_map) || + lock_is_held(&rcu_lock_map) || + lock_is_held(&rcu_sched_lock_map), + "Illegal synchronize_srcu() in same-type SRCU (or in RCU) read-side critical section"); might_sleep(); init_completion(&rcu.completion); diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index c291bd65d2cb..d0471056d0af 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -191,10 +191,10 @@ static void rcu_process_callbacks(struct softirq_action *unused) */ void synchronize_sched(void) { - rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) && - !lock_is_held(&rcu_lock_map) && - !lock_is_held(&rcu_sched_lock_map), - "Illegal synchronize_sched() in RCU read-side critical section"); + RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) || + lock_is_held(&rcu_lock_map) || + lock_is_held(&rcu_sched_lock_map), + "Illegal synchronize_sched() in RCU read-side critical section"); cond_resched(); } EXPORT_SYMBOL_GPL(synchronize_sched); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index cb64d7e13d24..0a73d26357a2 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -649,12 +649,12 @@ static void rcu_eqs_enter_common(long long oldval, bool user) * It is illegal to enter an extended quiescent state while * in an RCU read-side critical section. */ - rcu_lockdep_assert(!lock_is_held(&rcu_lock_map), - "Illegal idle entry in RCU read-side critical section."); - rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map), - "Illegal idle entry in RCU-bh read-side critical section."); - rcu_lockdep_assert(!lock_is_held(&rcu_sched_lock_map), - "Illegal idle entry in RCU-sched read-side critical section."); + RCU_LOCKDEP_WARN(lock_is_held(&rcu_lock_map), + "Illegal idle entry in RCU read-side critical section."); + RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map), + "Illegal idle entry in RCU-bh read-side critical section."); + RCU_LOCKDEP_WARN(lock_is_held(&rcu_sched_lock_map), + "Illegal idle entry in RCU-sched read-side critical section."); } /* @@ -3161,10 +3161,10 @@ static inline int rcu_blocking_is_gp(void) */ void synchronize_sched(void) { - rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) && - !lock_is_held(&rcu_lock_map) && - !lock_is_held(&rcu_sched_lock_map), - "Illegal synchronize_sched() in RCU-sched read-side critical section"); + RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) || + lock_is_held(&rcu_lock_map) || + lock_is_held(&rcu_sched_lock_map), + "Illegal synchronize_sched() in RCU-sched read-side critical section"); if (rcu_blocking_is_gp()) return; if (rcu_gp_is_expedited()) @@ -3188,10 +3188,10 @@ EXPORT_SYMBOL_GPL(synchronize_sched); */ void synchronize_rcu_bh(void) { - rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) && - !lock_is_held(&rcu_lock_map) && - !lock_is_held(&rcu_sched_lock_map), - "Illegal synchronize_rcu_bh() in RCU-bh read-side critical section"); + RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) || + lock_is_held(&rcu_lock_map) || + lock_is_held(&rcu_sched_lock_map), + "Illegal synchronize_rcu_bh() in RCU-bh read-side critical section"); if (rcu_blocking_is_gp()) return; if (rcu_gp_is_expedited()) diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index a983bc68a146..9e922f111d63 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -538,10 +538,10 @@ EXPORT_SYMBOL_GPL(call_rcu); */ void synchronize_rcu(void) { - rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) && - !lock_is_held(&rcu_lock_map) && - !lock_is_held(&rcu_sched_lock_map), - "Illegal synchronize_rcu() in RCU read-side critical section"); + RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) || + lock_is_held(&rcu_lock_map) || + lock_is_held(&rcu_sched_lock_map), + "Illegal synchronize_rcu() in RCU read-side critical section"); if (!rcu_scheduler_active) return; if (rcu_gp_is_expedited()) diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index a0a0dd03c73a..47268fb1d27b 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -589,8 +589,8 @@ EXPORT_SYMBOL_GPL(call_rcu_tasks); void synchronize_rcu_tasks(void) { /* Complain if the scheduler has not started. */ - rcu_lockdep_assert(!rcu_scheduler_active, - "synchronize_rcu_tasks called too soon"); + RCU_LOCKDEP_WARN(rcu_scheduler_active, + "synchronize_rcu_tasks called too soon"); /* Wait for the grace period. */ wait_rcu_gp(call_rcu_tasks); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 78b4bad10081..5e73c79fadd0 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2200,8 +2200,8 @@ unsigned long to_ratio(u64 period, u64 runtime) #ifdef CONFIG_SMP inline struct dl_bw *dl_bw_of(int i) { - rcu_lockdep_assert(rcu_read_lock_sched_held(), - "sched RCU must be held"); + RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(), + "sched RCU must be held"); return &cpu_rq(i)->rd->dl_bw; } @@ -2210,8 +2210,8 @@ static inline int dl_bw_cpus(int i) struct root_domain *rd = cpu_rq(i)->rd; int cpus = 0; - rcu_lockdep_assert(rcu_read_lock_sched_held(), - "sched RCU must be held"); + RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(), + "sched RCU must be held"); for_each_cpu_and(i, rd->span, cpu_active_mask) cpus++; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 4c4f06176f74..cb91c63b4f4a 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -338,20 +338,20 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq); #include #define assert_rcu_or_pool_mutex() \ - rcu_lockdep_assert(rcu_read_lock_sched_held() || \ - lockdep_is_held(&wq_pool_mutex), \ - "sched RCU or wq_pool_mutex should be held") + RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \ + !lockdep_is_held(&wq_pool_mutex), \ + "sched RCU or wq_pool_mutex should be held") #define assert_rcu_or_wq_mutex(wq) \ - rcu_lockdep_assert(rcu_read_lock_sched_held() || \ - lockdep_is_held(&wq->mutex), \ - "sched RCU or wq->mutex should be held") + RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \ + !lockdep_is_held(&wq->mutex), \ + "sched RCU or wq->mutex should be held") #define assert_rcu_or_wq_mutex_or_pool_mutex(wq) \ - rcu_lockdep_assert(rcu_read_lock_sched_held() || \ - lockdep_is_held(&wq->mutex) || \ - lockdep_is_held(&wq_pool_mutex), \ - "sched RCU, wq->mutex or wq_pool_mutex should be held") + RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \ + !lockdep_is_held(&wq->mutex) && \ + !lockdep_is_held(&wq_pool_mutex), \ + "sched RCU, wq->mutex or wq_pool_mutex should be held") #define for_each_cpu_worker_pool(pool, cpu) \ for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \ diff --git a/security/device_cgroup.c b/security/device_cgroup.c index 188c1d26393b..73455089feef 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -400,9 +400,9 @@ static bool verify_new_ex(struct dev_cgroup *dev_cgroup, { bool match = false; - rcu_lockdep_assert(rcu_read_lock_held() || - lockdep_is_held(&devcgroup_mutex), - "device_cgroup:verify_new_ex called without proper synchronization"); + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && + lockdep_is_held(&devcgroup_mutex), + "device_cgroup:verify_new_ex called without proper synchronization"); if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW) { if (behavior == DEVCG_DEFAULT_ALLOW) { -- cgit v1.2.3 From 12d560f4ea87030667438a169912380be00cea4b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 14 Jul 2015 18:35:23 -0700 Subject: rcu,locking: Privatize smp_mb__after_unlock_lock() RCU is the only thing that uses smp_mb__after_unlock_lock(), and is likely the only thing that ever will use it, so this commit makes this macro private to RCU. Signed-off-by: Paul E. McKenney Cc: Will Deacon Cc: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: "linux-arch@vger.kernel.org" --- Documentation/memory-barriers.txt | 71 +++---------------------------------- arch/powerpc/include/asm/spinlock.h | 2 -- include/linux/spinlock.h | 10 ------ kernel/rcu/tree.h | 12 +++++++ 4 files changed, 16 insertions(+), 79 deletions(-) (limited to 'Documentation') diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index 318523872db5..eafa6a53f72c 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -1854,16 +1854,10 @@ RELEASE are to the same lock variable, but only from the perspective of another CPU not holding that lock. In short, a ACQUIRE followed by an RELEASE may -not- be assumed to be a full memory barrier. -Similarly, the reverse case of a RELEASE followed by an ACQUIRE does not -imply a full memory barrier. If it is necessary for a RELEASE-ACQUIRE -pair to produce a full barrier, the ACQUIRE can be followed by an -smp_mb__after_unlock_lock() invocation. This will produce a full barrier -(including transitivity) if either (a) the RELEASE and the ACQUIRE are -executed by the same CPU or task, or (b) the RELEASE and ACQUIRE act on -the same variable. The smp_mb__after_unlock_lock() primitive is free -on many architectures. Without smp_mb__after_unlock_lock(), the CPU's -execution of the critical sections corresponding to the RELEASE and the -ACQUIRE can cross, so that: +Similarly, the reverse case of a RELEASE followed by an ACQUIRE does +not imply a full memory barrier. Therefore, the CPU's execution of the +critical sections corresponding to the RELEASE and the ACQUIRE can cross, +so that: *A = a; RELEASE M @@ -1901,29 +1895,6 @@ the RELEASE would simply complete, thereby avoiding the deadlock. a sleep-unlock race, but the locking primitive needs to resolve such races properly in any case. -With smp_mb__after_unlock_lock(), the two critical sections cannot overlap. -For example, with the following code, the store to *A will always be -seen by other CPUs before the store to *B: - - *A = a; - RELEASE M - ACQUIRE N - smp_mb__after_unlock_lock(); - *B = b; - -The operations will always occur in one of the following orders: - - STORE *A, RELEASE, ACQUIRE, smp_mb__after_unlock_lock(), STORE *B - STORE *A, ACQUIRE, RELEASE, smp_mb__after_unlock_lock(), STORE *B - ACQUIRE, STORE *A, RELEASE, smp_mb__after_unlock_lock(), STORE *B - -If the RELEASE and ACQUIRE were instead both operating on the same lock -variable, only the first of these alternatives can occur. In addition, -the more strongly ordered systems may rule out some of the above orders. -But in any case, as noted earlier, the smp_mb__after_unlock_lock() -ensures that the store to *A will always be seen as happening before -the store to *B. - Locks and semaphores may not provide any guarantee of ordering on UP compiled systems, and so cannot be counted on in such a situation to actually achieve anything at all - especially with respect to I/O accesses - unless combined @@ -2154,40 +2125,6 @@ But it won't see any of: *E, *F or *G following RELEASE Q -However, if the following occurs: - - CPU 1 CPU 2 - =============================== =============================== - WRITE_ONCE(*A, a); - ACQUIRE M [1] - WRITE_ONCE(*B, b); - WRITE_ONCE(*C, c); - RELEASE M [1] - WRITE_ONCE(*D, d); WRITE_ONCE(*E, e); - ACQUIRE M [2] - smp_mb__after_unlock_lock(); - WRITE_ONCE(*F, f); - WRITE_ONCE(*G, g); - RELEASE M [2] - WRITE_ONCE(*H, h); - -CPU 3 might see: - - *E, ACQUIRE M [1], *C, *B, *A, RELEASE M [1], - ACQUIRE M [2], *H, *F, *G, RELEASE M [2], *D - -But assuming CPU 1 gets the lock first, CPU 3 won't see any of: - - *B, *C, *D, *F, *G or *H preceding ACQUIRE M [1] - *A, *B or *C following RELEASE M [1] - *F, *G or *H preceding ACQUIRE M [2] - *A, *B, *C, *E, *F or *G following RELEASE M [2] - -Note that the smp_mb__after_unlock_lock() is critically important -here: Without it CPU 3 might see some of the above orderings. -Without smp_mb__after_unlock_lock(), the accesses are not guaranteed -to be seen in order unless CPU 3 holds lock M. - ACQUIRES VS I/O ACCESSES ------------------------ diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index 4dbe072eecbe..523673d7583c 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -28,8 +28,6 @@ #include #include -#define smp_mb__after_unlock_lock() smp_mb() /* Full ordering for lock. */ - #ifdef CONFIG_PPC64 /* use 0x800000yy when locked, where yy == CPU number */ #ifdef __BIG_ENDIAN__ diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 0063b24b4f36..16c5ed5a627c 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -130,16 +130,6 @@ do { \ #define smp_mb__before_spinlock() smp_wmb() #endif -/* - * Place this after a lock-acquisition primitive to guarantee that - * an UNLOCK+LOCK pair act as a full barrier. This guarantee applies - * if the UNLOCK and LOCK are executed by the same CPU or if the - * UNLOCK and LOCK operate on the same lock variable. - */ -#ifndef smp_mb__after_unlock_lock -#define smp_mb__after_unlock_lock() do { } while (0) -#endif - /** * raw_spin_unlock_wait - wait until the spinlock gets unlocked * @lock: the spinlock in question. diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 0412030ca882..2e991f8361e4 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -653,3 +653,15 @@ static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll) #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ } #endif /* #ifdef CONFIG_RCU_TRACE */ + +/* + * Place this after a lock-acquisition primitive to guarantee that + * an UNLOCK+LOCK pair act as a full barrier. This guarantee applies + * if the UNLOCK and LOCK are executed by the same CPU or if the + * UNLOCK and LOCK operate on the same lock variable. + */ +#ifdef CONFIG_PPC +#define smp_mb__after_unlock_lock() smp_mb() /* Full ordering for lock. */ +#else /* #ifdef CONFIG_PPC */ +#define smp_mb__after_unlock_lock() do { } while (0) +#endif /* #else #ifdef CONFIG_PPC */ -- cgit v1.2.3