summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/RCU/Design/Requirements/Requirements.html25
-rw-r--r--Documentation/RCU/whatisRCU.txt2
-rw-r--r--include/trace/events/rcu.h5
-rw-r--r--kernel/rcu/rcutorture.c11
-rw-r--r--kernel/rcu/tree.c17
-rw-r--r--kernel/rcu/tree.h1
-rw-r--r--kernel/rcu/tree_exp.h12
-rw-r--r--lib/Kconfig.debug3
-rw-r--r--tools/testing/selftests/rcutorture/.gitignore2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm.sh5
10 files changed, 64 insertions, 19 deletions
diff --git a/Documentation/RCU/Design/Requirements/Requirements.html b/Documentation/RCU/Design/Requirements/Requirements.html
index a4d3838130e4..39bcb74ea733 100644
--- a/Documentation/RCU/Design/Requirements/Requirements.html
+++ b/Documentation/RCU/Design/Requirements/Requirements.html
@@ -547,7 +547,7 @@ The <tt>rcu_access_pointer()</tt> on line&nbsp;6 is similar to
It could reuse a value formerly fetched from this same pointer.
It could also fetch the pointer from <tt>gp</tt> in a byte-at-a-time
manner, resulting in <i>load tearing</i>, in turn resulting a bytewise
- mash-up of two distince pointer values.
+ mash-up of two distinct pointer values.
It might even use value-speculation optimizations, where it makes
a wrong guess, but by the time it gets around to checking the
value, an update has changed the pointer to match the wrong guess.
@@ -659,6 +659,29 @@ systems with more than one CPU:
In other words, a given instance of <tt>synchronize_rcu()</tt>
can avoid waiting on a given RCU read-side critical section only
if it can prove that <tt>synchronize_rcu()</tt> started first.
+
+ <p>
+ A related question is &ldquo;When <tt>rcu_read_lock()</tt>
+ doesn't generate any code, why does it matter how it relates
+ to a grace period?&rdquo;
+ The answer is that it is not the relationship of
+ <tt>rcu_read_lock()</tt> itself that is important, but rather
+ the relationship of the code within the enclosed RCU read-side
+ critical section to the code preceding and following the
+ grace period.
+ If we take this viewpoint, then a given RCU read-side critical
+ section begins before a given grace period when some access
+ preceding the grace period observes the effect of some access
+ within the critical section, in which case none of the accesses
+ within the critical section may observe the effects of any
+ access following the grace period.
+
+ <p>
+ As of late 2016, mathematical models of RCU take this
+ viewpoint, for example, see slides&nbsp;62 and&nbsp;63
+ of the
+ <a href="http://www2.rdrop.com/users/paulmck/scalability/paper/LinuxMM.2016.10.04c.LCE.pdf">2016 LinuxCon EU</a>
+ presentation.
</font></td></tr>
<tr><td>&nbsp;</td></tr>
</table>
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index 204422719197..5cbd8b2395b8 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -237,7 +237,7 @@ rcu_dereference()
The reader uses rcu_dereference() to fetch an RCU-protected
pointer, which returns a value that may then be safely
- dereferenced. Note that rcu_deference() does not actually
+ dereferenced. Note that rcu_dereference() does not actually
dereference the pointer, instead, it protects the pointer for
later dereferencing. It also executes any needed memory-barrier
instructions for a given CPU architecture. Currently, only Alpha
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index d3e756539d44..9d4f9b3a2b7b 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -698,7 +698,10 @@ TRACE_EVENT(rcu_batch_end,
/*
* Tracepoint for rcutorture readers. The first argument is the name
* of the RCU flavor from rcutorture's viewpoint and the second argument
- * is the callback address.
+ * is the callback address. The third argument is the start time in
+ * seconds, and the last two arguments are the grace period numbers
+ * at the beginning and end of the read, respectively. Note that the
+ * callback address can be NULL.
*/
TRACE_EVENT(rcu_torture_read,
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index bf08fee53dc7..87c51225ceec 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -289,15 +289,24 @@ static int rcu_torture_read_lock(void) __acquires(RCU)
static void rcu_read_delay(struct torture_random_state *rrsp)
{
+ unsigned long started;
+ unsigned long completed;
const unsigned long shortdelay_us = 200;
const unsigned long longdelay_ms = 50;
+ unsigned long long ts;
/* We want a short delay sometimes to make a reader delay the grace
* period, and we want a long delay occasionally to trigger
* force_quiescent_state. */
- if (!(torture_random(rrsp) % (nrealreaders * 2000 * longdelay_ms)))
+ if (!(torture_random(rrsp) % (nrealreaders * 2000 * longdelay_ms))) {
+ started = cur_ops->completed();
+ ts = rcu_trace_clock_local();
mdelay(longdelay_ms);
+ completed = cur_ops->completed();
+ do_trace_rcu_torture_read(cur_ops->name, NULL, ts,
+ started, completed);
+ }
if (!(torture_random(rrsp) % (nrealreaders * 2 * shortdelay_us)))
udelay(shortdelay_us);
#ifdef CONFIG_PREEMPT
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 69a5611a7e7c..96c52e43f7ca 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1304,7 +1304,8 @@ static void rcu_stall_kick_kthreads(struct rcu_state *rsp)
if (!rcu_kick_kthreads)
return;
j = READ_ONCE(rsp->jiffies_kick_kthreads);
- if (time_after(jiffies, j) && rsp->gp_kthread) {
+ if (time_after(jiffies, j) && rsp->gp_kthread &&
+ (rcu_gp_in_progress(rsp) || READ_ONCE(rsp->gp_flags))) {
WARN_ONCE(1, "Kicking %s grace-period kthread\n", rsp->name);
rcu_ftrace_dump(DUMP_ALL);
wake_up_process(rsp->gp_kthread);
@@ -2828,8 +2829,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
* Also schedule RCU core processing.
*
* This function must be called from hardirq context. It is normally
- * invoked from the scheduling-clock interrupt. If rcu_pending returns
- * false, there is no point in invoking rcu_check_callbacks().
+ * invoked from the scheduling-clock interrupt.
*/
void rcu_check_callbacks(int user)
{
@@ -3121,7 +3121,9 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func,
unsigned long flags;
struct rcu_data *rdp;
- WARN_ON_ONCE((unsigned long)head & 0x1); /* Misaligned rcu_head! */
+ /* Misaligned rcu_head! */
+ WARN_ON_ONCE((unsigned long)head & (sizeof(void *) - 1));
+
if (debug_rcu_head_queue(head)) {
/* Probable double call_rcu(), so leak the callback. */
WRITE_ONCE(head->func, rcu_leak_callback);
@@ -3130,13 +3132,6 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func,
}
head->func = func;
head->next = NULL;
-
- /*
- * Opportunistically note grace-period endings and beginnings.
- * Note that we might see a beginning right after we see an
- * end, but never vice versa, since this CPU has to pass through
- * a quiescent state betweentimes.
- */
local_irq_save(flags);
rdp = this_cpu_ptr(rsp->rda);
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index e99a5234d9ed..fe98dd24adf8 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -404,6 +404,7 @@ struct rcu_data {
atomic_long_t exp_workdone1; /* # done by others #1. */
atomic_long_t exp_workdone2; /* # done by others #2. */
atomic_long_t exp_workdone3; /* # done by others #3. */
+ int exp_dynticks_snap; /* Double-check need for IPI. */
/* 7) Callback offloading. */
#ifdef CONFIG_RCU_NOCB_CPU
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 24343eb87b58..d3053e99fdb6 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -358,8 +358,10 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
+ rdp->exp_dynticks_snap =
+ atomic_add_return(0, &rdtp->dynticks);
if (raw_smp_processor_id() == cpu ||
- !(atomic_add_return(0, &rdtp->dynticks) & 0x1) ||
+ !(rdp->exp_dynticks_snap & 0x1) ||
!(rnp->qsmaskinitnext & rdp->grpmask))
mask_ofl_test |= rdp->grpmask;
}
@@ -377,9 +379,17 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
/* IPI the remaining CPUs for expedited quiescent state. */
for_each_leaf_node_possible_cpu(rnp, cpu) {
unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
+ struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
+ struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
+
if (!(mask_ofl_ipi & mask))
continue;
retry_ipi:
+ if (atomic_add_return(0, &rdtp->dynticks) !=
+ rdp->exp_dynticks_snap) {
+ mask_ofl_test |= mask;
+ continue;
+ }
ret = smp_call_function_single(cpu, func, rsp, 0);
if (!ret) {
mask_ofl_ipi &= ~mask;
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 0c6366b77062..94b28ae4e254 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1430,7 +1430,8 @@ config RCU_TRACE
select TRACE_CLOCK
help
This option provides tracing in RCU which presents stats
- in debugfs for debugging RCU implementation.
+ in debugfs for debugging RCU implementation. It also enables
+ additional tracepoints for ftrace-style event tracing.
Say Y here if you want to enable RCU tracing
Say N if you are unsure.
diff --git a/tools/testing/selftests/rcutorture/.gitignore b/tools/testing/selftests/rcutorture/.gitignore
index 05838f6f2ebe..ccc240275d1c 100644
--- a/tools/testing/selftests/rcutorture/.gitignore
+++ b/tools/testing/selftests/rcutorture/.gitignore
@@ -1,6 +1,4 @@
initrd
-linux-2.6
b[0-9]*
-rcu-test-image
res
*.swp
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 0aed965f0062..3b3c1b693ee1 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -303,6 +303,7 @@ then
fi
___EOF___
awk < $T/cfgcpu.pack \
+ -v TORTURE_BUILDONLY="$TORTURE_BUILDONLY" \
-v CONFIGDIR="$CONFIGFRAG/" \
-v KVM="$KVM" \
-v ncpus=$cpus \
@@ -375,6 +376,10 @@ function dump(first, pastlast, batchnum)
njitter = ncpus;
else
njitter = ja[1];
+ if (TORTURE_BUILDONLY && njitter != 0) {
+ njitter = 0;
+ print "echo Build-only run, so suppressing jitter >> " rd "/log"
+ }
for (j = 0; j < njitter; j++)
print "jitter.sh " j " " dur " " ja[2] " " ja[3] "&"
print "wait"