diff options
-rw-r--r-- | Documentation/RCU/Design/Requirements/Requirements.html | 25 | ||||
-rw-r--r-- | Documentation/RCU/whatisRCU.txt | 2 | ||||
-rw-r--r-- | include/trace/events/rcu.h | 5 | ||||
-rw-r--r-- | kernel/rcu/rcutorture.c | 11 | ||||
-rw-r--r-- | kernel/rcu/tree.c | 17 | ||||
-rw-r--r-- | kernel/rcu/tree.h | 1 | ||||
-rw-r--r-- | kernel/rcu/tree_exp.h | 12 | ||||
-rw-r--r-- | lib/Kconfig.debug | 3 | ||||
-rw-r--r-- | tools/testing/selftests/rcutorture/.gitignore | 2 | ||||
-rwxr-xr-x | tools/testing/selftests/rcutorture/bin/kvm.sh | 5 |
10 files changed, 64 insertions, 19 deletions
diff --git a/Documentation/RCU/Design/Requirements/Requirements.html b/Documentation/RCU/Design/Requirements/Requirements.html index a4d3838130e4..39bcb74ea733 100644 --- a/Documentation/RCU/Design/Requirements/Requirements.html +++ b/Documentation/RCU/Design/Requirements/Requirements.html @@ -547,7 +547,7 @@ The <tt>rcu_access_pointer()</tt> on line 6 is similar to It could reuse a value formerly fetched from this same pointer. It could also fetch the pointer from <tt>gp</tt> in a byte-at-a-time manner, resulting in <i>load tearing</i>, in turn resulting a bytewise - mash-up of two distince pointer values. + mash-up of two distinct pointer values. It might even use value-speculation optimizations, where it makes a wrong guess, but by the time it gets around to checking the value, an update has changed the pointer to match the wrong guess. @@ -659,6 +659,29 @@ systems with more than one CPU: In other words, a given instance of <tt>synchronize_rcu()</tt> can avoid waiting on a given RCU read-side critical section only if it can prove that <tt>synchronize_rcu()</tt> started first. + + <p> + A related question is “When <tt>rcu_read_lock()</tt> + doesn't generate any code, why does it matter how it relates + to a grace period?” + The answer is that it is not the relationship of + <tt>rcu_read_lock()</tt> itself that is important, but rather + the relationship of the code within the enclosed RCU read-side + critical section to the code preceding and following the + grace period. + If we take this viewpoint, then a given RCU read-side critical + section begins before a given grace period when some access + preceding the grace period observes the effect of some access + within the critical section, in which case none of the accesses + within the critical section may observe the effects of any + access following the grace period. + + <p> + As of late 2016, mathematical models of RCU take this + viewpoint, for example, see slides 62 and 63 + of the + <a href="http://www2.rdrop.com/users/paulmck/scalability/paper/LinuxMM.2016.10.04c.LCE.pdf">2016 LinuxCon EU</a> + presentation. </font></td></tr> <tr><td> </td></tr> </table> diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index 204422719197..5cbd8b2395b8 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt @@ -237,7 +237,7 @@ rcu_dereference() The reader uses rcu_dereference() to fetch an RCU-protected pointer, which returns a value that may then be safely - dereferenced. Note that rcu_deference() does not actually + dereferenced. Note that rcu_dereference() does not actually dereference the pointer, instead, it protects the pointer for later dereferencing. It also executes any needed memory-barrier instructions for a given CPU architecture. Currently, only Alpha diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index d3e756539d44..9d4f9b3a2b7b 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -698,7 +698,10 @@ TRACE_EVENT(rcu_batch_end, /* * Tracepoint for rcutorture readers. The first argument is the name * of the RCU flavor from rcutorture's viewpoint and the second argument - * is the callback address. + * is the callback address. The third argument is the start time in + * seconds, and the last two arguments are the grace period numbers + * at the beginning and end of the read, respectively. Note that the + * callback address can be NULL. */ TRACE_EVENT(rcu_torture_read, diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index bf08fee53dc7..87c51225ceec 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -289,15 +289,24 @@ static int rcu_torture_read_lock(void) __acquires(RCU) static void rcu_read_delay(struct torture_random_state *rrsp) { + unsigned long started; + unsigned long completed; const unsigned long shortdelay_us = 200; const unsigned long longdelay_ms = 50; + unsigned long long ts; /* We want a short delay sometimes to make a reader delay the grace * period, and we want a long delay occasionally to trigger * force_quiescent_state. */ - if (!(torture_random(rrsp) % (nrealreaders * 2000 * longdelay_ms))) + if (!(torture_random(rrsp) % (nrealreaders * 2000 * longdelay_ms))) { + started = cur_ops->completed(); + ts = rcu_trace_clock_local(); mdelay(longdelay_ms); + completed = cur_ops->completed(); + do_trace_rcu_torture_read(cur_ops->name, NULL, ts, + started, completed); + } if (!(torture_random(rrsp) % (nrealreaders * 2 * shortdelay_us))) udelay(shortdelay_us); #ifdef CONFIG_PREEMPT diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 69a5611a7e7c..96c52e43f7ca 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1304,7 +1304,8 @@ static void rcu_stall_kick_kthreads(struct rcu_state *rsp) if (!rcu_kick_kthreads) return; j = READ_ONCE(rsp->jiffies_kick_kthreads); - if (time_after(jiffies, j) && rsp->gp_kthread) { + if (time_after(jiffies, j) && rsp->gp_kthread && + (rcu_gp_in_progress(rsp) || READ_ONCE(rsp->gp_flags))) { WARN_ONCE(1, "Kicking %s grace-period kthread\n", rsp->name); rcu_ftrace_dump(DUMP_ALL); wake_up_process(rsp->gp_kthread); @@ -2828,8 +2829,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) * Also schedule RCU core processing. * * This function must be called from hardirq context. It is normally - * invoked from the scheduling-clock interrupt. If rcu_pending returns - * false, there is no point in invoking rcu_check_callbacks(). + * invoked from the scheduling-clock interrupt. */ void rcu_check_callbacks(int user) { @@ -3121,7 +3121,9 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func, unsigned long flags; struct rcu_data *rdp; - WARN_ON_ONCE((unsigned long)head & 0x1); /* Misaligned rcu_head! */ + /* Misaligned rcu_head! */ + WARN_ON_ONCE((unsigned long)head & (sizeof(void *) - 1)); + if (debug_rcu_head_queue(head)) { /* Probable double call_rcu(), so leak the callback. */ WRITE_ONCE(head->func, rcu_leak_callback); @@ -3130,13 +3132,6 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func, } head->func = func; head->next = NULL; - - /* - * Opportunistically note grace-period endings and beginnings. - * Note that we might see a beginning right after we see an - * end, but never vice versa, since this CPU has to pass through - * a quiescent state betweentimes. - */ local_irq_save(flags); rdp = this_cpu_ptr(rsp->rda); diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index e99a5234d9ed..fe98dd24adf8 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -404,6 +404,7 @@ struct rcu_data { atomic_long_t exp_workdone1; /* # done by others #1. */ atomic_long_t exp_workdone2; /* # done by others #2. */ atomic_long_t exp_workdone3; /* # done by others #3. */ + int exp_dynticks_snap; /* Double-check need for IPI. */ /* 7) Callback offloading. */ #ifdef CONFIG_RCU_NOCB_CPU diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 24343eb87b58..d3053e99fdb6 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -358,8 +358,10 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp, struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); + rdp->exp_dynticks_snap = + atomic_add_return(0, &rdtp->dynticks); if (raw_smp_processor_id() == cpu || - !(atomic_add_return(0, &rdtp->dynticks) & 0x1) || + !(rdp->exp_dynticks_snap & 0x1) || !(rnp->qsmaskinitnext & rdp->grpmask)) mask_ofl_test |= rdp->grpmask; } @@ -377,9 +379,17 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp, /* IPI the remaining CPUs for expedited quiescent state. */ for_each_leaf_node_possible_cpu(rnp, cpu) { unsigned long mask = leaf_node_cpu_bit(rnp, cpu); + struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); + struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); + if (!(mask_ofl_ipi & mask)) continue; retry_ipi: + if (atomic_add_return(0, &rdtp->dynticks) != + rdp->exp_dynticks_snap) { + mask_ofl_test |= mask; + continue; + } ret = smp_call_function_single(cpu, func, rsp, 0); if (!ret) { mask_ofl_ipi &= ~mask; diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 0c6366b77062..94b28ae4e254 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1430,7 +1430,8 @@ config RCU_TRACE select TRACE_CLOCK help This option provides tracing in RCU which presents stats - in debugfs for debugging RCU implementation. + in debugfs for debugging RCU implementation. It also enables + additional tracepoints for ftrace-style event tracing. Say Y here if you want to enable RCU tracing Say N if you are unsure. diff --git a/tools/testing/selftests/rcutorture/.gitignore b/tools/testing/selftests/rcutorture/.gitignore index 05838f6f2ebe..ccc240275d1c 100644 --- a/tools/testing/selftests/rcutorture/.gitignore +++ b/tools/testing/selftests/rcutorture/.gitignore @@ -1,6 +1,4 @@ initrd -linux-2.6 b[0-9]* -rcu-test-image res *.swp diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 0aed965f0062..3b3c1b693ee1 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -303,6 +303,7 @@ then fi ___EOF___ awk < $T/cfgcpu.pack \ + -v TORTURE_BUILDONLY="$TORTURE_BUILDONLY" \ -v CONFIGDIR="$CONFIGFRAG/" \ -v KVM="$KVM" \ -v ncpus=$cpus \ @@ -375,6 +376,10 @@ function dump(first, pastlast, batchnum) njitter = ncpus; else njitter = ja[1]; + if (TORTURE_BUILDONLY && njitter != 0) { + njitter = 0; + print "echo Build-only run, so suppressing jitter >> " rd "/log" + } for (j = 0; j < njitter; j++) print "jitter.sh " j " " dur " " ja[2] " " ja[3] "&" print "wait" |