-rw-r--r--	kernel/rcu/tree.c	| 31 +++++++++++++++++--------------
-rw-r--r--	kernel/rcu/tree.h	|  6 ++++++
-rw-r--r--	kernel/rcu/tree_trace.c	|  3 ++-
3 files changed, 25 insertions(+), 15 deletions(-)
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index b310b40a49a2..c5c8509054ef 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3257,18 +3257,11 @@ EXPORT_SYMBOL_GPL(cond_synchronize_rcu);
 
 static int synchronize_sched_expedited_cpu_stop(void *data)
 {
-	/*
-	 * There must be a full memory barrier on each affected CPU
-	 * between the time that try_stop_cpus() is called and the
-	 * time that it returns.
-	 *
-	 * In the current initial implementation of cpu_stop, the
-	 * above condition is already met when the control reaches
-	 * this point and the following smp_mb() is not strictly
-	 * necessary.  Do smp_mb() anyway for documentation and
-	 * robustness against future implementation changes.
-	 */
-	smp_mb(); /* See above comment block. */
+	struct rcu_state *rsp = data;
+
+	/* We are here: If we are last, do the wakeup. */
+	if (atomic_dec_and_test(&rsp->expedited_need_qs))
+		wake_up(&rsp->expedited_wq);
 	return 0;
 }
 
@@ -3308,9 +3301,9 @@ void synchronize_sched_expedited(void)
 {
 	int cpu;
 	long s;
-	struct rcu_state *rsp = &rcu_sched_state;
 	struct rcu_node *rnp0;
 	struct rcu_node *rnp1 = NULL;
+	struct rcu_state *rsp = &rcu_sched_state;
 
 	/* Take a snapshot of the sequence number. */
 	smp_mb(); /* Caller's modifications seen first by other CPUs. */
@@ -3351,16 +3344,26 @@ void synchronize_sched_expedited(void)
 	WARN_ON_ONCE(!(rsp->expedited_sequence & 0x1));
 
 	/* Stop each CPU that is online, non-idle, and not us. */
+	init_waitqueue_head(&rsp->expedited_wq);
+	atomic_set(&rsp->expedited_need_qs, 1); /* Extra count avoids race. */
 	for_each_online_cpu(cpu) {
+		struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
 		struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
 
 		/* Skip our CPU and any idle CPUs. */
 		if (raw_smp_processor_id() == cpu ||
 		    !(atomic_add_return(0, &rdtp->dynticks) & 0x1))
 			continue;
-		stop_one_cpu(cpu, synchronize_sched_expedited_cpu_stop, NULL);
+		atomic_inc(&rsp->expedited_need_qs);
+		stop_one_cpu_nowait(cpu, synchronize_sched_expedited_cpu_stop,
+				    rsp, &rdp->exp_stop_work);
 	}
 
+	/* Remove extra count and, if necessary, wait for CPUs to stop. */
+	if (!atomic_dec_and_test(&rsp->expedited_need_qs))
+		wait_event(rsp->expedited_wq,
+			   !atomic_read(&rsp->expedited_need_qs));
+
 	smp_mb(); /* Ensure expedited GP seen before counter increment. */
 	WRITE_ONCE(rsp->expedited_sequence, rsp->expedited_sequence + 1);
 	WARN_ON_ONCE(rsp->expedited_sequence & 0x1);
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 2ef036b356f7..4edc277d08eb 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -27,6 +27,7 @@
 #include <linux/threads.h>
 #include <linux/cpumask.h>
 #include <linux/seqlock.h>
+#include <linux/stop_machine.h>
 
 /*
  * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and
@@ -298,6 +299,9 @@ struct rcu_data {
 					/*  ticks this CPU has handled */
 					/*  during and after the last grace */
 					/*  period it is aware of. */
+	struct cpu_stop_work exp_stop_work;
+					/* Expedited grace-period control */
+					/*  for CPU stopping. */
 
 	/* 2) batch handling */
 	/*
@@ -491,6 +495,8 @@ struct rcu_state {
 	atomic_long_t expedited_workdone1;	/* # done by others #1. */
 	atomic_long_t expedited_workdone2;	/* # done by others #2. */
 	atomic_long_t expedited_normal;		/* # fallbacks to normal. */
+	atomic_t expedited_need_qs;		/* # CPUs left to check in. */
+	wait_queue_head_t expedited_wq;		/* Wait for check-ins. */
 	unsigned long jiffies_force_qs;		/* Time at which to invoke */
 						/*  force_quiescent_state(). */
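The heart of this change is the counting protocol: expedited_need_qs is seeded with an extra count of 1 before any stopper work is queued, so a fast-finishing stopper cannot drive the counter to zero (and deliver the wakeup) while the dispatch loop is still queueing work; the dispatcher drops that extra count itself and sleeps only if some CPU has yet to check in. Below is a minimal userspace sketch of the same pattern, assuming C11 stdatomic plus pthreads as stand-ins for the kernel's atomic_t and wait_queue_head_t; NWORKERS, need_qs, and worker() are illustrative names, not kernel APIs:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NWORKERS 4

static atomic_int need_qs;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t wq = PTHREAD_COND_INITIALIZER;

/* Analogue of synchronize_sched_expedited_cpu_stop(): last to check in wakes the waiter. */
static void *worker(void *arg)
{
	(void)arg;
	/* ... per-CPU quiescent-state work would happen here ... */
	if (atomic_fetch_sub(&need_qs, 1) == 1) {	/* like atomic_dec_and_test() */
		pthread_mutex_lock(&lock);
		pthread_cond_broadcast(&wq);		/* like wake_up() */
		pthread_mutex_unlock(&lock);
	}
	return NULL;
}

int main(void)
{
	pthread_t tids[NWORKERS];

	atomic_store(&need_qs, 1);		/* extra count avoids the early-zero race */
	for (int i = 0; i < NWORKERS; i++) {
		atomic_fetch_add(&need_qs, 1);	/* like atomic_inc() before stop_one_cpu_nowait() */
		pthread_create(&tids[i], NULL, worker, NULL);
	}

	/* Drop the extra count; wait only if some worker has not yet checked in. */
	if (atomic_fetch_sub(&need_qs, 1) != 1) {
		pthread_mutex_lock(&lock);
		while (atomic_load(&need_qs) != 0)	/* like wait_event() rechecking its condition */
			pthread_cond_wait(&wq, &lock);
		pthread_mutex_unlock(&lock);
	}

	for (int i = 0; i < NWORKERS; i++)
		pthread_join(tids[i], NULL);
	printf("all workers checked in\n");
	return 0;
}

Build with cc -pthread. The mutex/condvar pair emulates the wait queue, and rechecking the predicate under the mutex plays the role of wait_event()'s condition test, so the wakeup cannot be lost even if the last worker finishes before the dispatcher starts waiting.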
diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
index d2aab8dcd58e..36c04b46d3b8 100644
--- a/kernel/rcu/tree_trace.c
+++ b/kernel/rcu/tree_trace.c
@@ -185,12 +185,13 @@ static int show_rcuexp(struct seq_file *m, void *v)
 {
 	struct rcu_state *rsp = (struct rcu_state *)m->private;
 
-	seq_printf(m, "t=%lu tf=%lu wd1=%lu wd2=%lu n=%lu sc=%lu\n",
+	seq_printf(m, "t=%lu tf=%lu wd1=%lu wd2=%lu n=%lu enq=%d sc=%lu\n",
 		   rsp->expedited_sequence,
 		   atomic_long_read(&rsp->expedited_tryfail),
 		   atomic_long_read(&rsp->expedited_workdone1),
 		   atomic_long_read(&rsp->expedited_workdone2),
 		   atomic_long_read(&rsp->expedited_normal),
+		   atomic_read(&rsp->expedited_need_qs),
 		   rsp->expedited_sequence / 2);
 	return 0;
 }
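The new enq= field slots in between n= and sc= in the rcuexp output and reports expedited_need_qs, the number of CPUs whose stopper callbacks have not yet checked in; between expedited grace periods it should read 0. The sc= value remains expedited_sequence / 2 because the sequence counter advances twice per expedited grace period, once at the start and once at the end. A hypothetical sample line (values illustrative; the file ordinarily lives under the RCU debugfs directory, e.g. /sys/kernel/debug/rcu/rcu_sched/rcuexp on kernels of this vintage):

t=21036 tf=0 wd1=0 wd2=0 n=0 enq=0 sc=10518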