From f96e9232e04856c781d4f71923a46dd3f7b429fa Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 4 Jan 2010 15:09:00 -0800 Subject: rcu: Adjust force_quiescent_state() locking, step 1 This causes rnp->lock to be held on entry to force_quiescent_state()'s switch statement. This is a first step towards prohibiting starting grace periods while force_quiescent_state() is executing, which will reduce the number and complexity of races that force_quiescent_state() is involved in. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12626465501455-git-send-email-> Signed-off-by: Ingo Molnar --- kernel/rcutree.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 53ae9598f798..eae331da6bee 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1204,7 +1204,7 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) } if (relaxed && (long)(rsp->jiffies_force_qs - jiffies) >= 0) - goto unlock_ret; /* no emergency and done recently. */ + goto unlock_fqs_ret; /* no emergency and done recently. */ rsp->n_force_qs++; spin_lock(&rnp->lock); lastcomp = rsp->gpnum - 1; @@ -1213,31 +1213,32 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) if(!rcu_gp_in_progress(rsp)) { rsp->n_force_qs_ngp++; spin_unlock(&rnp->lock); - goto unlock_ret; /* no GP in progress, time updated. */ + goto unlock_fqs_ret; /* no GP in progress, time updated. */ } - spin_unlock(&rnp->lock); switch (signaled) { case RCU_GP_IDLE: case RCU_GP_INIT: + spin_unlock(&rnp->lock); break; /* grace period idle or initializing, ignore. */ case RCU_SAVE_DYNTICK: + spin_unlock(&rnp->lock); if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK) break; /* So gcc recognizes the dead code. */ /* Record dyntick-idle state. */ if (rcu_process_dyntick(rsp, lastcomp, dyntick_save_progress_counter)) - goto unlock_ret; + goto unlock_fqs_ret; + spin_lock(&rnp->lock); /* fall into next case. */ case RCU_SAVE_COMPLETED: /* Update state, record completion counter. */ forcenow = 0; - spin_lock(&rnp->lock); if (lastcomp + 1 == rsp->gpnum && lastcomp == rsp->completed && rsp->signaled == signaled) { @@ -1245,23 +1246,31 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) rsp->completed_fqs = lastcomp; forcenow = signaled == RCU_SAVE_COMPLETED; } - spin_unlock(&rnp->lock); - if (!forcenow) + if (!forcenow) { + spin_unlock(&rnp->lock); break; + } /* fall into next case. */ case RCU_FORCE_QS: /* Check dyntick-idle state, send IPI to laggarts. */ + spin_unlock(&rnp->lock); if (rcu_process_dyntick(rsp, rsp->completed_fqs, rcu_implicit_dynticks_qs)) - goto unlock_ret; + goto unlock_fqs_ret; /* Leave state in case more forcing is required. */ break; + + default: + + spin_unlock(&rnp->lock); + WARN_ON_ONCE(1); + break; } -unlock_ret: +unlock_fqs_ret: spin_unlock_irqrestore(&rsp->fqslock, flags); } -- cgit v1.2.3 From 559569acf94f538b56bd6eead80b439d6a78cdff Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 4 Jan 2010 15:09:01 -0800 Subject: rcu: Adjust force_quiescent_state() locking, step 2 This patch releases rnp->lock after the end of force_quiescent_state()'s switch statement. This is a second step towards prohibiting starting grace periods while force_quiescent_state() is executing, which will reduce the number and complexity of races that force_quiescent_state() is involved in. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12626465501994-git-send-email-> Signed-off-by: Ingo Molnar --- kernel/rcutree.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index eae331da6bee..d42ad30c4d70 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1219,7 +1219,6 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) case RCU_GP_IDLE: case RCU_GP_INIT: - spin_unlock(&rnp->lock); break; /* grace period idle or initializing, ignore. */ case RCU_SAVE_DYNTICK: @@ -1246,10 +1245,8 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) rsp->completed_fqs = lastcomp; forcenow = signaled == RCU_SAVE_COMPLETED; } - if (!forcenow) { - spin_unlock(&rnp->lock); + if (!forcenow) break; - } /* fall into next case. */ case RCU_FORCE_QS: @@ -1262,14 +1259,10 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) /* Leave state in case more forcing is required. */ - break; - - default: - - spin_unlock(&rnp->lock); - WARN_ON_ONCE(1); + spin_lock(&rnp->lock); break; } + spin_unlock(&rnp->lock); unlock_fqs_ret: spin_unlock_irqrestore(&rsp->fqslock, flags); } -- cgit v1.2.3 From 07079d5357a4d53c2b13126c4a38fb40e6e04966 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 4 Jan 2010 15:09:02 -0800 Subject: rcu: Prohibit starting new grace periods while forcing quiescent states Reduce the number and variety of race conditions by prohibiting the start of a new grace period while force_quiescent_state() is active. A new fqs_active flag in the rcu_state structure is used to trace whether or not force_quiescent_state() is active, and this new flag is tested by rcu_start_gp(). If the CPU that closed out the last grace period needs another grace period, this new grace period may be delayed up to one scheduling-clock tick, but it will eventually get started. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <126264655052-git-send-email-> Signed-off-by: Ingo Molnar --- kernel/rcutree.c | 31 +++++++++++++++++-------------- kernel/rcutree.h | 2 ++ 2 files changed, 19 insertions(+), 14 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index d42ad30c4d70..41688ff60e07 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -659,7 +659,7 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) struct rcu_data *rdp = rsp->rda[smp_processor_id()]; struct rcu_node *rnp = rcu_get_root(rsp); - if (!cpu_needs_another_gp(rsp, rdp)) { + if (!cpu_needs_another_gp(rsp, rdp) || rsp->fqs_active) { if (rnp->completed == rsp->completed) { spin_unlock_irqrestore(&rnp->lock, flags); return; @@ -1195,6 +1195,7 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) struct rcu_node *rnp = rcu_get_root(rsp); u8 signaled; u8 forcenow; + u8 gpdone; if (!rcu_gp_in_progress(rsp)) return; /* No grace period in progress, nothing to force. */ @@ -1206,15 +1207,16 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) (long)(rsp->jiffies_force_qs - jiffies) >= 0) goto unlock_fqs_ret; /* no emergency and done recently. */ rsp->n_force_qs++; - spin_lock(&rnp->lock); + spin_lock(&rnp->lock); /* irqs already disabled */ lastcomp = rsp->gpnum - 1; signaled = rsp->signaled; rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; if(!rcu_gp_in_progress(rsp)) { rsp->n_force_qs_ngp++; - spin_unlock(&rnp->lock); + spin_unlock(&rnp->lock); /* irqs remain disabled */ goto unlock_fqs_ret; /* no GP in progress, time updated. */ } + rsp->fqs_active = 1; switch (signaled) { case RCU_GP_IDLE: case RCU_GP_INIT: @@ -1223,15 +1225,16 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) case RCU_SAVE_DYNTICK: - spin_unlock(&rnp->lock); + spin_unlock(&rnp->lock); /* irqs remain disabled */ if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK) break; /* So gcc recognizes the dead code. */ /* Record dyntick-idle state. */ - if (rcu_process_dyntick(rsp, lastcomp, - dyntick_save_progress_counter)) - goto unlock_fqs_ret; - spin_lock(&rnp->lock); + gpdone = rcu_process_dyntick(rsp, lastcomp, + dyntick_save_progress_counter); + spin_lock(&rnp->lock); /* irqs already disabled */ + if (gpdone) + break; /* fall into next case. */ case RCU_SAVE_COMPLETED: @@ -1252,17 +1255,17 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) case RCU_FORCE_QS: /* Check dyntick-idle state, send IPI to laggarts. */ - spin_unlock(&rnp->lock); - if (rcu_process_dyntick(rsp, rsp->completed_fqs, - rcu_implicit_dynticks_qs)) - goto unlock_fqs_ret; + spin_unlock(&rnp->lock); /* irqs remain disabled */ + gpdone = rcu_process_dyntick(rsp, rsp->completed_fqs, + rcu_implicit_dynticks_qs); /* Leave state in case more forcing is required. */ - spin_lock(&rnp->lock); + spin_lock(&rnp->lock); /* irqs already disabled */ break; } - spin_unlock(&rnp->lock); + rsp->fqs_active = 0; + spin_unlock(&rnp->lock); /* irqs remain disabled */ unlock_fqs_ret: spin_unlock_irqrestore(&rsp->fqslock, flags); } diff --git a/kernel/rcutree.h b/kernel/rcutree.h index d2a0046f63b2..dc386a7c634f 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -277,6 +277,8 @@ struct rcu_state { u8 signaled ____cacheline_internodealigned_in_smp; /* Force QS state. */ + u8 fqs_active; /* force_quiescent_state() */ + /* is running. */ long gpnum; /* Current gp number. */ long completed; /* # of last completed gp. */ -- cgit v1.2.3 From f3a8b5c6aa543bd87764418d63632eb65b80e2f6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 4 Jan 2010 15:09:03 -0800 Subject: rcu: Eliminate local variable signaled from force_quiescent_state() Because the root rcu_node lock is held across entry to the switch statement in force_quiescent_state(), it is no longer necessary to snapshot rsp->signaled to a local variable. Eliminate both the snapshotting and the local variable. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1262646550602-git-send-email-> Signed-off-by: Ingo Molnar --- kernel/rcutree.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 41688ff60e07..1d8cfb1711fd 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1193,7 +1193,6 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) unsigned long flags; long lastcomp; struct rcu_node *rnp = rcu_get_root(rsp); - u8 signaled; u8 forcenow; u8 gpdone; @@ -1209,7 +1208,6 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) rsp->n_force_qs++; spin_lock(&rnp->lock); /* irqs already disabled */ lastcomp = rsp->gpnum - 1; - signaled = rsp->signaled; rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; if(!rcu_gp_in_progress(rsp)) { rsp->n_force_qs_ngp++; @@ -1217,7 +1215,7 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) goto unlock_fqs_ret; /* no GP in progress, time updated. */ } rsp->fqs_active = 1; - switch (signaled) { + switch (rsp->signaled) { case RCU_GP_IDLE: case RCU_GP_INIT: @@ -1242,11 +1240,10 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) /* Update state, record completion counter. */ forcenow = 0; if (lastcomp + 1 == rsp->gpnum && - lastcomp == rsp->completed && - rsp->signaled == signaled) { + lastcomp == rsp->completed) { + forcenow = rsp->signaled == RCU_SAVE_COMPLETED; rsp->signaled = RCU_FORCE_QS; rsp->completed_fqs = lastcomp; - forcenow = signaled == RCU_SAVE_COMPLETED; } if (!forcenow) break; -- cgit v1.2.3 From 39c0bbfc07c6e28db7346d0e11106f2d045d3035 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 4 Jan 2010 15:09:04 -0800 Subject: rcu: Eliminate local variable lastcomp from force_quiescent_state() Because rsp->fqs_active is set to 1 across force_quiescent_state()'s switch statement, rcu_start_gp() will refrain from starting a new grace period during this time. Therefore, rsp->gpnum is constant, and can be propagated to all uses of lastcomp, eliminating this local variable. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12626465502985-git-send-email-> Signed-off-by: Ingo Molnar --- kernel/rcutree.c | 10 +++------- kernel/rcutree.h | 2 -- 2 files changed, 3 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 1d8cfb1711fd..62b64332effb 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1191,7 +1191,6 @@ static int rcu_process_dyntick(struct rcu_state *rsp, long lastcomp, static void force_quiescent_state(struct rcu_state *rsp, int relaxed) { unsigned long flags; - long lastcomp; struct rcu_node *rnp = rcu_get_root(rsp); u8 forcenow; u8 gpdone; @@ -1207,7 +1206,6 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) goto unlock_fqs_ret; /* no emergency and done recently. */ rsp->n_force_qs++; spin_lock(&rnp->lock); /* irqs already disabled */ - lastcomp = rsp->gpnum - 1; rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; if(!rcu_gp_in_progress(rsp)) { rsp->n_force_qs_ngp++; @@ -1228,7 +1226,7 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) break; /* So gcc recognizes the dead code. */ /* Record dyntick-idle state. */ - gpdone = rcu_process_dyntick(rsp, lastcomp, + gpdone = rcu_process_dyntick(rsp, rsp->gpnum - 1, dyntick_save_progress_counter); spin_lock(&rnp->lock); /* irqs already disabled */ if (gpdone) @@ -1239,11 +1237,9 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) /* Update state, record completion counter. */ forcenow = 0; - if (lastcomp + 1 == rsp->gpnum && - lastcomp == rsp->completed) { + if (rsp->gpnum - 1 == rsp->completed) { forcenow = rsp->signaled == RCU_SAVE_COMPLETED; rsp->signaled = RCU_FORCE_QS; - rsp->completed_fqs = lastcomp; } if (!forcenow) break; @@ -1253,7 +1249,7 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) /* Check dyntick-idle state, send IPI to laggarts. */ spin_unlock(&rnp->lock); /* irqs remain disabled */ - gpdone = rcu_process_dyntick(rsp, rsp->completed_fqs, + gpdone = rcu_process_dyntick(rsp, rsp->gpnum - 1, rcu_implicit_dynticks_qs); /* Leave state in case more forcing is required. */ diff --git a/kernel/rcutree.h b/kernel/rcutree.h index dc386a7c634f..534856121b06 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -296,8 +296,6 @@ struct rcu_state { long orphan_qlen; /* Number of orphaned cbs. */ spinlock_t fqslock; /* Only one task forcing */ /* quiescent states. */ - long completed_fqs; /* Value of completed @ snap. */ - /* Protected by fqslock. */ unsigned long jiffies_force_qs; /* Time at which to invoke */ /* force_quiescent_state(). */ unsigned long n_force_qs; /* Number of calls to */ -- cgit v1.2.3 From eb1ba45f1e7f6e626fefc063b340c7cbec9bd8c7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 4 Jan 2010 15:09:05 -0800 Subject: rcu: Eliminate second argument of rcu_process_dyntick() At this point, the second argument to all calls to rcu_process_dyntick() is a function of the same field of the structure passed in as the first argument, namely, rsp->gpnum-1. So propagate rsp->gpnum-1 to all uses of the second argument within rcu_process_dyntick() and then eliminate the second argument. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12626465503786-git-send-email-> Signed-off-by: Ingo Molnar --- kernel/rcutree.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 62b64332effb..c7d00700fc4e 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1147,7 +1147,7 @@ void rcu_check_callbacks(int cpu, int user) * Returns 1 if the current grace period ends while scanning (possibly * because we made it end). */ -static int rcu_process_dyntick(struct rcu_state *rsp, long lastcomp, +static int rcu_process_dyntick(struct rcu_state *rsp, int (*f)(struct rcu_data *)) { unsigned long bit; @@ -1159,7 +1159,7 @@ static int rcu_process_dyntick(struct rcu_state *rsp, long lastcomp, rcu_for_each_leaf_node(rsp, rnp) { mask = 0; spin_lock_irqsave(&rnp->lock, flags); - if (rnp->completed != lastcomp) { + if (rnp->completed != rsp->gpnum - 1) { spin_unlock_irqrestore(&rnp->lock, flags); return 1; } @@ -1173,7 +1173,7 @@ static int rcu_process_dyntick(struct rcu_state *rsp, long lastcomp, if ((rnp->qsmask & bit) != 0 && f(rsp->rda[cpu])) mask |= bit; } - if (mask != 0 && rnp->completed == lastcomp) { + if (mask != 0 && rnp->completed == rsp->gpnum - 1) { /* rcu_report_qs_rnp() releases rnp->lock. */ rcu_report_qs_rnp(mask, rsp, rnp, flags); @@ -1226,7 +1226,7 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) break; /* So gcc recognizes the dead code. */ /* Record dyntick-idle state. */ - gpdone = rcu_process_dyntick(rsp, rsp->gpnum - 1, + gpdone = rcu_process_dyntick(rsp, dyntick_save_progress_counter); spin_lock(&rnp->lock); /* irqs already disabled */ if (gpdone) @@ -1249,8 +1249,7 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) /* Check dyntick-idle state, send IPI to laggarts. */ spin_unlock(&rnp->lock); /* irqs remain disabled */ - gpdone = rcu_process_dyntick(rsp, rsp->gpnum - 1, - rcu_implicit_dynticks_qs); + gpdone = rcu_process_dyntick(rsp, rcu_implicit_dynticks_qs); /* Leave state in case more forcing is required. */ -- cgit v1.2.3 From 0f10dc826646134dce3e5751512b87d30f3903e4 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 4 Jan 2010 15:09:06 -0800 Subject: rcu: Eliminate rcu_process_dyntick() return value Because a new grace period cannot start while we are executing within the force_quiescent_state() function's switch statement, if any test within that switch statement or within any function called from that switch statement shows that the current grace period has ended, we can safely re-do that test any time before we leave the switch statement. This means that we no longer need a return value from rcu_process_dyntick(), as we can simply invoke rcu_gp_in_progress() to check whether the old grace period has finished -- there is no longer any need to worry about whether or not a new grace period has been started. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12626465501857-git-send-email-> Signed-off-by: Ingo Molnar --- kernel/rcutree.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index c7d00700fc4e..e4971192fa9c 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1144,11 +1144,9 @@ void rcu_check_callbacks(int cpu, int user) /* * Scan the leaf rcu_node structures, processing dyntick state for any that * have not yet encountered a quiescent state, using the function specified. - * Returns 1 if the current grace period ends while scanning (possibly - * because we made it end). */ -static int rcu_process_dyntick(struct rcu_state *rsp, - int (*f)(struct rcu_data *)) +static void rcu_process_dyntick(struct rcu_state *rsp, + int (*f)(struct rcu_data *)) { unsigned long bit; int cpu; @@ -1161,7 +1159,7 @@ static int rcu_process_dyntick(struct rcu_state *rsp, spin_lock_irqsave(&rnp->lock, flags); if (rnp->completed != rsp->gpnum - 1) { spin_unlock_irqrestore(&rnp->lock, flags); - return 1; + return; } if (rnp->qsmask == 0) { spin_unlock_irqrestore(&rnp->lock, flags); @@ -1181,7 +1179,6 @@ static int rcu_process_dyntick(struct rcu_state *rsp, } spin_unlock_irqrestore(&rnp->lock, flags); } - return 0; } /* @@ -1193,7 +1190,6 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) unsigned long flags; struct rcu_node *rnp = rcu_get_root(rsp); u8 forcenow; - u8 gpdone; if (!rcu_gp_in_progress(rsp)) return; /* No grace period in progress, nothing to force. */ @@ -1226,10 +1222,9 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) break; /* So gcc recognizes the dead code. */ /* Record dyntick-idle state. */ - gpdone = rcu_process_dyntick(rsp, - dyntick_save_progress_counter); + rcu_process_dyntick(rsp, dyntick_save_progress_counter); spin_lock(&rnp->lock); /* irqs already disabled */ - if (gpdone) + if (!rcu_gp_in_progress(rsp)) break; /* fall into next case. */ @@ -1249,7 +1244,7 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) /* Check dyntick-idle state, send IPI to laggarts. */ spin_unlock(&rnp->lock); /* irqs remain disabled */ - gpdone = rcu_process_dyntick(rsp, rcu_implicit_dynticks_qs); + rcu_process_dyntick(rsp, rcu_implicit_dynticks_qs); /* Leave state in case more forcing is required. */ -- cgit v1.2.3 From ee47eb9f4da6f44af965d6d049e77ee8c8a4b822 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 4 Jan 2010 15:09:07 -0800 Subject: rcu: Remove leg of force_quiescent_state() switch statement The comparisons of rsp->gpnum nad rsp->completed in rcu_process_dyntick() and force_quiescent_state() can be replaced by the much more clear rcu_gp_in_progress() predicate function. After doing this, it becomes clear that the RCU_SAVE_COMPLETED leg of the force_quiescent_state() function's switch statement is almost completely a no-op. A small change to the RCU_SAVE_DYNTICK leg renders it a complete no-op, after which it can be removed. Doing so also eliminates the forcenow local variable from force_quiescent_state(). Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12626465501781-git-send-email-> Signed-off-by: Ingo Molnar --- kernel/rcutree.c | 22 +++++----------------- kernel/rcutree.h | 5 ++--- 2 files changed, 7 insertions(+), 20 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index e4971192fa9c..6268f37adfc4 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1144,6 +1144,7 @@ void rcu_check_callbacks(int cpu, int user) /* * Scan the leaf rcu_node structures, processing dyntick state for any that * have not yet encountered a quiescent state, using the function specified. + * The caller must have suppressed start of new grace periods. */ static void rcu_process_dyntick(struct rcu_state *rsp, int (*f)(struct rcu_data *)) @@ -1157,7 +1158,7 @@ static void rcu_process_dyntick(struct rcu_state *rsp, rcu_for_each_leaf_node(rsp, rnp) { mask = 0; spin_lock_irqsave(&rnp->lock, flags); - if (rnp->completed != rsp->gpnum - 1) { + if (!rcu_gp_in_progress(rsp)) { spin_unlock_irqrestore(&rnp->lock, flags); return; } @@ -1171,7 +1172,7 @@ static void rcu_process_dyntick(struct rcu_state *rsp, if ((rnp->qsmask & bit) != 0 && f(rsp->rda[cpu])) mask |= bit; } - if (mask != 0 && rnp->completed == rsp->gpnum - 1) { + if (mask != 0 && rcu_gp_in_progress(rsp)) { /* rcu_report_qs_rnp() releases rnp->lock. */ rcu_report_qs_rnp(mask, rsp, rnp, flags); @@ -1189,7 +1190,6 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) { unsigned long flags; struct rcu_node *rnp = rcu_get_root(rsp); - u8 forcenow; if (!rcu_gp_in_progress(rsp)) return; /* No grace period in progress, nothing to force. */ @@ -1224,21 +1224,9 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) /* Record dyntick-idle state. */ rcu_process_dyntick(rsp, dyntick_save_progress_counter); spin_lock(&rnp->lock); /* irqs already disabled */ - if (!rcu_gp_in_progress(rsp)) - break; - /* fall into next case. */ - - case RCU_SAVE_COMPLETED: - - /* Update state, record completion counter. */ - forcenow = 0; - if (rsp->gpnum - 1 == rsp->completed) { - forcenow = rsp->signaled == RCU_SAVE_COMPLETED; + if (rcu_gp_in_progress(rsp)) rsp->signaled = RCU_FORCE_QS; - } - if (!forcenow) - break; - /* fall into next case. */ + break; case RCU_FORCE_QS: diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 534856121b06..edb6fae0fa94 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -237,12 +237,11 @@ struct rcu_data { #define RCU_GP_IDLE 0 /* No grace period in progress. */ #define RCU_GP_INIT 1 /* Grace period being initialized. */ #define RCU_SAVE_DYNTICK 2 /* Need to scan dyntick state. */ -#define RCU_SAVE_COMPLETED 3 /* Need to save rsp->completed. */ -#define RCU_FORCE_QS 4 /* Need to force quiescent state. */ +#define RCU_FORCE_QS 3 /* Need to force quiescent state. */ #ifdef CONFIG_NO_HZ #define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK #else /* #ifdef CONFIG_NO_HZ */ -#define RCU_SIGNAL_INIT RCU_SAVE_COMPLETED +#define RCU_SIGNAL_INIT RCU_FORCE_QS #endif /* #else #ifdef CONFIG_NO_HZ */ #define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */ -- cgit v1.2.3 From 45f014c52eef022873b19d6a20eb0ec9668f2b09 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 4 Jan 2010 15:09:08 -0800 Subject: rcu: Remove redundant grace-period check The rcu_process_dyntick() function checks twice for the end of the current grace period. However, it holds the current rcu_node structure's ->lock field throughout, and doesn't get to the second call to rcu_gp_in_progress() unless there is at least one CPU corresponding to this rcu_node structure that has not yet checked in for the current grace period, which would prevent the current grace period from ending. So the current grace period cannot have ended, and the second check is redundant, so remove it. Also, given that this function is used even with !CONFIG_NO_HZ, its name is quite misleading. Change from rcu_process_dyntick() to force_qs_rnp(). Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1262646550562-git-send-email-> Signed-off-by: Ingo Molnar --- kernel/rcutree.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 6268f37adfc4..d9202857d3ad 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1146,8 +1146,7 @@ void rcu_check_callbacks(int cpu, int user) * have not yet encountered a quiescent state, using the function specified. * The caller must have suppressed start of new grace periods. */ -static void rcu_process_dyntick(struct rcu_state *rsp, - int (*f)(struct rcu_data *)) +static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *)) { unsigned long bit; int cpu; @@ -1172,7 +1171,7 @@ static void rcu_process_dyntick(struct rcu_state *rsp, if ((rnp->qsmask & bit) != 0 && f(rsp->rda[cpu])) mask |= bit; } - if (mask != 0 && rcu_gp_in_progress(rsp)) { + if (mask != 0) { /* rcu_report_qs_rnp() releases rnp->lock. */ rcu_report_qs_rnp(mask, rsp, rnp, flags); @@ -1222,7 +1221,7 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) break; /* So gcc recognizes the dead code. */ /* Record dyntick-idle state. */ - rcu_process_dyntick(rsp, dyntick_save_progress_counter); + force_qs_rnp(rsp, dyntick_save_progress_counter); spin_lock(&rnp->lock); /* irqs already disabled */ if (rcu_gp_in_progress(rsp)) rsp->signaled = RCU_FORCE_QS; @@ -1232,7 +1231,7 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) /* Check dyntick-idle state, send IPI to laggarts. */ spin_unlock(&rnp->lock); /* irqs remain disabled */ - rcu_process_dyntick(rsp, rcu_implicit_dynticks_qs); + force_qs_rnp(rsp, rcu_implicit_dynticks_qs); /* Leave state in case more forcing is required. */ -- cgit v1.2.3 From 46a1e34eda805501a8b32f26394faa435149f6d1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 4 Jan 2010 15:09:09 -0800 Subject: rcu: Make force_quiescent_state() start grace period if needed Grace periods cannot be started while force_quiescent_state() is active. This is OK in that the affected CPUs will try again later, but it does induce needless grace-period delays. This patch causes rcu_start_gp() to record a failed attempt to start a grace period. When force_quiescent_state() prepares to return, it then starts the grace period if there was such a failed attempt. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12626465501854-git-send-email-> Signed-off-by: Ingo Molnar --- kernel/rcutree.c | 8 ++++++++ kernel/rcutree.h | 5 +++++ 2 files changed, 13 insertions(+) (limited to 'kernel') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index d9202857d3ad..55e8f6ef8195 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -660,6 +660,8 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) struct rcu_node *rnp = rcu_get_root(rsp); if (!cpu_needs_another_gp(rsp, rdp) || rsp->fqs_active) { + if (cpu_needs_another_gp(rsp, rdp)) + rsp->fqs_need_gp = 1; if (rnp->completed == rsp->completed) { spin_unlock_irqrestore(&rnp->lock, flags); return; @@ -1239,6 +1241,12 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) break; } rsp->fqs_active = 0; + if (rsp->fqs_need_gp) { + spin_unlock(&rsp->fqslock); /* irqs remain disabled */ + rsp->fqs_need_gp = 0; + rcu_start_gp(rsp, flags); /* releases rnp->lock */ + return; + } spin_unlock(&rnp->lock); /* irqs remain disabled */ unlock_fqs_ret: spin_unlock_irqrestore(&rsp->fqslock, flags); diff --git a/kernel/rcutree.h b/kernel/rcutree.h index edb6fae0fa94..bd5d78ad1c48 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -278,6 +278,11 @@ struct rcu_state { /* Force QS state. */ u8 fqs_active; /* force_quiescent_state() */ /* is running. */ + u8 fqs_need_gp; /* A CPU was prevented from */ + /* starting a new grace */ + /* period because */ + /* force_quiescent_state() */ + /* was running. */ long gpnum; /* Current gp number. */ long completed; /* # of last completed gp. */ -- cgit v1.2.3 From bf66f18e79e34c421bbd8f6511e2c556b779df2f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 4 Jan 2010 15:09:10 -0800 Subject: rcu: Add force_quiescent_state() testing to rcutorture Add force_quiescent_state() testing to rcutorture, with a separate thread that repeatedly invokes force_quiescent_state() in bursts. This can greatly increase the probability of encountering certain types of race conditions. Suggested-by: Josh Triplett Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1262646551116-git-send-email-> Signed-off-by: Ingo Molnar --- include/linux/rcutiny.h | 12 ++++++++ include/linux/rcutree.h | 3 ++ kernel/rcutorture.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++-- kernel/rcutree.c | 18 +++++++++++ kernel/rcutree_plugin.h | 19 ++++++++++++ 5 files changed, 130 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 96cc307ed9f4..2b70d4e37383 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -62,6 +62,18 @@ static inline long rcu_batches_completed_bh(void) extern int rcu_expedited_torture_stats(char *page); +static inline void rcu_force_quiescent_state(void) +{ +} + +static inline void rcu_bh_force_quiescent_state(void) +{ +} + +static inline void rcu_sched_force_quiescent_state(void) +{ +} + #define synchronize_rcu synchronize_sched static inline void synchronize_rcu_expedited(void) diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 8044b1b94333..704a010f686c 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -99,6 +99,9 @@ extern void rcu_check_callbacks(int cpu, int user); extern long rcu_batches_completed(void); extern long rcu_batches_completed_bh(void); extern long rcu_batches_completed_sched(void); +extern void rcu_force_quiescent_state(void); +extern void rcu_bh_force_quiescent_state(void); +extern void rcu_sched_force_quiescent_state(void); #ifdef CONFIG_NO_HZ void rcu_enter_nohz(void); diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 9bb52177af02..adda92bfafac 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -61,6 +61,9 @@ static int test_no_idle_hz; /* Test RCU's support for tickless idle CPUs. */ static int shuffle_interval = 3; /* Interval between shuffles (in sec)*/ static int stutter = 5; /* Start/stop testing interval (in sec) */ static int irqreader = 1; /* RCU readers from irq (timers). */ +static int fqs_duration = 0; /* Duration of bursts (us), 0 to disable. */ +static int fqs_holdoff = 0; /* Hold time within burst (us). */ +static int fqs_stutter = 3; /* Wait time between bursts (s). */ static char *torture_type = "rcu"; /* What RCU implementation to torture. */ module_param(nreaders, int, 0444); @@ -79,6 +82,12 @@ module_param(stutter, int, 0444); MODULE_PARM_DESC(stutter, "Number of seconds to run/halt test"); module_param(irqreader, int, 0444); MODULE_PARM_DESC(irqreader, "Allow RCU readers from irq handlers"); +module_param(fqs_duration, int, 0444); +MODULE_PARM_DESC(fqs_duration, "Duration of fqs bursts (us)"); +module_param(fqs_holdoff, int, 0444); +MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)"); +module_param(fqs_stutter, int, 0444); +MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)"); module_param(torture_type, charp, 0444); MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)"); @@ -99,6 +108,7 @@ static struct task_struct **reader_tasks; static struct task_struct *stats_task; static struct task_struct *shuffler_task; static struct task_struct *stutter_task; +static struct task_struct *fqs_task; #define RCU_TORTURE_PIPE_LEN 10 @@ -263,6 +273,7 @@ struct rcu_torture_ops { void (*deferred_free)(struct rcu_torture *p); void (*sync)(void); void (*cb_barrier)(void); + void (*fqs)(void); int (*stats)(char *page); int irq_capable; char *name; @@ -347,6 +358,7 @@ static struct rcu_torture_ops rcu_ops = { .deferred_free = rcu_torture_deferred_free, .sync = synchronize_rcu, .cb_barrier = rcu_barrier, + .fqs = rcu_force_quiescent_state, .stats = NULL, .irq_capable = 1, .name = "rcu" @@ -388,6 +400,7 @@ static struct rcu_torture_ops rcu_sync_ops = { .deferred_free = rcu_sync_torture_deferred_free, .sync = synchronize_rcu, .cb_barrier = NULL, + .fqs = rcu_force_quiescent_state, .stats = NULL, .irq_capable = 1, .name = "rcu_sync" @@ -403,6 +416,7 @@ static struct rcu_torture_ops rcu_expedited_ops = { .deferred_free = rcu_sync_torture_deferred_free, .sync = synchronize_rcu_expedited, .cb_barrier = NULL, + .fqs = rcu_force_quiescent_state, .stats = NULL, .irq_capable = 1, .name = "rcu_expedited" @@ -465,6 +479,7 @@ static struct rcu_torture_ops rcu_bh_ops = { .deferred_free = rcu_bh_torture_deferred_free, .sync = rcu_bh_torture_synchronize, .cb_barrier = rcu_barrier_bh, + .fqs = rcu_bh_force_quiescent_state, .stats = NULL, .irq_capable = 1, .name = "rcu_bh" @@ -480,6 +495,7 @@ static struct rcu_torture_ops rcu_bh_sync_ops = { .deferred_free = rcu_sync_torture_deferred_free, .sync = rcu_bh_torture_synchronize, .cb_barrier = NULL, + .fqs = rcu_bh_force_quiescent_state, .stats = NULL, .irq_capable = 1, .name = "rcu_bh_sync" @@ -621,6 +637,7 @@ static struct rcu_torture_ops sched_ops = { .deferred_free = rcu_sched_torture_deferred_free, .sync = sched_torture_synchronize, .cb_barrier = rcu_barrier_sched, + .fqs = rcu_sched_force_quiescent_state, .stats = NULL, .irq_capable = 1, .name = "sched" @@ -636,6 +653,7 @@ static struct rcu_torture_ops sched_sync_ops = { .deferred_free = rcu_sync_torture_deferred_free, .sync = sched_torture_synchronize, .cb_barrier = NULL, + .fqs = rcu_sched_force_quiescent_state, .stats = NULL, .name = "sched_sync" }; @@ -650,11 +668,44 @@ static struct rcu_torture_ops sched_expedited_ops = { .deferred_free = rcu_sync_torture_deferred_free, .sync = synchronize_sched_expedited, .cb_barrier = NULL, + .fqs = rcu_sched_force_quiescent_state, .stats = rcu_expedited_torture_stats, .irq_capable = 1, .name = "sched_expedited" }; +/* + * RCU torture force-quiescent-state kthread. Repeatedly induces + * bursts of calls to force_quiescent_state(), increasing the probability + * of occurrence of some important types of race conditions. + */ +static int +rcu_torture_fqs(void *arg) +{ + unsigned long fqs_resume_time; + int fqs_burst_remaining; + + VERBOSE_PRINTK_STRING("rcu_torture_fqs task started"); + do { + fqs_resume_time = jiffies + fqs_stutter * HZ; + while (jiffies - fqs_resume_time > LONG_MAX) { + schedule_timeout_interruptible(1); + } + fqs_burst_remaining = fqs_duration; + while (fqs_burst_remaining > 0) { + cur_ops->fqs(); + udelay(fqs_holdoff); + fqs_burst_remaining -= fqs_holdoff; + } + rcu_stutter_wait("rcu_torture_fqs"); + } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); + VERBOSE_PRINTK_STRING("rcu_torture_fqs task stopping"); + rcutorture_shutdown_absorb("rcu_torture_fqs"); + while (!kthread_should_stop()) + schedule_timeout_uninterruptible(1); + return 0; +} + /* * RCU torture writer kthread. Repeatedly substitutes a new structure * for that pointed to by rcu_torture_current, freeing the old structure @@ -1030,10 +1081,11 @@ rcu_torture_print_module_parms(char *tag) printk(KERN_ALERT "%s" TORTURE_FLAG "--- %s: nreaders=%d nfakewriters=%d " "stat_interval=%d verbose=%d test_no_idle_hz=%d " - "shuffle_interval=%d stutter=%d irqreader=%d\n", + "shuffle_interval=%d stutter=%d irqreader=%d " + "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d\n", torture_type, tag, nrealreaders, nfakewriters, stat_interval, verbose, test_no_idle_hz, shuffle_interval, - stutter, irqreader); + stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter); } static struct notifier_block rcutorture_nb = { @@ -1109,6 +1161,12 @@ rcu_torture_cleanup(void) } stats_task = NULL; + if (fqs_task) { + VERBOSE_PRINTK_STRING("Stopping rcu_torture_fqs task"); + kthread_stop(fqs_task); + } + fqs_task = NULL; + /* Wait for all RCU callbacks to fire. */ if (cur_ops->cb_barrier != NULL) @@ -1154,6 +1212,11 @@ rcu_torture_init(void) mutex_unlock(&fullstop_mutex); return -EINVAL; } + if (cur_ops->fqs == NULL && fqs_duration != 0) { + printk(KERN_ALERT "rcu-torture: ->fqs NULL and non-zero " + "fqs_duration, fqs disabled.\n"); + fqs_duration = 0; + } if (cur_ops->init) cur_ops->init(); /* no "goto unwind" prior to this point!!! */ @@ -1282,6 +1345,19 @@ rcu_torture_init(void) goto unwind; } } + if (fqs_duration < 0) + fqs_duration = 0; + if (fqs_duration) { + /* Create the stutter thread */ + fqs_task = kthread_run(rcu_torture_fqs, NULL, + "rcu_torture_fqs"); + if (IS_ERR(fqs_task)) { + firsterr = PTR_ERR(fqs_task); + VERBOSE_PRINTK_ERRSTRING("Failed to create fqs"); + fqs_task = NULL; + goto unwind; + } + } register_reboot_notifier(&rcutorture_nb); mutex_unlock(&fullstop_mutex); return 0; diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 55e8f6ef8195..0a4c32879398 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -156,6 +156,24 @@ long rcu_batches_completed_bh(void) } EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); +/* + * Force a quiescent state for RCU BH. + */ +void rcu_bh_force_quiescent_state(void) +{ + force_quiescent_state(&rcu_bh_state, 0); +} +EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state); + +/* + * Force a quiescent state for RCU-sched. + */ +void rcu_sched_force_quiescent_state(void) +{ + force_quiescent_state(&rcu_sched_state, 0); +} +EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state); + /* * Does the CPU have callbacks ready to be invoked? */ diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 37fbccdf41d5..f11ebd44b454 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -61,6 +61,15 @@ long rcu_batches_completed(void) } EXPORT_SYMBOL_GPL(rcu_batches_completed); +/* + * Force a quiescent state for preemptible RCU. + */ +void rcu_force_quiescent_state(void) +{ + force_quiescent_state(&rcu_preempt_state, 0); +} +EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); + /* * Record a preemptable-RCU quiescent state for the specified CPU. Note * that this just means that the task currently running on the CPU is @@ -712,6 +721,16 @@ long rcu_batches_completed(void) } EXPORT_SYMBOL_GPL(rcu_batches_completed); +/* + * Force a quiescent state for RCU, which, because there is no preemptible + * RCU, becomes the same as rcu-sched. + */ +void rcu_force_quiescent_state(void) +{ + rcu_sched_force_quiescent_state(); +} +EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); + /* * Because preemptable RCU does not exist, we never have to check for * CPUs being in quiescent states. -- cgit v1.2.3 From cba8244a0f1c277b6b1e48ed6504fa434119e24d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 4 Jan 2010 16:04:01 -0800 Subject: rcu: Add debug check for too many rcu_read_unlock() TREE_PREEMPT_RCU maintains an rcu_read_lock_nesting counter in the task structure, which happens to be a signed int. So this patch adds a check for this counter being negative at the end of __rcu_read_unlock(). This check is under CONFIG_PROVE_LOCKING, so can be thought of as being part of lockdep. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12626498423064-git-send-email-> Signed-off-by: Ingo Molnar --- kernel/rcutree_plugin.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel') diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index f11ebd44b454..e77cdf321e13 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -304,6 +304,9 @@ void __rcu_read_unlock(void) if (--ACCESS_ONCE(t->rcu_read_lock_nesting) == 0 && unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) rcu_read_unlock_special(t); +#ifdef CONFIG_PROVE_LOCKING + WARN_ON_ONCE(ACCESS_ONCE(t->rcu_read_lock_nesting) < 0); +#endif /* #ifdef CONFIG_PROVE_LOCKING */ } EXPORT_SYMBOL_GPL(__rcu_read_unlock); -- cgit v1.2.3 From b6407e863934965cdc66cbc244d811ceeb6f4d77 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 4 Jan 2010 16:04:02 -0800 Subject: rcu: Give different levels of the rcu_node hierarchy distinct lockdep names Previously, each level of the rcu_node hierarchy had the same rather unimaginative name: "&rcu_node_class[i]". This makes lockdep diagnostics involving these lockdep classes less helpful than would be nice. This patch fixes this by giving each level of the rcu_node hierarchy a distinct name: "rcu_node_level_0", "rcu_node_level_1", and so on. This version of the patch includes improved diagnostics suggested by Josh Triplett and Peter Zijlstra. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12626498421830-git-send-email-> Signed-off-by: Ingo Molnar --- kernel/rcutree.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 0a4c32879398..3b13d64b010b 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1811,11 +1811,17 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp) */ static void __init rcu_init_one(struct rcu_state *rsp) { + static char *buf[] = { "rcu_node_level_0", + "rcu_node_level_1", + "rcu_node_level_2", + "rcu_node_level_3" }; /* Match MAX_RCU_LVLS */ int cpustride = 1; int i; int j; struct rcu_node *rnp; + BUILD_BUG_ON(MAX_RCU_LVLS > ARRAY_SIZE(buf)); /* Fix buf[] init! */ + /* Initialize the level-tracking arrays. */ for (i = 1; i < NUM_RCU_LVLS; i++) @@ -1829,7 +1835,8 @@ static void __init rcu_init_one(struct rcu_state *rsp) rnp = rsp->level[i]; for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) { spin_lock_init(&rnp->lock); - lockdep_set_class(&rnp->lock, &rcu_node_class[i]); + lockdep_set_class_and_name(&rnp->lock, + &rcu_node_class[i], buf[i]); rnp->gpnum = 0; rnp->qsmask = 0; rnp->qsmaskinit = 0; -- cgit v1.2.3 From 017c426138122c8e9b9f5057fbd0567c37b35247 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 14 Jan 2010 16:10:58 -0800 Subject: rcu: Fix sparse warnings Rename local variable "i" in rcu_init() to avoid conflict with RCU_INIT_FLAVOR(), restrict the scope of RCU_TREE_NONCORE, and make __synchronize_srcu() static. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12635142581560-git-send-email-> Signed-off-by: Ingo Molnar --- kernel/rcutree.c | 6 +++--- kernel/rcutree.h | 6 ++---- kernel/srcu.c | 2 +- 3 files changed, 6 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 3b13d64b010b..099a255ede4c 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1888,7 +1888,7 @@ do { \ void __init rcu_init(void) { - int i; + int cpu; rcu_bootup_announce(); #ifdef CONFIG_RCU_CPU_STALL_DETECTOR @@ -1908,8 +1908,8 @@ void __init rcu_init(void) * or the scheduler are operational. */ cpu_notifier(rcu_cpu_notify, 0); - for_each_online_cpu(i) - rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)i); + for_each_online_cpu(cpu) + rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu); } #include "rcutree_plugin.h" diff --git a/kernel/rcutree.h b/kernel/rcutree.h index bd5d78ad1c48..d9d032abd665 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -323,8 +323,6 @@ struct rcu_state { #define RCU_OFL_TASKS_EXP_GP 0x2 /* Tasks blocking expedited */ /* GP were moved to root. */ -#ifdef RCU_TREE_NONCORE - /* * RCU implementation internal declarations: */ @@ -339,7 +337,7 @@ extern struct rcu_state rcu_preempt_state; DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data); #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ -#else /* #ifdef RCU_TREE_NONCORE */ +#ifndef RCU_TREE_NONCORE /* Forward declarations for rcutree_plugin.h */ static void rcu_bootup_announce(void); @@ -372,4 +370,4 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu); static void rcu_preempt_send_cbs_to_orphanage(void); static void __init __rcu_init_preempt(void); -#endif /* #else #ifdef RCU_TREE_NONCORE */ +#endif /* #ifndef RCU_TREE_NONCORE */ diff --git a/kernel/srcu.c b/kernel/srcu.c index 818d7d9aa03c..31b275b9c112 100644 --- a/kernel/srcu.c +++ b/kernel/srcu.c @@ -144,7 +144,7 @@ EXPORT_SYMBOL_GPL(srcu_read_unlock); /* * Helper function for synchronize_srcu() and synchronize_srcu_expedited(). */ -void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void)) +static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void)) { int idx; -- cgit v1.2.3 From 632ee200130899252508c478ad0e808222573fbc Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 22 Feb 2010 17:04:45 -0800 Subject: rcu: Introduce lockdep-based checking to RCU read-side primitives Inspection is proving insufficient to catch all RCU misuses, which is understandable given that rcu_dereference() might be protected by any of four different flavors of RCU (RCU, RCU-bh, RCU-sched, and SRCU), and might also/instead be protected by any of a number of locking primitives. It is therefore time to enlist the aid of lockdep. This set of patches is inspired by earlier work by Peter Zijlstra and Thomas Gleixner, and takes the following approach: o Set up separate lockdep classes for RCU, RCU-bh, and RCU-sched. o Set up separate lockdep classes for each instance of SRCU. o Create primitives that check for being in an RCU read-side critical section. These return exact answers if lockdep is fully enabled, but if unsure, report being in an RCU read-side critical section. (We want to avoid false positives!) The primitives are: For RCU: rcu_read_lock_held(void) For RCU-bh: rcu_read_lock_bh_held(void) For RCU-sched: rcu_read_lock_sched_held(void) For SRCU: srcu_read_lock_held(struct srcu_struct *sp) o Add rcu_dereference_check(), which takes a second argument in which one places a boolean expression based on the above primitives and/or lockdep_is_held(). o A new kernel configuration parameter, CONFIG_PROVE_RCU, enables rcu_dereference_check(). This depends on CONFIG_PROVE_LOCKING, and should be quite helpful during the transition period while CONFIG_PROVE_RCU-unaware patches are in flight. The existing rcu_dereference() primitive does no checking, but upcoming patches will change that. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1266887105-1528-1-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/rcupdate.h | 126 +++++++++++++++++++++++++++++++++++++++++++---- include/linux/srcu.h | 87 +++++++++++++++++++++++++++++++- kernel/rcupdate.c | 10 ++++ kernel/rcutorture.c | 12 ++++- kernel/srcu.c | 50 ++++++++++++------- lib/Kconfig.debug | 12 +++++ lib/debug_locks.c | 1 + 7 files changed, 267 insertions(+), 31 deletions(-) (limited to 'kernel') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 24440f4bf476..e3d37efe2703 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -78,14 +78,120 @@ extern void rcu_init(void); } while (0) #ifdef CONFIG_DEBUG_LOCK_ALLOC + extern struct lockdep_map rcu_lock_map; -# define rcu_read_acquire() \ - lock_acquire(&rcu_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_) +# define rcu_read_acquire() \ + lock_acquire(&rcu_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_) # define rcu_read_release() lock_release(&rcu_lock_map, 1, _THIS_IP_) -#else -# define rcu_read_acquire() do { } while (0) -# define rcu_read_release() do { } while (0) -#endif + +extern struct lockdep_map rcu_bh_lock_map; +# define rcu_read_acquire_bh() \ + lock_acquire(&rcu_bh_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_) +# define rcu_read_release_bh() lock_release(&rcu_bh_lock_map, 1, _THIS_IP_) + +extern struct lockdep_map rcu_sched_lock_map; +# define rcu_read_acquire_sched() \ + lock_acquire(&rcu_sched_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_) +# define rcu_read_release_sched() \ + lock_release(&rcu_sched_lock_map, 1, _THIS_IP_) + +/** + * rcu_read_lock_held - might we be in RCU read-side critical section? + * + * If CONFIG_PROVE_LOCKING is selected and enabled, returns nonzero iff in + * an RCU read-side critical section. In absence of CONFIG_PROVE_LOCKING, + * this assumes we are in an RCU read-side critical section unless it can + * prove otherwise. + */ +static inline int rcu_read_lock_held(void) +{ + if (debug_locks) + return lock_is_held(&rcu_lock_map); + return 1; +} + +/** + * rcu_read_lock_bh_held - might we be in RCU-bh read-side critical section? + * + * If CONFIG_PROVE_LOCKING is selected and enabled, returns nonzero iff in + * an RCU-bh read-side critical section. In absence of CONFIG_PROVE_LOCKING, + * this assumes we are in an RCU-bh read-side critical section unless it can + * prove otherwise. + */ +static inline int rcu_read_lock_bh_held(void) +{ + if (debug_locks) + return lock_is_held(&rcu_bh_lock_map); + return 1; +} + +/** + * rcu_read_lock_sched_held - might we be in RCU-sched read-side critical section? + * + * If CONFIG_PROVE_LOCKING is selected and enabled, returns nonzero iff in an + * RCU-sched read-side critical section. In absence of CONFIG_PROVE_LOCKING, + * this assumes we are in an RCU-sched read-side critical section unless it + * can prove otherwise. Note that disabling of preemption (including + * disabling irqs) counts as an RCU-sched read-side critical section. + */ +static inline int rcu_read_lock_sched_held(void) +{ + int lockdep_opinion = 0; + + if (debug_locks) + lockdep_opinion = lock_is_held(&rcu_sched_lock_map); + return lockdep_opinion || preempt_count() != 0; +} + +#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + +# define rcu_read_acquire() do { } while (0) +# define rcu_read_release() do { } while (0) +# define rcu_read_acquire_bh() do { } while (0) +# define rcu_read_release_bh() do { } while (0) +# define rcu_read_acquire_sched() do { } while (0) +# define rcu_read_release_sched() do { } while (0) + +static inline int rcu_read_lock_held(void) +{ + return 1; +} + +static inline int rcu_read_lock_bh_held(void) +{ + return 1; +} + +static inline int rcu_read_lock_sched_held(void) +{ + return preempt_count() != 0; +} + +#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + +#ifdef CONFIG_PROVE_RCU + +/** + * rcu_dereference_check - rcu_dereference with debug checking + * + * Do an rcu_dereference(), but check that the context is correct. + * For example, rcu_dereference_check(gp, rcu_read_lock_held()) to + * ensure that the rcu_dereference_check() executes within an RCU + * read-side critical section. It is also possible to check for + * locks being held, for example, by using lockdep_is_held(). + */ +#define rcu_dereference_check(p, c) \ + ({ \ + if (debug_locks) \ + WARN_ON_ONCE(!(c)); \ + rcu_dereference(p); \ + }) + +#else /* #ifdef CONFIG_PROVE_RCU */ + +#define rcu_dereference_check(p, c) rcu_dereference(p) + +#endif /* #else #ifdef CONFIG_PROVE_RCU */ /** * rcu_read_lock - mark the beginning of an RCU read-side critical section. @@ -160,7 +266,7 @@ static inline void rcu_read_lock_bh(void) { __rcu_read_lock_bh(); __acquire(RCU_BH); - rcu_read_acquire(); + rcu_read_acquire_bh(); } /* @@ -170,7 +276,7 @@ static inline void rcu_read_lock_bh(void) */ static inline void rcu_read_unlock_bh(void) { - rcu_read_release(); + rcu_read_release_bh(); __release(RCU_BH); __rcu_read_unlock_bh(); } @@ -188,7 +294,7 @@ static inline void rcu_read_lock_sched(void) { preempt_disable(); __acquire(RCU_SCHED); - rcu_read_acquire(); + rcu_read_acquire_sched(); } /* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */ @@ -205,7 +311,7 @@ static inline notrace void rcu_read_lock_sched_notrace(void) */ static inline void rcu_read_unlock_sched(void) { - rcu_read_release(); + rcu_read_release_sched(); __release(RCU_SCHED); preempt_enable(); } diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 4765d97dcafb..adbe1670b366 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -35,6 +35,9 @@ struct srcu_struct { int completed; struct srcu_struct_array *per_cpu_ref; struct mutex mutex; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ }; #ifndef CONFIG_PREEMPT @@ -43,12 +46,92 @@ struct srcu_struct { #define srcu_barrier() #endif /* #else #ifndef CONFIG_PREEMPT */ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + +int __init_srcu_struct(struct srcu_struct *sp, const char *name, + struct lock_class_key *key); + +#define init_srcu_struct(sp) \ +({ \ + static struct lock_class_key __srcu_key; \ + \ + __init_srcu_struct((sp), #sp, &__srcu_key); \ +}) + +# define srcu_read_acquire(sp) \ + lock_acquire(&(sp)->dep_map, 0, 0, 2, 1, NULL, _THIS_IP_) +# define srcu_read_release(sp) \ + lock_release(&(sp)->dep_map, 1, _THIS_IP_) + +#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + int init_srcu_struct(struct srcu_struct *sp); + +# define srcu_read_acquire(sp) do { } while (0) +# define srcu_read_release(sp) do { } while (0) + +#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + void cleanup_srcu_struct(struct srcu_struct *sp); -int srcu_read_lock(struct srcu_struct *sp) __acquires(sp); -void srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp); +int __srcu_read_lock(struct srcu_struct *sp) __acquires(sp); +void __srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp); void synchronize_srcu(struct srcu_struct *sp); void synchronize_srcu_expedited(struct srcu_struct *sp); long srcu_batches_completed(struct srcu_struct *sp); +#ifdef CONFIG_DEBUG_LOCK_ALLOC + +/** + * srcu_read_lock_held - might we be in SRCU read-side critical section? + * + * If CONFIG_PROVE_LOCKING is selected and enabled, returns nonzero iff in + * an SRCU read-side critical section. In absence of CONFIG_PROVE_LOCKING, + * this assumes we are in an SRCU read-side critical section unless it can + * prove otherwise. + */ +static inline int srcu_read_lock_held(struct srcu_struct *sp) +{ + if (debug_locks) + return lock_is_held(&sp->dep_map); + return 1; +} + +#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + +static inline int srcu_read_lock_held(struct srcu_struct *sp) +{ + return 1; +} + +#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + +/** + * srcu_read_lock - register a new reader for an SRCU-protected structure. + * @sp: srcu_struct in which to register the new reader. + * + * Enter an SRCU read-side critical section. Note that SRCU read-side + * critical sections may be nested. + */ +static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp) +{ + int retval = __srcu_read_lock(sp); + + srcu_read_acquire(sp); + return retval; +} + +/** + * srcu_read_unlock - unregister a old reader from an SRCU-protected structure. + * @sp: srcu_struct in which to unregister the old reader. + * @idx: return value from corresponding srcu_read_lock(). + * + * Exit an SRCU read-side critical section. + */ +static inline void srcu_read_unlock(struct srcu_struct *sp, int idx) + __releases(sp) +{ + srcu_read_release(sp); + __srcu_read_unlock(sp, idx); +} + #endif diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 9b7fd4723878..033cb55c26df 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -50,6 +50,16 @@ static struct lock_class_key rcu_lock_key; struct lockdep_map rcu_lock_map = STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key); EXPORT_SYMBOL_GPL(rcu_lock_map); + +static struct lock_class_key rcu_bh_lock_key; +struct lockdep_map rcu_bh_lock_map = + STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_bh", &rcu_bh_lock_key); +EXPORT_SYMBOL_GPL(rcu_bh_lock_map); + +static struct lock_class_key rcu_sched_lock_key; +struct lockdep_map rcu_sched_lock_map = + STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_sched", &rcu_sched_lock_key); +EXPORT_SYMBOL_GPL(rcu_sched_lock_map); #endif /* diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index adda92bfafac..5f43f30fcd1d 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -796,7 +796,11 @@ static void rcu_torture_timer(unsigned long unused) idx = cur_ops->readlock(); completed = cur_ops->completed(); - p = rcu_dereference(rcu_torture_current); + p = rcu_dereference_check(rcu_torture_current, + rcu_read_lock_held() || + rcu_read_lock_bh_held() || + rcu_read_lock_sched_held() || + srcu_read_lock_held(&srcu_ctl)); if (p == NULL) { /* Leave because rcu_torture_writer is not yet underway */ cur_ops->readunlock(idx); @@ -853,7 +857,11 @@ rcu_torture_reader(void *arg) } idx = cur_ops->readlock(); completed = cur_ops->completed(); - p = rcu_dereference(rcu_torture_current); + p = rcu_dereference_check(rcu_torture_current, + rcu_read_lock_held() || + rcu_read_lock_bh_held() || + rcu_read_lock_sched_held() || + srcu_read_lock_held(&srcu_ctl)); if (p == NULL) { /* Wait for rcu_torture_writer to get underway */ cur_ops->readunlock(idx); diff --git a/kernel/srcu.c b/kernel/srcu.c index 31b275b9c112..bde4295774c8 100644 --- a/kernel/srcu.c +++ b/kernel/srcu.c @@ -34,6 +34,30 @@ #include #include +static int init_srcu_struct_fields(struct srcu_struct *sp) +{ + sp->completed = 0; + mutex_init(&sp->mutex); + sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array); + return sp->per_cpu_ref ? 0 : -ENOMEM; +} + +#ifdef CONFIG_DEBUG_LOCK_ALLOC + +int __init_srcu_struct(struct srcu_struct *sp, const char *name, + struct lock_class_key *key) +{ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + /* Don't re-initialize a lock while it is held. */ + debug_check_no_locks_freed((void *)sp, sizeof(*sp)); + lockdep_init_map(&sp->dep_map, name, key, 0); +#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + return init_srcu_struct_fields(sp); +} +EXPORT_SYMBOL_GPL(__init_srcu_struct); + +#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + /** * init_srcu_struct - initialize a sleep-RCU structure * @sp: structure to initialize. @@ -44,13 +68,12 @@ */ int init_srcu_struct(struct srcu_struct *sp) { - sp->completed = 0; - mutex_init(&sp->mutex); - sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array); - return (sp->per_cpu_ref ? 0 : -ENOMEM); + return init_srcu_struct_fields(sp); } EXPORT_SYMBOL_GPL(init_srcu_struct); +#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + /* * srcu_readers_active_idx -- returns approximate number of readers * active on the specified rank of per-CPU counters. @@ -100,15 +123,12 @@ void cleanup_srcu_struct(struct srcu_struct *sp) } EXPORT_SYMBOL_GPL(cleanup_srcu_struct); -/** - * srcu_read_lock - register a new reader for an SRCU-protected structure. - * @sp: srcu_struct in which to register the new reader. - * +/* * Counts the new reader in the appropriate per-CPU element of the * srcu_struct. Must be called from process context. * Returns an index that must be passed to the matching srcu_read_unlock(). */ -int srcu_read_lock(struct srcu_struct *sp) +int __srcu_read_lock(struct srcu_struct *sp) { int idx; @@ -120,26 +140,22 @@ int srcu_read_lock(struct srcu_struct *sp) preempt_enable(); return idx; } -EXPORT_SYMBOL_GPL(srcu_read_lock); +EXPORT_SYMBOL_GPL(__srcu_read_lock); -/** - * srcu_read_unlock - unregister a old reader from an SRCU-protected structure. - * @sp: srcu_struct in which to unregister the old reader. - * @idx: return value from corresponding srcu_read_lock(). - * +/* * Removes the count for the old reader from the appropriate per-CPU * element of the srcu_struct. Note that this may well be a different * CPU than that which was incremented by the corresponding srcu_read_lock(). * Must be called from process context. */ -void srcu_read_unlock(struct srcu_struct *sp, int idx) +void __srcu_read_unlock(struct srcu_struct *sp, int idx) { preempt_disable(); srcu_barrier(); /* ensure compiler won't misorder critical section. */ per_cpu_ptr(sp->per_cpu_ref, smp_processor_id())->c[idx]--; preempt_enable(); } -EXPORT_SYMBOL_GPL(srcu_read_unlock); +EXPORT_SYMBOL_GPL(__srcu_read_unlock); /* * Helper function for synchronize_srcu() and synchronize_srcu_expedited(). diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 6bf97d176326..6af20a8a0a54 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -499,6 +499,18 @@ config PROVE_LOCKING For more details, see Documentation/lockdep-design.txt. +config PROVE_RCU + bool "RCU debugging: prove RCU correctness" + depends on PROVE_LOCKING + default n + help + This feature enables lockdep extensions that check for correct + use of RCU APIs. This is currently under development. Say Y + if you want to debug RCU usage or help work on the PROVE_RCU + feature. + + Say N if you are unsure. + config LOCKDEP bool depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT diff --git a/lib/debug_locks.c b/lib/debug_locks.c index bc3b11731b9c..5bf0020b9248 100644 --- a/lib/debug_locks.c +++ b/lib/debug_locks.c @@ -23,6 +23,7 @@ * shut up after that. */ int debug_locks = 1; +EXPORT_SYMBOL_GPL(debug_locks); /* * The locking-testsuite uses to get a -- cgit v1.2.3 From 0632eb3d7563d6a76d49a3860b6352d800c92854 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 22 Feb 2010 17:04:47 -0800 Subject: rcu: Integrate rcu_dereference_check() message into lockdep Make rcu_dereference_check() print the list of held locks in addition to the stack dump to ease debugging. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1266887105-1528-3-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 4 ++++ include/linux/rcupdate.h | 4 ++-- kernel/lockdep.c | 18 ++++++++++++++++++ 3 files changed, 24 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 9ccf0e286b2a..10206a87da19 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -534,4 +534,8 @@ do { \ # define might_lock_read(lock) do { } while (0) #endif +#ifdef CONFIG_PROVE_RCU +extern void lockdep_rcu_dereference(const char *file, const int line); +#endif + #endif /* __LINUX_LOCKDEP_H */ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 839d296a7ac0..1a4de31bd7b4 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -182,8 +182,8 @@ static inline int rcu_read_lock_sched_held(void) */ #define rcu_dereference_check(p, c) \ ({ \ - if (debug_locks) \ - WARN_ON_ONCE(!(c)); \ + if (debug_locks && !(c)) \ + lockdep_rcu_dereference(__FILE__, __LINE__); \ rcu_dereference_raw(p); \ }) diff --git a/kernel/lockdep.c b/kernel/lockdep.c index c62ec14609b9..672c436946ce 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -3809,3 +3809,21 @@ void lockdep_sys_exit(void) lockdep_print_held_locks(curr); } } + +void lockdep_rcu_dereference(const char *file, const int line) +{ + struct task_struct *curr = current; + + if (!debug_locks_off()) + return; + printk("\n==============================================\n"); + printk( "[ BUG: Unsafe rcu_dereference_check() usage! ]\n"); + printk( "----------------------------------------------\n"); + printk("%s:%d invoked rcu_dereference_check() without protection!\n", + file, line); + printk("\nother info that might help us debug this:\n\n"); + lockdep_print_held_locks(curr); + printk("\nstack backtrace:\n"); + dump_stack(); +} +EXPORT_SYMBOL_GPL(lockdep_rcu_dereference); -- cgit v1.2.3 From d11c563dd20ff35da5652c3e1c989d9e10e1d6d0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 22 Feb 2010 17:04:50 -0800 Subject: sched: Use lockdep-based checking on rcu_dereference() Update the rcu_dereference() usages to take advantage of the new lockdep-based checking. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1266887105-1528-6-git-send-email-paulmck@linux.vnet.ibm.com> [ -v2: fix allmodconfig missing symbol export build failure on x86 ] Signed-off-by: Ingo Molnar --- include/linux/cgroup.h | 5 ++++- include/linux/cred.h | 2 +- init/main.c | 2 ++ kernel/cgroup.c | 14 ++++++++++++++ kernel/exit.c | 14 +++++++++++--- kernel/fork.c | 1 + kernel/notifier.c | 6 +++--- kernel/pid.c | 2 +- kernel/sched.c | 11 ++++++++--- 9 files changed, 45 insertions(+), 12 deletions(-) (limited to 'kernel') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 0008dee66514..c9bbcb2a75ae 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -28,6 +28,7 @@ struct css_id; extern int cgroup_init_early(void); extern int cgroup_init(void); extern void cgroup_lock(void); +extern int cgroup_lock_is_held(void); extern bool cgroup_lock_live_group(struct cgroup *cgrp); extern void cgroup_unlock(void); extern void cgroup_fork(struct task_struct *p); @@ -486,7 +487,9 @@ static inline struct cgroup_subsys_state *cgroup_subsys_state( static inline struct cgroup_subsys_state *task_subsys_state( struct task_struct *task, int subsys_id) { - return rcu_dereference(task->cgroups->subsys[subsys_id]); + return rcu_dereference_check(task->cgroups->subsys[subsys_id], + rcu_read_lock_held() || + cgroup_lock_is_held()); } static inline struct cgroup* task_cgroup(struct task_struct *task, diff --git a/include/linux/cred.h b/include/linux/cred.h index 4e3387a89cb9..4db09f89b637 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -280,7 +280,7 @@ static inline void put_cred(const struct cred *_cred) * task or by holding tasklist_lock to prevent it from being unlinked. */ #define __task_cred(task) \ - ((const struct cred *)(rcu_dereference((task)->real_cred))) + ((const struct cred *)(rcu_dereference_check((task)->real_cred, rcu_read_lock_held() || lockdep_is_held(&tasklist_lock)))) /** * get_task_cred - Get another task's objective credentials diff --git a/init/main.c b/init/main.c index 4cb47a159f02..c75dcd6eef09 100644 --- a/init/main.c +++ b/init/main.c @@ -416,7 +416,9 @@ static noinline void __init_refok rest_init(void) kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND); numa_default_policy(); pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES); + rcu_read_lock(); kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns); + rcu_read_unlock(); unlock_kernel(); /* diff --git a/kernel/cgroup.c b/kernel/cgroup.c index aa3bee566446..b1a0f5a528fe 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -166,6 +166,20 @@ static DEFINE_SPINLOCK(hierarchy_id_lock); */ static int need_forkexit_callback __read_mostly; +#ifdef CONFIG_PROVE_LOCKING +int cgroup_lock_is_held(void) +{ + return lockdep_is_held(&cgroup_mutex); +} +#else /* #ifdef CONFIG_PROVE_LOCKING */ +int cgroup_lock_is_held(void) +{ + return mutex_is_locked(&cgroup_mutex); +} +#endif /* #else #ifdef CONFIG_PROVE_LOCKING */ + +EXPORT_SYMBOL_GPL(cgroup_lock_is_held); + /* convenient tests for these bits */ inline int cgroup_is_removed(const struct cgroup *cgrp) { diff --git a/kernel/exit.c b/kernel/exit.c index 546774a31a66..45ed043b8bf5 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -85,7 +85,9 @@ static void __exit_signal(struct task_struct *tsk) BUG_ON(!sig); BUG_ON(!atomic_read(&sig->count)); - sighand = rcu_dereference(tsk->sighand); + sighand = rcu_dereference_check(tsk->sighand, + rcu_read_lock_held() || + lockdep_is_held(&tasklist_lock)); spin_lock(&sighand->siglock); posix_cpu_timers_exit(tsk); @@ -170,8 +172,10 @@ void release_task(struct task_struct * p) repeat: tracehook_prepare_release_task(p); /* don't need to get the RCU readlock here - the process is dead and - * can't be modifying its own credentials */ + * can't be modifying its own credentials. But shut RCU-lockdep up */ + rcu_read_lock(); atomic_dec(&__task_cred(p)->user->processes); + rcu_read_unlock(); proc_flush_task(p); @@ -473,9 +477,11 @@ static void close_files(struct files_struct * files) /* * It is safe to dereference the fd table without RCU or * ->file_lock because this is the last reference to the - * files structure. + * files structure. But use RCU to shut RCU-lockdep up. */ + rcu_read_lock(); fdt = files_fdtable(files); + rcu_read_unlock(); for (;;) { unsigned long set; i = j * __NFDBITS; @@ -521,10 +527,12 @@ void put_files_struct(struct files_struct *files) * at the end of the RCU grace period. Otherwise, * you can free files immediately. */ + rcu_read_lock(); fdt = files_fdtable(files); if (fdt != &files->fdtab) kmem_cache_free(files_cachep, files); free_fdtable(fdt); + rcu_read_unlock(); } } diff --git a/kernel/fork.c b/kernel/fork.c index f88bd984df35..17bbf093356d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -86,6 +86,7 @@ int max_threads; /* tunable limit on nr_threads */ DEFINE_PER_CPU(unsigned long, process_counts) = 0; __cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ +EXPORT_SYMBOL_GPL(tasklist_lock); int nr_processes(void) { diff --git a/kernel/notifier.c b/kernel/notifier.c index acd24e7643eb..2488ba7eb568 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -78,10 +78,10 @@ static int __kprobes notifier_call_chain(struct notifier_block **nl, int ret = NOTIFY_DONE; struct notifier_block *nb, *next_nb; - nb = rcu_dereference(*nl); + nb = rcu_dereference_raw(*nl); while (nb && nr_to_call) { - next_nb = rcu_dereference(nb->next); + next_nb = rcu_dereference_raw(nb->next); #ifdef CONFIG_DEBUG_NOTIFIERS if (unlikely(!func_ptr_is_kernel_text(nb->notifier_call))) { @@ -309,7 +309,7 @@ int __blocking_notifier_call_chain(struct blocking_notifier_head *nh, * racy then it does not matter what the result of the test * is, we re-check the list after having taken the lock anyway: */ - if (rcu_dereference(nh->head)) { + if (rcu_dereference_raw(nh->head)) { down_read(&nh->rwsem); ret = notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls); diff --git a/kernel/pid.c b/kernel/pid.c index 2e17c9c92cbe..b08e697cd83f 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -367,7 +367,7 @@ struct task_struct *pid_task(struct pid *pid, enum pid_type type) struct task_struct *result = NULL; if (pid) { struct hlist_node *first; - first = rcu_dereference(pid->tasks[type].first); + first = rcu_dereference_check(pid->tasks[type].first, rcu_read_lock_held() || lockdep_is_held(&tasklist_lock)); if (first) result = hlist_entry(first, struct task_struct, pids[(type)].node); } diff --git a/kernel/sched.c b/kernel/sched.c index 3a8fb30a91b1..70ae68680d4c 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -645,6 +645,11 @@ static inline int cpu_of(struct rq *rq) #endif } +#define for_each_domain_rd(p) \ + rcu_dereference_check((p), \ + rcu_read_lock_sched_held() || \ + lockdep_is_held(&sched_domains_mutex)) + /* * The domain tree (rq->sd) is protected by RCU's quiescent state transition. * See detach_destroy_domains: synchronize_sched for details. @@ -653,7 +658,7 @@ static inline int cpu_of(struct rq *rq) * preempt-disabled sections. */ #define for_each_domain(cpu, __sd) \ - for (__sd = rcu_dereference(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent) + for (__sd = for_each_domain_rd(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent) #define cpu_rq(cpu) (&per_cpu(runqueues, (cpu))) #define this_rq() (&__get_cpu_var(runqueues)) @@ -1531,7 +1536,7 @@ static unsigned long target_load(int cpu, int type) static struct sched_group *group_of(int cpu) { - struct sched_domain *sd = rcu_dereference(cpu_rq(cpu)->sd); + struct sched_domain *sd = rcu_dereference_sched(cpu_rq(cpu)->sd); if (!sd) return NULL; @@ -4888,7 +4893,7 @@ static void run_rebalance_domains(struct softirq_action *h) static inline int on_null_domain(int cpu) { - return !rcu_dereference(cpu_rq(cpu)->sd); + return !rcu_dereference_sched(cpu_rq(cpu)->sd); } /* -- cgit v1.2.3 From 497f0ab39cd25bed317b29482c147c967f7ecd1f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 22 Feb 2010 17:04:51 -0800 Subject: sched: Better name for for_each_domain_rd As suggested by Peter Ziljstra, make better choice of name for for_each_domain_rd(), containing "rcu_dereference", given that it is but a wrapper for rcu_dereference_check(). The name rcu_dereference_check_sched_domain() does that and provides a separate per-subsystem name space. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1266887105-1528-7-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- kernel/sched.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 70ae68680d4c..3218f5213717 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -645,7 +645,7 @@ static inline int cpu_of(struct rq *rq) #endif } -#define for_each_domain_rd(p) \ +#define rcu_dereference_check_sched_domain(p) \ rcu_dereference_check((p), \ rcu_read_lock_sched_held() || \ lockdep_is_held(&sched_domains_mutex)) @@ -658,7 +658,7 @@ static inline int cpu_of(struct rq *rq) * preempt-disabled sections. */ #define for_each_domain(cpu, __sd) \ - for (__sd = for_each_domain_rd(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent) + for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent) #define cpu_rq(cpu) (&per_cpu(runqueues, (cpu))) #define this_rq() (&__get_cpu_var(runqueues)) -- cgit v1.2.3 From 8bd93a2c5d4cab2ae17d06350daa7dbf546a4634 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 22 Feb 2010 17:04:59 -0800 Subject: rcu: Accelerate grace period if last non-dynticked CPU Currently, rcu_needs_cpu() simply checks whether the current CPU has an outstanding RCU callback, which means that the last CPU to go into dyntick-idle mode might wait a few ticks for the relevant grace periods to complete. However, if all the other CPUs are in dyntick-idle mode, and if this CPU is in a quiescent state (which it is for RCU-bh and RCU-sched any time that we are considering going into dyntick-idle mode), then the grace period is instantly complete. This patch therefore repeatedly invokes the RCU grace-period machinery in order to force any needed grace periods to complete quickly. It does so a limited number of times in order to prevent starvation by an RCU callback function that might pass itself to call_rcu(). However, if any CPU other than the current one is not in dyntick-idle mode, fall back to simply checking (with fix to bug noted by Lai Jiangshan). Also, take advantage of last grace-period forcing, the opportunity to do so noted by Steve Rostedt. And apply simplified #ifdef condition suggested by Frederic Weisbecker. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1266887105-1528-15-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/cpumask.h | 14 ++++++++++ init/Kconfig | 16 ++++++++++++ kernel/rcutree.c | 5 ++-- kernel/rcutree_plugin.h | 69 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 101 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index d77b54733c5b..dbcee7647d9a 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -143,6 +143,8 @@ static inline unsigned int cpumask_any_but(const struct cpumask *mask, #define for_each_cpu(cpu, mask) \ for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) +#define for_each_cpu_not(cpu, mask) \ + for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) #define for_each_cpu_and(cpu, mask, and) \ for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)and) #else @@ -202,6 +204,18 @@ int cpumask_any_but(const struct cpumask *mask, unsigned int cpu); (cpu) = cpumask_next((cpu), (mask)), \ (cpu) < nr_cpu_ids;) +/** + * for_each_cpu_not - iterate over every cpu in a complemented mask + * @cpu: the (optionally unsigned) integer iterator + * @mask: the cpumask pointer + * + * After the loop, cpu is >= nr_cpu_ids. + */ +#define for_each_cpu_not(cpu, mask) \ + for ((cpu) = -1; \ + (cpu) = cpumask_next_zero((cpu), (mask)), \ + (cpu) < nr_cpu_ids;) + /** * for_each_cpu_and - iterate over every cpu in both masks * @cpu: the (optionally unsigned) integer iterator diff --git a/init/Kconfig b/init/Kconfig index d95ca7cd5d45..42bf914b325a 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -396,6 +396,22 @@ config RCU_FANOUT_EXACT Say N if unsure. +config RCU_FAST_NO_HZ + bool "Accelerate last non-dyntick-idle CPU's grace periods" + depends on TREE_RCU && NO_HZ && SMP + default n + help + This option causes RCU to attempt to accelerate grace periods + in order to allow the final CPU to enter dynticks-idle state + more quickly. On the other hand, this option increases the + overhead of the dynticks-idle checking, particularly on systems + with large numbers of CPUs. + + Say Y if energy efficiency is critically important, particularly + if you have relatively few CPUs. + + Say N if you are unsure. + config TREE_RCU_TRACE def_bool RCU_TRACE && ( TREE_RCU || TREE_PREEMPT_RCU ) select DEBUG_FS diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 099a255ede4c..29d88c08d875 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1550,10 +1550,9 @@ static int rcu_pending(int cpu) /* * Check to see if any future RCU-related work will need to be done * by the current CPU, even if none need be done immediately, returning - * 1 if so. This function is part of the RCU implementation; it is -not- - * an exported member of the RCU API. + * 1 if so. */ -int rcu_needs_cpu(int cpu) +static int rcu_needs_cpu_quick_check(int cpu) { /* RCU callbacks either ready or pending? */ return per_cpu(rcu_sched_data, cpu).nxtlist || diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index e77cdf321e13..a82566696b0b 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -906,3 +906,72 @@ static void __init __rcu_init_preempt(void) } #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ + +#if !defined(CONFIG_RCU_FAST_NO_HZ) + +/* + * Check to see if any future RCU-related work will need to be done + * by the current CPU, even if none need be done immediately, returning + * 1 if so. This function is part of the RCU implementation; it is -not- + * an exported member of the RCU API. + * + * Because we have preemptible RCU, just check whether this CPU needs + * any flavor of RCU. Do not chew up lots of CPU cycles with preemption + * disabled in a most-likely vain attempt to cause RCU not to need this CPU. + */ +int rcu_needs_cpu(int cpu) +{ + return rcu_needs_cpu_quick_check(cpu); +} + +#else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */ + +#define RCU_NEEDS_CPU_FLUSHES 5 + +/* + * Check to see if any future RCU-related work will need to be done + * by the current CPU, even if none need be done immediately, returning + * 1 if so. This function is part of the RCU implementation; it is -not- + * an exported member of the RCU API. + * + * Because we are not supporting preemptible RCU, attempt to accelerate + * any current grace periods so that RCU no longer needs this CPU, but + * only if all other CPUs are already in dynticks-idle mode. This will + * allow the CPU cores to be powered down immediately, as opposed to after + * waiting many milliseconds for grace periods to elapse. + */ +int rcu_needs_cpu(int cpu) +{ + int c = 1; + int i; + int thatcpu; + + /* Don't bother unless we are the last non-dyntick-idle CPU. */ + for_each_cpu_not(thatcpu, nohz_cpu_mask) + if (thatcpu != cpu) + return rcu_needs_cpu_quick_check(cpu); + + /* Try to push remaining RCU-sched and RCU-bh callbacks through. */ + for (i = 0; i < RCU_NEEDS_CPU_FLUSHES && c; i++) { + c = 0; + if (per_cpu(rcu_sched_data, cpu).nxtlist) { + rcu_sched_qs(cpu); + force_quiescent_state(&rcu_sched_state, 0); + __rcu_process_callbacks(&rcu_sched_state, + &per_cpu(rcu_sched_data, cpu)); + c = !!per_cpu(rcu_sched_data, cpu).nxtlist; + } + if (per_cpu(rcu_bh_data, cpu).nxtlist) { + rcu_bh_qs(cpu); + force_quiescent_state(&rcu_bh_state, 0); + __rcu_process_callbacks(&rcu_bh_state, + &per_cpu(rcu_bh_data, cpu)); + c = !!per_cpu(rcu_bh_data, cpu).nxtlist; + } + } + + /* If RCU callbacks are still pending, RCU still needs this CPU. */ + return c; +} + +#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ -- cgit v1.2.3 From 20133cfce7d0bbdcc0c398301030c091f5675c88 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 22 Feb 2010 17:05:01 -0800 Subject: rcu: Stop overflowing signed integers The C standard does not specify the result of an operation that overflows a signed integer, so such operations need to be avoided. This patch changes the type of several fields from "long" to "unsigned long" and adjusts operations as needed. ULONG_CMP_GE() and ULONG_CMP_LT() macros are introduced to do the modular comparisons that are appropriate given that overflow is an expected event. Acked-by: Mathieu Desnoyers Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1266887105-1528-17-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- kernel/rcutree.c | 11 +++++------ kernel/rcutree.h | 33 ++++++++++++++++++--------------- kernel/rcutree_trace.c | 14 +++++++------- 3 files changed, 30 insertions(+), 28 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 29d88c08d875..dd0d31dffcdc 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -500,7 +500,7 @@ static void print_cpu_stall(struct rcu_state *rsp) trigger_all_cpu_backtrace(); spin_lock_irqsave(&rnp->lock, flags); - if ((long)(jiffies - rsp->jiffies_stall) >= 0) + if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall)) rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK; spin_unlock_irqrestore(&rnp->lock, flags); @@ -1216,8 +1216,7 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) rsp->n_force_qs_lh++; /* Inexact, can lose counts. Tough! */ return; /* Someone else is already on the job. */ } - if (relaxed && - (long)(rsp->jiffies_force_qs - jiffies) >= 0) + if (relaxed && ULONG_CMP_GE(rsp->jiffies_force_qs, jiffies)) goto unlock_fqs_ret; /* no emergency and done recently. */ rsp->n_force_qs++; spin_lock(&rnp->lock); /* irqs already disabled */ @@ -1295,7 +1294,7 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) * If an RCU GP has gone long enough, go check for dyntick * idle CPUs and, if needed, send resched IPIs. */ - if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0) + if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) force_quiescent_state(rsp, 1); /* @@ -1392,7 +1391,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), force_quiescent_state(rsp, 0); rdp->n_force_qs_snap = rsp->n_force_qs; rdp->qlen_last_fqs_check = rdp->qlen; - } else if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0) + } else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) force_quiescent_state(rsp, 1); local_irq_restore(flags); } @@ -1525,7 +1524,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) /* Has an RCU GP gone long enough to send resched IPIs &c? */ if (rcu_gp_in_progress(rsp) && - ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0)) { + ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) { rdp->n_rp_need_fqs++; return 1; } diff --git a/kernel/rcutree.h b/kernel/rcutree.h index d9d032abd665..7495fed49c30 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -92,10 +92,10 @@ struct rcu_dynticks { struct rcu_node { spinlock_t lock; /* Root rcu_node's lock protects some */ /* rcu_state fields as well as following. */ - long gpnum; /* Current grace period for this node. */ + unsigned long gpnum; /* Current grace period for this node. */ /* This will either be equal to or one */ /* behind the root rcu_node's gpnum. */ - long completed; /* Last grace period completed for this node. */ + unsigned long completed; /* Last GP completed for this node. */ /* This will either be equal to or one */ /* behind the root rcu_node's gpnum. */ unsigned long qsmask; /* CPUs or groups that need to switch in */ @@ -161,11 +161,11 @@ struct rcu_node { /* Per-CPU data for read-copy update. */ struct rcu_data { /* 1) quiescent-state and grace-period handling : */ - long completed; /* Track rsp->completed gp number */ + unsigned long completed; /* Track rsp->completed gp number */ /* in order to detect GP end. */ - long gpnum; /* Highest gp number that this CPU */ + unsigned long gpnum; /* Highest gp number that this CPU */ /* is aware of having started. */ - long passed_quiesc_completed; + unsigned long passed_quiesc_completed; /* Value of completed at time of qs. */ bool passed_quiesc; /* User-mode/idle loop etc. */ bool qs_pending; /* Core waits for quiesc state. */ @@ -221,14 +221,14 @@ struct rcu_data { unsigned long resched_ipi; /* Sent a resched IPI. */ /* 5) __rcu_pending() statistics. */ - long n_rcu_pending; /* rcu_pending() calls since boot. */ - long n_rp_qs_pending; - long n_rp_cb_ready; - long n_rp_cpu_needs_gp; - long n_rp_gp_completed; - long n_rp_gp_started; - long n_rp_need_fqs; - long n_rp_need_nothing; + unsigned long n_rcu_pending; /* rcu_pending() calls since boot. */ + unsigned long n_rp_qs_pending; + unsigned long n_rp_cb_ready; + unsigned long n_rp_cpu_needs_gp; + unsigned long n_rp_gp_completed; + unsigned long n_rp_gp_started; + unsigned long n_rp_need_fqs; + unsigned long n_rp_need_nothing; int cpu; }; @@ -255,6 +255,9 @@ struct rcu_data { #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ +#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) +#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) + /* * RCU global state, including node hierarchy. This hierarchy is * represented in "heap" form in a dense array. The root (first level) @@ -283,8 +286,8 @@ struct rcu_state { /* period because */ /* force_quiescent_state() */ /* was running. */ - long gpnum; /* Current gp number. */ - long completed; /* # of last completed gp. */ + unsigned long gpnum; /* Current gp number. */ + unsigned long completed; /* # of last completed gp. */ /* End of fields guarded by root rcu_node's lock. */ diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index 9d2c88423b31..d45db2e35d27 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -50,7 +50,7 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) { if (!rdp->beenonline) return; - seq_printf(m, "%3d%cc=%ld g=%ld pq=%d pqc=%ld qp=%d", + seq_printf(m, "%3d%cc=%lu g=%lu pq=%d pqc=%lu qp=%d", rdp->cpu, cpu_is_offline(rdp->cpu) ? '!' : ' ', rdp->completed, rdp->gpnum, @@ -105,7 +105,7 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp) { if (!rdp->beenonline) return; - seq_printf(m, "%d,%s,%ld,%ld,%d,%ld,%d", + seq_printf(m, "%d,%s,%lu,%lu,%d,%lu,%d", rdp->cpu, cpu_is_offline(rdp->cpu) ? "\"N\"" : "\"Y\"", rdp->completed, rdp->gpnum, @@ -155,13 +155,13 @@ static const struct file_operations rcudata_csv_fops = { static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) { - long gpnum; + unsigned long gpnum; int level = 0; int phase; struct rcu_node *rnp; gpnum = rsp->gpnum; - seq_printf(m, "c=%ld g=%ld s=%d jfq=%ld j=%x " + seq_printf(m, "c=%lu g=%lu s=%d jfq=%ld j=%x " "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld\n", rsp->completed, gpnum, rsp->signaled, (long)(rsp->jiffies_force_qs - jiffies), @@ -215,12 +215,12 @@ static const struct file_operations rcuhier_fops = { static int show_rcugp(struct seq_file *m, void *unused) { #ifdef CONFIG_TREE_PREEMPT_RCU - seq_printf(m, "rcu_preempt: completed=%ld gpnum=%ld\n", + seq_printf(m, "rcu_preempt: completed=%ld gpnum=%lu\n", rcu_preempt_state.completed, rcu_preempt_state.gpnum); #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ - seq_printf(m, "rcu_sched: completed=%ld gpnum=%ld\n", + seq_printf(m, "rcu_sched: completed=%ld gpnum=%lu\n", rcu_sched_state.completed, rcu_sched_state.gpnum); - seq_printf(m, "rcu_bh: completed=%ld gpnum=%ld\n", + seq_printf(m, "rcu_bh: completed=%ld gpnum=%lu\n", rcu_bh_state.completed, rcu_bh_state.gpnum); return 0; } -- cgit v1.2.3 From 1304afb225288a2e250d6a7495462c28e5509cbb Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 22 Feb 2010 17:05:02 -0800 Subject: rcu: Convert to raw_spinlocks The spinlocks in rcutree need to be real spinlocks in preempt-rt. Convert them to raw_spinlocks. Signed-off-by: Thomas Gleixner Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1266887105-1528-18-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- kernel/rcutree.c | 132 ++++++++++++++++++++++++------------------------ kernel/rcutree.h | 6 +-- kernel/rcutree_plugin.h | 46 ++++++++--------- 3 files changed, 92 insertions(+), 92 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index dd0d31dffcdc..65a807b4f58c 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -66,11 +66,11 @@ static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; .signaled = RCU_GP_IDLE, \ .gpnum = -300, \ .completed = -300, \ - .onofflock = __SPIN_LOCK_UNLOCKED(&name.onofflock), \ + .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&name.onofflock), \ .orphan_cbs_list = NULL, \ .orphan_cbs_tail = &name.orphan_cbs_list, \ .orphan_qlen = 0, \ - .fqslock = __SPIN_LOCK_UNLOCKED(&name.fqslock), \ + .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&name.fqslock), \ .n_force_qs = 0, \ .n_force_qs_ngp = 0, \ } @@ -457,10 +457,10 @@ static void print_other_cpu_stall(struct rcu_state *rsp) /* Only let one CPU complain about others per time interval. */ - spin_lock_irqsave(&rnp->lock, flags); + raw_spin_lock_irqsave(&rnp->lock, flags); delta = jiffies - rsp->jiffies_stall; if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) { - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); return; } rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK; @@ -470,7 +470,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp) * due to CPU offlining. */ rcu_print_task_stall(rnp); - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); /* OK, time to rat on our buddy... */ @@ -499,11 +499,11 @@ static void print_cpu_stall(struct rcu_state *rsp) smp_processor_id(), jiffies - rsp->gp_start); trigger_all_cpu_backtrace(); - spin_lock_irqsave(&rnp->lock, flags); + raw_spin_lock_irqsave(&rnp->lock, flags); if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall)) rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK; - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); set_need_resched(); /* kick ourselves to get things going. */ } @@ -563,12 +563,12 @@ static void note_new_gpnum(struct rcu_state *rsp, struct rcu_data *rdp) local_irq_save(flags); rnp = rdp->mynode; if (rdp->gpnum == ACCESS_ONCE(rnp->gpnum) || /* outside lock. */ - !spin_trylock(&rnp->lock)) { /* irqs already off, retry later. */ + !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */ local_irq_restore(flags); return; } __note_new_gpnum(rsp, rnp, rdp); - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); } /* @@ -627,12 +627,12 @@ rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp) local_irq_save(flags); rnp = rdp->mynode; if (rdp->completed == ACCESS_ONCE(rnp->completed) || /* outside lock. */ - !spin_trylock(&rnp->lock)) { /* irqs already off, retry later. */ + !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */ local_irq_restore(flags); return; } __rcu_process_gp_end(rsp, rnp, rdp); - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); } /* @@ -681,10 +681,10 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) if (cpu_needs_another_gp(rsp, rdp)) rsp->fqs_need_gp = 1; if (rnp->completed == rsp->completed) { - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); return; } - spin_unlock(&rnp->lock); /* irqs remain disabled. */ + raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ /* * Propagate new ->completed value to rcu_node structures @@ -692,9 +692,9 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) * of the next grace period to process their callbacks. */ rcu_for_each_node_breadth_first(rsp, rnp) { - spin_lock(&rnp->lock); /* irqs already disabled. */ + raw_spin_lock(&rnp->lock); /* irqs already disabled. */ rnp->completed = rsp->completed; - spin_unlock(&rnp->lock); /* irqs remain disabled. */ + raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ } local_irq_restore(flags); return; @@ -715,15 +715,15 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) rnp->completed = rsp->completed; rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */ rcu_start_gp_per_cpu(rsp, rnp, rdp); - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); return; } - spin_unlock(&rnp->lock); /* leave irqs disabled. */ + raw_spin_unlock(&rnp->lock); /* leave irqs disabled. */ /* Exclude any concurrent CPU-hotplug operations. */ - spin_lock(&rsp->onofflock); /* irqs already disabled. */ + raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */ /* * Set the quiescent-state-needed bits in all the rcu_node @@ -743,21 +743,21 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) * irqs disabled. */ rcu_for_each_node_breadth_first(rsp, rnp) { - spin_lock(&rnp->lock); /* irqs already disabled. */ + raw_spin_lock(&rnp->lock); /* irqs already disabled. */ rcu_preempt_check_blocked_tasks(rnp); rnp->qsmask = rnp->qsmaskinit; rnp->gpnum = rsp->gpnum; rnp->completed = rsp->completed; if (rnp == rdp->mynode) rcu_start_gp_per_cpu(rsp, rnp, rdp); - spin_unlock(&rnp->lock); /* irqs remain disabled. */ + raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ } rnp = rcu_get_root(rsp); - spin_lock(&rnp->lock); /* irqs already disabled. */ + raw_spin_lock(&rnp->lock); /* irqs already disabled. */ rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */ - spin_unlock(&rnp->lock); /* irqs remain disabled. */ - spin_unlock_irqrestore(&rsp->onofflock, flags); + raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ + raw_spin_unlock_irqrestore(&rsp->onofflock, flags); } /* @@ -796,14 +796,14 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, if (!(rnp->qsmask & mask)) { /* Our bit has already been cleared, so done. */ - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); return; } rnp->qsmask &= ~mask; if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) { /* Other bits still set at this level, so done. */ - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); return; } mask = rnp->grpmask; @@ -813,10 +813,10 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, break; } - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); rnp_c = rnp; rnp = rnp->parent; - spin_lock_irqsave(&rnp->lock, flags); + raw_spin_lock_irqsave(&rnp->lock, flags); WARN_ON_ONCE(rnp_c->qsmask); } @@ -845,7 +845,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long las struct rcu_node *rnp; rnp = rdp->mynode; - spin_lock_irqsave(&rnp->lock, flags); + raw_spin_lock_irqsave(&rnp->lock, flags); if (lastcomp != rnp->completed) { /* @@ -857,12 +857,12 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long las * race occurred. */ rdp->passed_quiesc = 0; /* try again later! */ - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); return; } mask = rdp->grpmask; if ((rnp->qsmask & mask) == 0) { - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); } else { rdp->qs_pending = 0; @@ -926,7 +926,7 @@ static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp) if (rdp->nxtlist == NULL) return; /* irqs disabled, so comparison is stable. */ - spin_lock(&rsp->onofflock); /* irqs already disabled. */ + raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */ *rsp->orphan_cbs_tail = rdp->nxtlist; rsp->orphan_cbs_tail = rdp->nxttail[RCU_NEXT_TAIL]; rdp->nxtlist = NULL; @@ -934,7 +934,7 @@ static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp) rdp->nxttail[i] = &rdp->nxtlist; rsp->orphan_qlen += rdp->qlen; rdp->qlen = 0; - spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ + raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ } /* @@ -945,10 +945,10 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) unsigned long flags; struct rcu_data *rdp; - spin_lock_irqsave(&rsp->onofflock, flags); + raw_spin_lock_irqsave(&rsp->onofflock, flags); rdp = rsp->rda[smp_processor_id()]; if (rsp->orphan_cbs_list == NULL) { - spin_unlock_irqrestore(&rsp->onofflock, flags); + raw_spin_unlock_irqrestore(&rsp->onofflock, flags); return; } *rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_list; @@ -957,7 +957,7 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) rsp->orphan_cbs_list = NULL; rsp->orphan_cbs_tail = &rsp->orphan_cbs_list; rsp->orphan_qlen = 0; - spin_unlock_irqrestore(&rsp->onofflock, flags); + raw_spin_unlock_irqrestore(&rsp->onofflock, flags); } /* @@ -973,23 +973,23 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) struct rcu_node *rnp; /* Exclude any attempts to start a new grace period. */ - spin_lock_irqsave(&rsp->onofflock, flags); + raw_spin_lock_irqsave(&rsp->onofflock, flags); /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */ rnp = rdp->mynode; /* this is the outgoing CPU's rnp. */ mask = rdp->grpmask; /* rnp->grplo is constant. */ do { - spin_lock(&rnp->lock); /* irqs already disabled. */ + raw_spin_lock(&rnp->lock); /* irqs already disabled. */ rnp->qsmaskinit &= ~mask; if (rnp->qsmaskinit != 0) { if (rnp != rdp->mynode) - spin_unlock(&rnp->lock); /* irqs remain disabled. */ + raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ break; } if (rnp == rdp->mynode) need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp); else - spin_unlock(&rnp->lock); /* irqs remain disabled. */ + raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ mask = rnp->grpmask; rnp = rnp->parent; } while (rnp != NULL); @@ -1000,12 +1000,12 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) * because invoking rcu_report_unblock_qs_rnp() with ->onofflock * held leads to deadlock. */ - spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ + raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ rnp = rdp->mynode; if (need_report & RCU_OFL_TASKS_NORM_GP) rcu_report_unblock_qs_rnp(rnp, flags); else - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); if (need_report & RCU_OFL_TASKS_EXP_GP) rcu_report_exp_rnp(rsp, rnp); @@ -1176,13 +1176,13 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *)) rcu_for_each_leaf_node(rsp, rnp) { mask = 0; - spin_lock_irqsave(&rnp->lock, flags); + raw_spin_lock_irqsave(&rnp->lock, flags); if (!rcu_gp_in_progress(rsp)) { - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); return; } if (rnp->qsmask == 0) { - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); continue; } cpu = rnp->grplo; @@ -1197,7 +1197,7 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *)) rcu_report_qs_rnp(mask, rsp, rnp, flags); continue; } - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); } } @@ -1212,18 +1212,18 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) if (!rcu_gp_in_progress(rsp)) return; /* No grace period in progress, nothing to force. */ - if (!spin_trylock_irqsave(&rsp->fqslock, flags)) { + if (!raw_spin_trylock_irqsave(&rsp->fqslock, flags)) { rsp->n_force_qs_lh++; /* Inexact, can lose counts. Tough! */ return; /* Someone else is already on the job. */ } if (relaxed && ULONG_CMP_GE(rsp->jiffies_force_qs, jiffies)) goto unlock_fqs_ret; /* no emergency and done recently. */ rsp->n_force_qs++; - spin_lock(&rnp->lock); /* irqs already disabled */ + raw_spin_lock(&rnp->lock); /* irqs already disabled */ rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; if(!rcu_gp_in_progress(rsp)) { rsp->n_force_qs_ngp++; - spin_unlock(&rnp->lock); /* irqs remain disabled */ + raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ goto unlock_fqs_ret; /* no GP in progress, time updated. */ } rsp->fqs_active = 1; @@ -1235,13 +1235,13 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) case RCU_SAVE_DYNTICK: - spin_unlock(&rnp->lock); /* irqs remain disabled */ + raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK) break; /* So gcc recognizes the dead code. */ /* Record dyntick-idle state. */ force_qs_rnp(rsp, dyntick_save_progress_counter); - spin_lock(&rnp->lock); /* irqs already disabled */ + raw_spin_lock(&rnp->lock); /* irqs already disabled */ if (rcu_gp_in_progress(rsp)) rsp->signaled = RCU_FORCE_QS; break; @@ -1249,24 +1249,24 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) case RCU_FORCE_QS: /* Check dyntick-idle state, send IPI to laggarts. */ - spin_unlock(&rnp->lock); /* irqs remain disabled */ + raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ force_qs_rnp(rsp, rcu_implicit_dynticks_qs); /* Leave state in case more forcing is required. */ - spin_lock(&rnp->lock); /* irqs already disabled */ + raw_spin_lock(&rnp->lock); /* irqs already disabled */ break; } rsp->fqs_active = 0; if (rsp->fqs_need_gp) { - spin_unlock(&rsp->fqslock); /* irqs remain disabled */ + raw_spin_unlock(&rsp->fqslock); /* irqs remain disabled */ rsp->fqs_need_gp = 0; rcu_start_gp(rsp, flags); /* releases rnp->lock */ return; } - spin_unlock(&rnp->lock); /* irqs remain disabled */ + raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ unlock_fqs_ret: - spin_unlock_irqrestore(&rsp->fqslock, flags); + raw_spin_unlock_irqrestore(&rsp->fqslock, flags); } #else /* #ifdef CONFIG_SMP */ @@ -1308,7 +1308,7 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) /* Does this CPU require a not-yet-started grace period? */ if (cpu_needs_another_gp(rsp, rdp)) { - spin_lock_irqsave(&rcu_get_root(rsp)->lock, flags); + raw_spin_lock_irqsave(&rcu_get_root(rsp)->lock, flags); rcu_start_gp(rsp, flags); /* releases above lock */ } @@ -1373,7 +1373,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), unsigned long nestflag; struct rcu_node *rnp_root = rcu_get_root(rsp); - spin_lock_irqsave(&rnp_root->lock, nestflag); + raw_spin_lock_irqsave(&rnp_root->lock, nestflag); rcu_start_gp(rsp, nestflag); /* releases rnp_root->lock. */ } @@ -1662,7 +1662,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) struct rcu_node *rnp = rcu_get_root(rsp); /* Set up local state, ensuring consistent view of global state. */ - spin_lock_irqsave(&rnp->lock, flags); + raw_spin_lock_irqsave(&rnp->lock, flags); rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo); rdp->nxtlist = NULL; for (i = 0; i < RCU_NEXT_SIZE; i++) @@ -1672,7 +1672,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) rdp->dynticks = &per_cpu(rcu_dynticks, cpu); #endif /* #ifdef CONFIG_NO_HZ */ rdp->cpu = cpu; - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); } /* @@ -1690,7 +1690,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable) struct rcu_node *rnp = rcu_get_root(rsp); /* Set up local state, ensuring consistent view of global state. */ - spin_lock_irqsave(&rnp->lock, flags); + raw_spin_lock_irqsave(&rnp->lock, flags); rdp->passed_quiesc = 0; /* We could be racing with new GP, */ rdp->qs_pending = 1; /* so set up to respond to current GP. */ rdp->beenonline = 1; /* We have now been online. */ @@ -1698,7 +1698,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable) rdp->qlen_last_fqs_check = 0; rdp->n_force_qs_snap = rsp->n_force_qs; rdp->blimit = blimit; - spin_unlock(&rnp->lock); /* irqs remain disabled. */ + raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ /* * A new grace period might start here. If so, we won't be part @@ -1706,14 +1706,14 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable) */ /* Exclude any attempts to start a new GP on large systems. */ - spin_lock(&rsp->onofflock); /* irqs already disabled. */ + raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */ /* Add CPU to rcu_node bitmasks. */ rnp = rdp->mynode; mask = rdp->grpmask; do { /* Exclude any attempts to start a new GP on small systems. */ - spin_lock(&rnp->lock); /* irqs already disabled. */ + raw_spin_lock(&rnp->lock); /* irqs already disabled. */ rnp->qsmaskinit |= mask; mask = rnp->grpmask; if (rnp == rdp->mynode) { @@ -1721,11 +1721,11 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable) rdp->completed = rnp->completed; rdp->passed_quiesc_completed = rnp->completed - 1; } - spin_unlock(&rnp->lock); /* irqs already disabled. */ + raw_spin_unlock(&rnp->lock); /* irqs already disabled. */ rnp = rnp->parent; } while (rnp != NULL && !(rnp->qsmaskinit & mask)); - spin_unlock_irqrestore(&rsp->onofflock, flags); + raw_spin_unlock_irqrestore(&rsp->onofflock, flags); } static void __cpuinit rcu_online_cpu(int cpu) @@ -1832,7 +1832,7 @@ static void __init rcu_init_one(struct rcu_state *rsp) cpustride *= rsp->levelspread[i]; rnp = rsp->level[i]; for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) { - spin_lock_init(&rnp->lock); + raw_spin_lock_init(&rnp->lock); lockdep_set_class_and_name(&rnp->lock, &rcu_node_class[i], buf[i]); rnp->gpnum = 0; diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 7495fed49c30..6a82c34ce669 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -90,7 +90,7 @@ struct rcu_dynticks { * Definition for node within the RCU grace-period-detection hierarchy. */ struct rcu_node { - spinlock_t lock; /* Root rcu_node's lock protects some */ + raw_spinlock_t lock; /* Root rcu_node's lock protects some */ /* rcu_state fields as well as following. */ unsigned long gpnum; /* Current grace period for this node. */ /* This will either be equal to or one */ @@ -291,7 +291,7 @@ struct rcu_state { /* End of fields guarded by root rcu_node's lock. */ - spinlock_t onofflock; /* exclude on/offline and */ + raw_spinlock_t onofflock; /* exclude on/offline and */ /* starting new GP. Also */ /* protects the following */ /* orphan_cbs fields. */ @@ -301,7 +301,7 @@ struct rcu_state { /* going offline. */ struct rcu_head **orphan_cbs_tail; /* And tail pointer. */ long orphan_qlen; /* Number of orphaned cbs. */ - spinlock_t fqslock; /* Only one task forcing */ + raw_spinlock_t fqslock; /* Only one task forcing */ /* quiescent states. */ unsigned long jiffies_force_qs; /* Time at which to invoke */ /* force_quiescent_state(). */ diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index a82566696b0b..a8b2e834fd3a 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -111,7 +111,7 @@ static void rcu_preempt_note_context_switch(int cpu) /* Possibly blocking in an RCU read-side critical section. */ rdp = rcu_preempt_state.rda[cpu]; rnp = rdp->mynode; - spin_lock_irqsave(&rnp->lock, flags); + raw_spin_lock_irqsave(&rnp->lock, flags); t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; t->rcu_blocked_node = rnp; @@ -132,7 +132,7 @@ static void rcu_preempt_note_context_switch(int cpu) WARN_ON_ONCE(!list_empty(&t->rcu_node_entry)); phase = (rnp->gpnum + !(rnp->qsmask & rdp->grpmask)) & 0x1; list_add(&t->rcu_node_entry, &rnp->blocked_tasks[phase]); - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); } /* @@ -189,7 +189,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) struct rcu_node *rnp_p; if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) { - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); return; /* Still need more quiescent states! */ } @@ -206,8 +206,8 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) /* Report up the rest of the hierarchy. */ mask = rnp->grpmask; - spin_unlock(&rnp->lock); /* irqs remain disabled. */ - spin_lock(&rnp_p->lock); /* irqs already disabled. */ + raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ + raw_spin_lock(&rnp_p->lock); /* irqs already disabled. */ rcu_report_qs_rnp(mask, &rcu_preempt_state, rnp_p, flags); } @@ -257,10 +257,10 @@ static void rcu_read_unlock_special(struct task_struct *t) */ for (;;) { rnp = t->rcu_blocked_node; - spin_lock(&rnp->lock); /* irqs already disabled. */ + raw_spin_lock(&rnp->lock); /* irqs already disabled. */ if (rnp == t->rcu_blocked_node) break; - spin_unlock(&rnp->lock); /* irqs remain disabled. */ + raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ } empty = !rcu_preempted_readers(rnp); empty_exp = !rcu_preempted_readers_exp(rnp); @@ -274,7 +274,7 @@ static void rcu_read_unlock_special(struct task_struct *t) * Note that rcu_report_unblock_qs_rnp() releases rnp->lock. */ if (empty) - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); else rcu_report_unblock_qs_rnp(rnp, flags); @@ -324,12 +324,12 @@ static void rcu_print_task_stall(struct rcu_node *rnp) struct task_struct *t; if (rcu_preempted_readers(rnp)) { - spin_lock_irqsave(&rnp->lock, flags); + raw_spin_lock_irqsave(&rnp->lock, flags); phase = rnp->gpnum & 0x1; lp = &rnp->blocked_tasks[phase]; list_for_each_entry(t, lp, rcu_node_entry) printk(" P%d", t->pid); - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); } } @@ -400,11 +400,11 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, lp_root = &rnp_root->blocked_tasks[i]; while (!list_empty(lp)) { tp = list_entry(lp->next, typeof(*tp), rcu_node_entry); - spin_lock(&rnp_root->lock); /* irqs already disabled */ + raw_spin_lock(&rnp_root->lock); /* irqs already disabled */ list_del(&tp->rcu_node_entry); tp->rcu_blocked_node = rnp_root; list_add(&tp->rcu_node_entry, lp_root); - spin_unlock(&rnp_root->lock); /* irqs remain disabled */ + raw_spin_unlock(&rnp_root->lock); /* irqs remain disabled */ } } return retval; @@ -528,7 +528,7 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp) unsigned long flags; unsigned long mask; - spin_lock_irqsave(&rnp->lock, flags); + raw_spin_lock_irqsave(&rnp->lock, flags); for (;;) { if (!sync_rcu_preempt_exp_done(rnp)) break; @@ -537,12 +537,12 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp) break; } mask = rnp->grpmask; - spin_unlock(&rnp->lock); /* irqs remain disabled */ + raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ rnp = rnp->parent; - spin_lock(&rnp->lock); /* irqs already disabled */ + raw_spin_lock(&rnp->lock); /* irqs already disabled */ rnp->expmask &= ~mask; } - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); } /* @@ -557,11 +557,11 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp) { int must_wait; - spin_lock(&rnp->lock); /* irqs already disabled */ + raw_spin_lock(&rnp->lock); /* irqs already disabled */ list_splice_init(&rnp->blocked_tasks[0], &rnp->blocked_tasks[2]); list_splice_init(&rnp->blocked_tasks[1], &rnp->blocked_tasks[3]); must_wait = rcu_preempted_readers_exp(rnp); - spin_unlock(&rnp->lock); /* irqs remain disabled */ + raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ if (!must_wait) rcu_report_exp_rnp(rsp, rnp); } @@ -606,13 +606,13 @@ void synchronize_rcu_expedited(void) /* force all RCU readers onto blocked_tasks[]. */ synchronize_sched_expedited(); - spin_lock_irqsave(&rsp->onofflock, flags); + raw_spin_lock_irqsave(&rsp->onofflock, flags); /* Initialize ->expmask for all non-leaf rcu_node structures. */ rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) { - spin_lock(&rnp->lock); /* irqs already disabled. */ + raw_spin_lock(&rnp->lock); /* irqs already disabled. */ rnp->expmask = rnp->qsmaskinit; - spin_unlock(&rnp->lock); /* irqs remain disabled. */ + raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ } /* Snapshot current state of ->blocked_tasks[] lists. */ @@ -621,7 +621,7 @@ void synchronize_rcu_expedited(void) if (NUM_RCU_NODES > 1) sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp)); - spin_unlock_irqrestore(&rsp->onofflock, flags); + raw_spin_unlock_irqrestore(&rsp->onofflock, flags); /* Wait for snapshotted ->blocked_tasks[] lists to drain. */ rnp = rcu_get_root(rsp); @@ -756,7 +756,7 @@ static int rcu_preempted_readers(struct rcu_node *rnp) /* Because preemptible RCU does not exist, no quieting of tasks. */ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) { - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); } #endif /* #ifdef CONFIG_HOTPLUG_CPU */ -- cgit v1.2.3 From 3acd9eb31c5f7eb97cb2009fa41472710fb4a10f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 22 Feb 2010 17:05:03 -0800 Subject: rcu: Fix deadlock in TREE_PREEMPT_RCU CPU stall detection Under TREE_PREEMPT_RCU, print_other_cpu_stall() invokes rcu_print_task_stall() with the root rcu_node structure's ->lock held, and rcu_print_task_stall() acquires that same lock for self-deadlock. Fix this by removing the lock acquisition from rcu_print_task_stall(), and making all callers acquire the lock instead. Tested-by: John Kacur Tested-by: Thomas Gleixner Located-by: Thomas Gleixner Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1266887105-1528-19-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- kernel/rcutree.c | 2 ++ kernel/rcutree_plugin.h | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 65a807b4f58c..b07be37d2aa3 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -476,7 +476,9 @@ static void print_other_cpu_stall(struct rcu_state *rsp) printk(KERN_ERR "INFO: RCU detected CPU stalls:"); rcu_for_each_leaf_node(rsp, rnp) { + raw_spin_lock_irqsave(&rnp->lock, flags); rcu_print_task_stall(rnp); + raw_spin_unlock_irqrestore(&rnp->lock, flags); if (rnp->qsmask == 0) continue; for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++) diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index a8b2e834fd3a..aecfe37e0117 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -318,18 +318,15 @@ EXPORT_SYMBOL_GPL(__rcu_read_unlock); */ static void rcu_print_task_stall(struct rcu_node *rnp) { - unsigned long flags; struct list_head *lp; int phase; struct task_struct *t; if (rcu_preempted_readers(rnp)) { - raw_spin_lock_irqsave(&rnp->lock, flags); phase = rnp->gpnum & 0x1; lp = &rnp->blocked_tasks[phase]; list_for_each_entry(t, lp, rcu_node_entry) printk(" P%d", t->pid); - raw_spin_unlock_irqrestore(&rnp->lock, flags); } } -- cgit v1.2.3 From 6155fec92e85f07d99e9746234496215443ffb0d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 22 Feb 2010 17:05:04 -0800 Subject: rcu: Fix rcutorture mod_timer argument to delay one jiffy The current "mod_timer(&t, 1)" potentially makes the timer fire immediately, change this to wait one jiffy. Signed-off-by: Dan Carpenter Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1266887105-1528-20-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- kernel/rcutorture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 5f43f30fcd1d..258cdf0a91eb 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -853,7 +853,7 @@ rcu_torture_reader(void *arg) do { if (irqreader && cur_ops->irq_capable) { if (!timer_pending(&t)) - mod_timer(&t, 1); + mod_timer(&t, jiffies + 1); } idx = cur_ops->readlock(); completed = cur_ops->completed(); -- cgit v1.2.3 From 1ed509a225008c9e8c0644fbd22168e09a7383a0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 22 Feb 2010 17:05:05 -0800 Subject: rcu: Add RCU_CPU_STALL_VERBOSE to dump detailed per-task information When RCU detects a grace-period stall, it currently just prints out the PID of any tasks doing the stalling. This patch adds RCU_CPU_STALL_VERBOSE, which enables the more-verbose reporting from sched_show_task(). Suggested-by: Thomas Gleixner Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1266887105-1528-21-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- kernel/rcutree.c | 4 ++++ kernel/rcutree.h | 1 + kernel/rcutree_plugin.h | 52 +++++++++++++++++++++++++++++++++++++++++++++++++ lib/Kconfig.debug | 12 ++++++++++++ 4 files changed, 69 insertions(+) (limited to 'kernel') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index b07be37d2aa3..525d39810616 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -489,6 +489,10 @@ static void print_other_cpu_stall(struct rcu_state *rsp) smp_processor_id(), (long)(jiffies - rsp->gp_start)); trigger_all_cpu_backtrace(); + /* If so configured, complain about tasks blocking the grace period. */ + + rcu_print_detail_task_stall(rsp); + force_quiescent_state(rsp, 0); /* Kick them all. */ } diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 6a82c34ce669..2ceb08388582 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -352,6 +352,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags); #endif /* #ifdef CONFIG_HOTPLUG_CPU */ #ifdef CONFIG_RCU_CPU_STALL_DETECTOR +static void rcu_print_detail_task_stall(struct rcu_state *rsp); static void rcu_print_task_stall(struct rcu_node *rnp); #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index aecfe37e0117..3516de7091a1 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -312,6 +312,50 @@ EXPORT_SYMBOL_GPL(__rcu_read_unlock); #ifdef CONFIG_RCU_CPU_STALL_DETECTOR +#ifdef CONFIG_RCU_CPU_STALL_VERBOSE + +/* + * Dump detailed information for all tasks blocking the current RCU + * grace period on the specified rcu_node structure. + */ +static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp) +{ + unsigned long flags; + struct list_head *lp; + int phase; + struct task_struct *t; + + if (rcu_preempted_readers(rnp)) { + raw_spin_lock_irqsave(&rnp->lock, flags); + phase = rnp->gpnum & 0x1; + lp = &rnp->blocked_tasks[phase]; + list_for_each_entry(t, lp, rcu_node_entry) + sched_show_task(t); + raw_spin_unlock_irqrestore(&rnp->lock, flags); + } +} + +/* + * Dump detailed information for all tasks blocking the current RCU + * grace period. + */ +static void rcu_print_detail_task_stall(struct rcu_state *rsp) +{ + struct rcu_node *rnp = rcu_get_root(rsp); + + rcu_print_detail_task_stall_rnp(rnp); + rcu_for_each_leaf_node(rsp, rnp) + rcu_print_detail_task_stall_rnp(rnp); +} + +#else /* #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */ + +static void rcu_print_detail_task_stall(struct rcu_state *rsp) +{ +} + +#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */ + /* * Scan the current list of tasks blocked within RCU read-side critical * sections, printing out the tid of each. @@ -760,6 +804,14 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) #ifdef CONFIG_RCU_CPU_STALL_DETECTOR +/* + * Because preemptable RCU does not exist, we never have to check for + * tasks blocked within RCU read-side critical sections. + */ +static void rcu_print_detail_task_stall(struct rcu_state *rsp) +{ +} + /* * Because preemptable RCU does not exist, we never have to check for * tasks blocked within RCU read-side critical sections. diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 6af20a8a0a54..4cdab452bfe2 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -781,6 +781,18 @@ config RCU_CPU_STALL_DETECTOR Say Y if you are unsure. +config RCU_CPU_STALL_VERBOSE + bool "Print additional per-task information for RCU_CPU_STALL_DETECTOR" + depends on RCU_CPU_STALL_DETECTOR && TREE_PREEMPT_RCU + default n + help + This option causes RCU to printk detailed per-task information + for any tasks that are stalling the current RCU grace period. + + Say N if you are unsure. + + Say Y if you want to enable such checks. + config KPROBES_SANITY_TEST bool "Kprobes sanity tests" depends on DEBUG_KERNEL -- cgit v1.2.3 From c50cc75271759373bd89a036eec4d4269b291616 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 25 Feb 2010 12:02:13 +0100 Subject: sched, cgroups: Fix module export I have exported it in d11c563 - but cgroups.c did not have module.h included ... Cc: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1266887105-1528-6-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- kernel/cgroup.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index b1a0f5a528fe..4fd90e129772 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -23,6 +23,7 @@ */ #include +#include #include #include #include -- cgit v1.2.3 From 056ba4a9bea5f32781a36b797c562fb731e5eaa6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 25 Feb 2010 14:06:46 -0800 Subject: rcu: Make lockdep_rcu_dereference() message less alarmist Change from "unsafe" to "suspicious", given that there will be false alarms. Suggested-by: Ingo Molnar Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1267135607-7056-1-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- kernel/lockdep.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 672c436946ce..0c30d0455de1 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -3816,9 +3816,9 @@ void lockdep_rcu_dereference(const char *file, const int line) if (!debug_locks_off()) return; - printk("\n==============================================\n"); - printk( "[ BUG: Unsafe rcu_dereference_check() usage! ]\n"); - printk( "----------------------------------------------\n"); + printk("\n===================================================\n"); + printk( "[ INFO: suspicious rcu_dereference_check() usage. ]\n"); + printk( "---------------------------------------------------\n"); printk("%s:%d invoked rcu_dereference_check() without protection!\n", file, line); printk("\nother info that might help us debug this:\n\n"); -- cgit v1.2.3 From d9f1bb6ad7fc53c406706f47858dd5ff030b14a3 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 25 Feb 2010 14:06:47 -0800 Subject: rcu: Make rcu_read_lock_sched_held() take boot time into account Before the scheduler starts, all tasks are non-preemptible by definition. So, during that time, rcu_read_lock_sched_held() needs to always return "true". This patch makes that be so. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1267135607-7056-2-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/rcupdate.h | 4 +++- include/linux/rcutiny.h | 4 ---- include/linux/rcutree.h | 1 - kernel/rcupdate.c | 18 ++++++++++++++++++ kernel/rcutree.c | 19 ------------------- 5 files changed, 21 insertions(+), 25 deletions(-) (limited to 'kernel') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 1a4de31bd7b4..fcea332a8424 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -62,6 +62,8 @@ extern int sched_expedited_torture_stats(char *page); /* Internal to kernel */ extern void rcu_init(void); +extern int rcu_scheduler_active; +extern void rcu_scheduler_starting(void); #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) #include @@ -140,7 +142,7 @@ static inline int rcu_read_lock_sched_held(void) if (debug_locks) lockdep_opinion = lock_is_held(&rcu_sched_lock_map); - return lockdep_opinion || preempt_count() != 0; + return lockdep_opinion || preempt_count() != 0 || !rcu_scheduler_active; } #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 2b70d4e37383..a5195875480a 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -105,10 +105,6 @@ static inline void rcu_exit_nohz(void) #endif /* #else #ifdef CONFIG_NO_HZ */ -static inline void rcu_scheduler_starting(void) -{ -} - static inline void exit_rcu(void) { } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 704a010f686c..42cc3a04779e 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -35,7 +35,6 @@ struct notifier_block; extern void rcu_sched_qs(int cpu); extern void rcu_bh_qs(int cpu); extern int rcu_needs_cpu(int cpu); -extern void rcu_scheduler_starting(void); extern int rcu_expedited_torture_stats(char *page); #ifdef CONFIG_TREE_PREEMPT_RCU diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 033cb55c26df..7bfa004572b1 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -44,6 +44,7 @@ #include #include #include +#include #ifdef CONFIG_DEBUG_LOCK_ALLOC static struct lock_class_key rcu_lock_key; @@ -62,6 +63,23 @@ struct lockdep_map rcu_sched_lock_map = EXPORT_SYMBOL_GPL(rcu_sched_lock_map); #endif +int rcu_scheduler_active __read_mostly; + +/* + * This function is invoked towards the end of the scheduler's initialization + * process. Before this is called, the idle task might contain + * RCU read-side critical sections (during which time, this idle + * task is booting the system). After this function is called, the + * idle tasks are prohibited from containing RCU read-side critical + * sections. + */ +void rcu_scheduler_starting(void) +{ + WARN_ON(num_online_cpus() != 1); + WARN_ON(nr_context_switches() > 0); + rcu_scheduler_active = 1; +} + /* * Awaken the corresponding synchronize_rcu() instance now that a * grace period has elapsed. diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 525d39810616..335bfe4f0076 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -46,7 +46,6 @@ #include #include #include -#include #include "rcutree.h" @@ -81,9 +80,6 @@ DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); -static int rcu_scheduler_active __read_mostly; - - /* * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s * permit this function to be invoked without holding the root rcu_node @@ -1565,21 +1561,6 @@ static int rcu_needs_cpu_quick_check(int cpu) rcu_preempt_needs_cpu(cpu); } -/* - * This function is invoked towards the end of the scheduler's initialization - * process. Before this is called, the idle task might contain - * RCU read-side critical sections (during which time, this idle - * task is booting the system). After this function is called, the - * idle tasks are prohibited from containing RCU read-side critical - * sections. - */ -void rcu_scheduler_starting(void) -{ - WARN_ON(num_online_cpus() != 1); - WARN_ON(nr_context_switches() > 0); - rcu_scheduler_active = 1; -} - static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; static atomic_t rcu_barrier_cpu_count; static DEFINE_MUTEX(rcu_barrier_mutex); -- cgit v1.2.3 From f5f654096487c6d526c47bb66308f9de81f091cf Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 25 Feb 2010 19:02:30 -0800 Subject: rcu: Export rcu_scheduler_active Kernel modules using rcu_read_lock_sched_held() must now have access to rcu_scheduler_active, so it must be exported. This should fix the fix for the boot-time RCU-lockdep splat. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <20100226030230.GA7743@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- kernel/rcupdate.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 7bfa004572b1..f1125c1a6321 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -64,6 +64,7 @@ EXPORT_SYMBOL_GPL(rcu_sched_lock_map); #endif int rcu_scheduler_active __read_mostly; +EXPORT_SYMBOL_GPL(rcu_scheduler_active); /* * This function is invoked towards the end of the scheduler's initialization -- cgit v1.2.3 From a47cd880b50e14b0b6f5e9d426ae9a2676c9c474 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 26 Feb 2010 16:38:56 -0800 Subject: rcu: Fix accelerated grace periods for last non-dynticked CPU It is invalid to invoke __rcu_process_callbacks() with irqs disabled, so do it indirectly via raise_softirq(). This requires a state-machine implementation to cycle through the grace-period machinery the required number of times. Located-by: Ingo Molnar Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1267231138-27856-1-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- kernel/rcutree.c | 3 ++ kernel/rcutree.h | 1 + kernel/rcutree_plugin.h | 73 +++++++++++++++++++++++++++++++++++-------------- 3 files changed, 57 insertions(+), 20 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 335bfe4f0076..3ec8160fc75f 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1341,6 +1341,9 @@ static void rcu_process_callbacks(struct softirq_action *unused) * grace-period manipulations above. */ smp_mb(); /* See above block comment. */ + + /* If we are last CPU on way to dyntick-idle mode, accelerate it. */ + rcu_needs_cpu_flush(); } static void diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 2ceb08388582..1439eb504c22 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -373,5 +373,6 @@ static int rcu_preempt_needs_cpu(int cpu); static void __cpuinit rcu_preempt_init_percpu_data(int cpu); static void rcu_preempt_send_cbs_to_orphanage(void); static void __init __rcu_init_preempt(void); +static void rcu_needs_cpu_flush(void); #endif /* #ifndef RCU_TREE_NONCORE */ diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 3516de7091a1..ed241fc478f0 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -973,9 +973,19 @@ int rcu_needs_cpu(int cpu) return rcu_needs_cpu_quick_check(cpu); } +/* + * Check to see if we need to continue a callback-flush operations to + * allow the last CPU to enter dyntick-idle mode. But fast dyntick-idle + * entry is not configured, so we never do need to. + */ +static void rcu_needs_cpu_flush(void) +{ +} + #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */ #define RCU_NEEDS_CPU_FLUSHES 5 +static DEFINE_PER_CPU(int, rcu_dyntick_drain); /* * Check to see if any future RCU-related work will need to be done @@ -988,39 +998,62 @@ int rcu_needs_cpu(int cpu) * only if all other CPUs are already in dynticks-idle mode. This will * allow the CPU cores to be powered down immediately, as opposed to after * waiting many milliseconds for grace periods to elapse. + * + * Because it is not legal to invoke rcu_process_callbacks() with irqs + * disabled, we do one pass of force_quiescent_state(), then do a + * raise_softirq() to cause rcu_process_callbacks() to be invoked later. + * The per-cpu rcu_dyntick_drain variable controls the sequencing. */ int rcu_needs_cpu(int cpu) { - int c = 1; - int i; + int c = 0; int thatcpu; /* Don't bother unless we are the last non-dyntick-idle CPU. */ for_each_cpu_not(thatcpu, nohz_cpu_mask) - if (thatcpu != cpu) + if (thatcpu != cpu) { + per_cpu(rcu_dyntick_drain, cpu) = 0; return rcu_needs_cpu_quick_check(cpu); - - /* Try to push remaining RCU-sched and RCU-bh callbacks through. */ - for (i = 0; i < RCU_NEEDS_CPU_FLUSHES && c; i++) { - c = 0; - if (per_cpu(rcu_sched_data, cpu).nxtlist) { - rcu_sched_qs(cpu); - force_quiescent_state(&rcu_sched_state, 0); - __rcu_process_callbacks(&rcu_sched_state, - &per_cpu(rcu_sched_data, cpu)); - c = !!per_cpu(rcu_sched_data, cpu).nxtlist; - } - if (per_cpu(rcu_bh_data, cpu).nxtlist) { - rcu_bh_qs(cpu); - force_quiescent_state(&rcu_bh_state, 0); - __rcu_process_callbacks(&rcu_bh_state, - &per_cpu(rcu_bh_data, cpu)); - c = !!per_cpu(rcu_bh_data, cpu).nxtlist; } + + /* Check and update the rcu_dyntick_drain sequencing. */ + if (per_cpu(rcu_dyntick_drain, cpu) <= 0) { + /* First time through, initialize the counter. */ + per_cpu(rcu_dyntick_drain, cpu) = RCU_NEEDS_CPU_FLUSHES; + } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) { + /* We have hit the limit, so time to give up. */ + return rcu_needs_cpu_quick_check(cpu); + } + + /* Do one step pushing remaining RCU callbacks through. */ + if (per_cpu(rcu_sched_data, cpu).nxtlist) { + rcu_sched_qs(cpu); + force_quiescent_state(&rcu_sched_state, 0); + c = c || per_cpu(rcu_sched_data, cpu).nxtlist; + } + if (per_cpu(rcu_bh_data, cpu).nxtlist) { + rcu_bh_qs(cpu); + force_quiescent_state(&rcu_bh_state, 0); + c = c || per_cpu(rcu_bh_data, cpu).nxtlist; } /* If RCU callbacks are still pending, RCU still needs this CPU. */ + if (c) + raise_softirq(RCU_SOFTIRQ); return c; } +/* + * Check to see if we need to continue a callback-flush operations to + * allow the last CPU to enter dyntick-idle mode. + */ +static void rcu_needs_cpu_flush(void) +{ + int cpu = smp_processor_id(); + + if (per_cpu(rcu_dyntick_drain, cpu) <= 0) + return; + (void)rcu_needs_cpu(cpu); +} + #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ -- cgit v1.2.3 From 71da81324c83ef65bb196c7f874ac1c6996d8287 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 26 Feb 2010 16:38:58 -0800 Subject: rcu: Fix accelerated GPs for last non-dynticked CPU This patch disables irqs across the call to rcu_needs_cpu(). It also enforces a hold-off period so that the idle loop doesn't softirq itself to death when there are lots of RCU callbacks in flight on the last non-dynticked CPU. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1267231138-27856-3-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- kernel/rcutree_plugin.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index ed241fc478f0..464ad2cdee00 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -986,6 +986,7 @@ static void rcu_needs_cpu_flush(void) #define RCU_NEEDS_CPU_FLUSHES 5 static DEFINE_PER_CPU(int, rcu_dyntick_drain); +static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff); /* * Check to see if any future RCU-related work will need to be done @@ -1013,6 +1014,7 @@ int rcu_needs_cpu(int cpu) for_each_cpu_not(thatcpu, nohz_cpu_mask) if (thatcpu != cpu) { per_cpu(rcu_dyntick_drain, cpu) = 0; + per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; return rcu_needs_cpu_quick_check(cpu); } @@ -1022,6 +1024,7 @@ int rcu_needs_cpu(int cpu) per_cpu(rcu_dyntick_drain, cpu) = RCU_NEEDS_CPU_FLUSHES; } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) { /* We have hit the limit, so time to give up. */ + per_cpu(rcu_dyntick_holdoff, cpu) = jiffies; return rcu_needs_cpu_quick_check(cpu); } @@ -1038,8 +1041,10 @@ int rcu_needs_cpu(int cpu) } /* If RCU callbacks are still pending, RCU still needs this CPU. */ - if (c) + if (c) { raise_softirq(RCU_SOFTIRQ); + per_cpu(rcu_dyntick_holdoff, cpu) = jiffies; + } return c; } @@ -1050,10 +1055,13 @@ int rcu_needs_cpu(int cpu) static void rcu_needs_cpu_flush(void) { int cpu = smp_processor_id(); + unsigned long flags; if (per_cpu(rcu_dyntick_drain, cpu) <= 0) return; + local_irq_save(flags); (void)rcu_needs_cpu(cpu); + local_irq_restore(flags); } #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ -- cgit v1.2.3