summaryrefslogtreecommitdiff
path: root/kernel/sched.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--kernel/sched.c144
1 files changed, 99 insertions, 45 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index a9ecac398bb9..365f0b90b4de 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -665,11 +665,57 @@ static int effective_prio(task_t *p)
}
/*
+ * We place interactive tasks back into the active array, if possible.
+ *
+ * To guarantee that this does not starve expired tasks we ignore the
+ * interactivity of a task if the first expired task had to wait more
+ * than a 'reasonable' amount of time. This deadline timeout is
+ * load-dependent, as the frequency of array switched decreases with
+ * increasing number of running tasks. We also ignore the interactivity
+ * if a better static_prio task has expired, and switch periodically
+ * regardless, to ensure that highly interactive tasks do not starve
+ * the less fortunate for unreasonably long periods.
+ */
+static inline int expired_starving(runqueue_t *rq)
+{
+ int limit;
+
+ /*
+ * Arrays were recently switched, all is well
+ */
+ if (!rq->expired_timestamp)
+ return 0;
+
+ limit = STARVATION_LIMIT * rq->nr_running;
+
+ /*
+ * It's time to switch arrays
+ */
+ if (jiffies - rq->expired_timestamp >= limit)
+ return 1;
+
+ /*
+ * There's a better selection in the expired array
+ */
+ if (rq->curr->static_prio > rq->best_expired_prio)
+ return 1;
+
+ /*
+ * All is well
+ */
+ return 0;
+}
+
+/*
* __activate_task - move a task to the runqueue.
*/
-static inline void __activate_task(task_t *p, runqueue_t *rq)
+static void __activate_task(task_t *p, runqueue_t *rq)
{
- enqueue_task(p, rq->active);
+ prio_array_t *target = rq->active;
+
+ if (unlikely(batch_task(p) || (expired_starving(rq) && !rt_task(p))))
+ target = rq->expired;
+ enqueue_task(p, target);
rq->nr_running++;
}
@@ -688,7 +734,7 @@ static int recalc_task_prio(task_t *p, unsigned long long now)
unsigned long long __sleep_time = now - p->timestamp;
unsigned long sleep_time;
- if (unlikely(p->policy == SCHED_BATCH))
+ if (batch_task(p))
sleep_time = 0;
else {
if (__sleep_time > NS_MAX_SLEEP_AVG)
@@ -700,21 +746,25 @@ static int recalc_task_prio(task_t *p, unsigned long long now)
if (likely(sleep_time > 0)) {
/*
* User tasks that sleep a long time are categorised as
- * idle and will get just interactive status to stay active &
- * prevent them suddenly becoming cpu hogs and starving
- * other processes.
+ * idle. They will only have their sleep_avg increased to a
+ * level that makes them just interactive priority to stay
+ * active yet prevent them suddenly becoming cpu hogs and
+ * starving other processes.
*/
- if (p->mm && p->activated != -1 &&
- sleep_time > INTERACTIVE_SLEEP(p)) {
- p->sleep_avg = JIFFIES_TO_NS(MAX_SLEEP_AVG -
- DEF_TIMESLICE);
+ if (p->mm && sleep_time > INTERACTIVE_SLEEP(p)) {
+ unsigned long ceiling;
+
+ ceiling = JIFFIES_TO_NS(MAX_SLEEP_AVG -
+ DEF_TIMESLICE);
+ if (p->sleep_avg < ceiling)
+ p->sleep_avg = ceiling;
} else {
/*
* Tasks waking from uninterruptible sleep are
* limited in their sleep_avg rise as they
* are likely to be waiting on I/O
*/
- if (p->activated == -1 && p->mm) {
+ if (p->sleep_type == SLEEP_NONINTERACTIVE && p->mm) {
if (p->sleep_avg >= INTERACTIVE_SLEEP(p))
sleep_time = 0;
else if (p->sleep_avg + sleep_time >=
@@ -769,7 +819,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
* This checks to make sure it's not an uninterruptible task
* that is now waking up.
*/
- if (!p->activated) {
+ if (p->sleep_type == SLEEP_NORMAL) {
/*
* Tasks which were woken up by interrupts (ie. hw events)
* are most likely of interactive nature. So we give them
@@ -778,13 +828,13 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
* on a CPU, first time around:
*/
if (in_interrupt())
- p->activated = 2;
+ p->sleep_type = SLEEP_INTERRUPTED;
else {
/*
* Normal first-time wakeups get a credit too for
* on-runqueue time, but it will be weighted down:
*/
- p->activated = 1;
+ p->sleep_type = SLEEP_INTERACTIVE;
}
}
p->timestamp = now;
@@ -1272,19 +1322,19 @@ out_activate:
* Tasks on involuntary sleep don't earn
* sleep_avg beyond just interactive state.
*/
- p->activated = -1;
- }
+ p->sleep_type = SLEEP_NONINTERACTIVE;
+ } else
/*
* Tasks that have marked their sleep as noninteractive get
- * woken up without updating their sleep average. (i.e. their
- * sleep is handled in a priority-neutral manner, no priority
- * boost and no penalty.)
+ * woken up with their sleep average not weighted in an
+ * interactive way.
*/
- if (old_state & TASK_NONINTERACTIVE)
- __activate_task(p, rq);
- else
- activate_task(p, rq, cpu == this_cpu);
+ if (old_state & TASK_NONINTERACTIVE)
+ p->sleep_type = SLEEP_NONINTERACTIVE;
+
+
+ activate_task(p, rq, cpu == this_cpu);
/*
* Sync wakeups (i.e. those types of wakeups where the waker
* has indicated that it will leave the CPU in short order)
@@ -1658,6 +1708,21 @@ unsigned long nr_iowait(void)
return sum;
}
+unsigned long nr_active(void)
+{
+ unsigned long i, running = 0, uninterruptible = 0;
+
+ for_each_online_cpu(i) {
+ running += cpu_rq(i)->nr_running;
+ uninterruptible += cpu_rq(i)->nr_uninterruptible;
+ }
+
+ if (unlikely((long)uninterruptible < 0))
+ uninterruptible = 0;
+
+ return running + uninterruptible;
+}
+
#ifdef CONFIG_SMP
/*
@@ -2467,22 +2532,6 @@ unsigned long long current_sched_time(const task_t *tsk)
}
/*
- * We place interactive tasks back into the active array, if possible.
- *
- * To guarantee that this does not starve expired tasks we ignore the
- * interactivity of a task if the first expired task had to wait more
- * than a 'reasonable' amount of time. This deadline timeout is
- * load-dependent, as the frequency of array switched decreases with
- * increasing number of running tasks. We also ignore the interactivity
- * if a better static_prio task has expired:
- */
-#define EXPIRED_STARVING(rq) \
- ((STARVATION_LIMIT && ((rq)->expired_timestamp && \
- (jiffies - (rq)->expired_timestamp >= \
- STARVATION_LIMIT * ((rq)->nr_running) + 1))) || \
- ((rq)->curr->static_prio > (rq)->best_expired_prio))
-
-/*
* Account user cpu time to a process.
* @p: the process that the cpu time gets accounted to
* @hardirq_offset: the offset to subtract from hardirq_count()
@@ -2617,7 +2666,7 @@ void scheduler_tick(void)
if (!rq->expired_timestamp)
rq->expired_timestamp = jiffies;
- if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) {
+ if (!TASK_INTERACTIVE(p) || expired_starving(rq)) {
enqueue_task(p, rq->expired);
if (p->static_prio < rq->best_expired_prio)
rq->best_expired_prio = p->static_prio;
@@ -2860,6 +2909,12 @@ EXPORT_SYMBOL(sub_preempt_count);
#endif
+static inline int interactive_sleep(enum sleep_type sleep_type)
+{
+ return (sleep_type == SLEEP_INTERACTIVE ||
+ sleep_type == SLEEP_INTERRUPTED);
+}
+
/*
* schedule() is the main scheduler function.
*/
@@ -2983,12 +3038,12 @@ go_idle:
queue = array->queue + idx;
next = list_entry(queue->next, task_t, run_list);
- if (!rt_task(next) && next->activated > 0) {
+ if (!rt_task(next) && interactive_sleep(next->sleep_type)) {
unsigned long long delta = now - next->timestamp;
if (unlikely((long long)(now - next->timestamp) < 0))
delta = 0;
- if (next->activated == 1)
+ if (next->sleep_type == SLEEP_INTERACTIVE)
delta = delta * (ON_RUNQUEUE_WEIGHT * 128 / 100) / 128;
array = next->array;
@@ -2998,10 +3053,9 @@ go_idle:
dequeue_task(next, array);
next->prio = new_prio;
enqueue_task(next, array);
- } else
- requeue_task(next, array);
+ }
}
- next->activated = 0;
+ next->sleep_type = SLEEP_NORMAL;
switch_tasks:
if (next == rq->idle)
schedstat_inc(rq, sched_goidle);