diff options
Diffstat (limited to 'arch/powerpc/platforms/cell/spufs/sched.c')
-rw-r--r-- | arch/powerpc/platforms/cell/spufs/sched.c | 377 |
1 files changed, 275 insertions, 102 deletions
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index e5b4dd1db286..227968b4779d 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -51,9 +51,6 @@ struct spu_prio_array { DECLARE_BITMAP(bitmap, MAX_PRIO); struct list_head runq[MAX_PRIO]; spinlock_t runq_lock; - struct list_head active_list[MAX_NUMNODES]; - struct mutex active_mutex[MAX_NUMNODES]; - int nr_active[MAX_NUMNODES]; int nr_waiting; }; @@ -127,7 +124,7 @@ void __spu_update_sched_info(struct spu_context *ctx) ctx->policy = current->policy; /* - * A lot of places that don't hold active_mutex poke into + * A lot of places that don't hold list_mutex poke into * cpus_allowed, including grab_runnable_context which * already holds the runq_lock. So abuse runq_lock * to protect this field aswell. @@ -141,9 +138,9 @@ void spu_update_sched_info(struct spu_context *ctx) { int node = ctx->spu->node; - mutex_lock(&spu_prio->active_mutex[node]); + mutex_lock(&cbe_spu_info[node].list_mutex); __spu_update_sched_info(ctx); - mutex_unlock(&spu_prio->active_mutex[node]); + mutex_unlock(&cbe_spu_info[node].list_mutex); } static int __node_allowed(struct spu_context *ctx, int node) @@ -169,56 +166,56 @@ static int node_allowed(struct spu_context *ctx, int node) return rval; } -/** - * spu_add_to_active_list - add spu to active list - * @spu: spu to add to the active list - */ -static void spu_add_to_active_list(struct spu *spu) -{ - int node = spu->node; - - mutex_lock(&spu_prio->active_mutex[node]); - spu_prio->nr_active[node]++; - list_add_tail(&spu->list, &spu_prio->active_list[node]); - mutex_unlock(&spu_prio->active_mutex[node]); -} +static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier); -static void __spu_remove_from_active_list(struct spu *spu) +void spu_switch_notify(struct spu *spu, struct spu_context *ctx) { - list_del_init(&spu->list); - spu_prio->nr_active[spu->node]--; + blocking_notifier_call_chain(&spu_switch_notifier, + ctx ? ctx->object_id : 0, spu); } -/** - * spu_remove_from_active_list - remove spu from active list - * @spu: spu to remove from the active list - */ -static void spu_remove_from_active_list(struct spu *spu) +static void notify_spus_active(void) { - int node = spu->node; - - mutex_lock(&spu_prio->active_mutex[node]); - __spu_remove_from_active_list(spu); - mutex_unlock(&spu_prio->active_mutex[node]); -} + int node; -static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier); + /* + * Wake up the active spu_contexts. + * + * When the awakened processes see their "notify_active" flag is set, + * they will call spu_switch_notify(); + */ + for_each_online_node(node) { + struct spu *spu; -static void spu_switch_notify(struct spu *spu, struct spu_context *ctx) -{ - blocking_notifier_call_chain(&spu_switch_notifier, - ctx ? ctx->object_id : 0, spu); + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { + if (spu->alloc_state != SPU_FREE) { + struct spu_context *ctx = spu->ctx; + set_bit(SPU_SCHED_NOTIFY_ACTIVE, + &ctx->sched_flags); + mb(); + wake_up_all(&ctx->stop_wq); + } + } + mutex_unlock(&cbe_spu_info[node].list_mutex); + } } int spu_switch_event_register(struct notifier_block * n) { - return blocking_notifier_chain_register(&spu_switch_notifier, n); + int ret; + ret = blocking_notifier_chain_register(&spu_switch_notifier, n); + if (!ret) + notify_spus_active(); + return ret; } +EXPORT_SYMBOL_GPL(spu_switch_event_register); int spu_switch_event_unregister(struct notifier_block * n) { return blocking_notifier_chain_unregister(&spu_switch_notifier, n); } +EXPORT_SYMBOL_GPL(spu_switch_event_unregister); /** * spu_bind_context - bind spu context to physical spu @@ -229,6 +226,12 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx) { pr_debug("%s: pid=%d SPU=%d NODE=%d\n", __FUNCTION__, current->pid, spu->number, spu->node); + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); + + if (ctx->flags & SPU_CREATE_NOSCHED) + atomic_inc(&cbe_spu_info[spu->node].reserved_spus); + if (!list_empty(&ctx->aff_list)) + atomic_inc(&ctx->gang->aff_sched_count); ctx->stats.slb_flt_base = spu->stats.slb_flt; ctx->stats.class2_intr_base = spu->stats.class2_intr; @@ -238,6 +241,7 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx) ctx->spu = spu; ctx->ops = &spu_hw_ops; spu->pid = current->pid; + spu->tgid = current->tgid; spu_associate_mm(spu, ctx->owner); spu->ibox_callback = spufs_ibox_callback; spu->wbox_callback = spufs_wbox_callback; @@ -251,7 +255,153 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx) spu_cpu_affinity_set(spu, raw_smp_processor_id()); spu_switch_notify(spu, ctx); ctx->state = SPU_STATE_RUNNABLE; - spu_switch_state(spu, SPU_UTIL_SYSTEM); + + spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED); +} + +/* + * Must be used with the list_mutex held. + */ +static inline int sched_spu(struct spu *spu) +{ + BUG_ON(!mutex_is_locked(&cbe_spu_info[spu->node].list_mutex)); + + return (!spu->ctx || !(spu->ctx->flags & SPU_CREATE_NOSCHED)); +} + +static void aff_merge_remaining_ctxs(struct spu_gang *gang) +{ + struct spu_context *ctx; + + list_for_each_entry(ctx, &gang->aff_list_head, aff_list) { + if (list_empty(&ctx->aff_list)) + list_add(&ctx->aff_list, &gang->aff_list_head); + } + gang->aff_flags |= AFF_MERGED; +} + +static void aff_set_offsets(struct spu_gang *gang) +{ + struct spu_context *ctx; + int offset; + + offset = -1; + list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list, + aff_list) { + if (&ctx->aff_list == &gang->aff_list_head) + break; + ctx->aff_offset = offset--; + } + + offset = 0; + list_for_each_entry(ctx, gang->aff_ref_ctx->aff_list.prev, aff_list) { + if (&ctx->aff_list == &gang->aff_list_head) + break; + ctx->aff_offset = offset++; + } + + gang->aff_flags |= AFF_OFFSETS_SET; +} + +static struct spu *aff_ref_location(struct spu_context *ctx, int mem_aff, + int group_size, int lowest_offset) +{ + struct spu *spu; + int node, n; + + /* + * TODO: A better algorithm could be used to find a good spu to be + * used as reference location for the ctxs chain. + */ + node = cpu_to_node(raw_smp_processor_id()); + for (n = 0; n < MAX_NUMNODES; n++, node++) { + node = (node < MAX_NUMNODES) ? node : 0; + if (!node_allowed(ctx, node)) + continue; + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { + if ((!mem_aff || spu->has_mem_affinity) && + sched_spu(spu)) { + mutex_unlock(&cbe_spu_info[node].list_mutex); + return spu; + } + } + mutex_unlock(&cbe_spu_info[node].list_mutex); + } + return NULL; +} + +static void aff_set_ref_point_location(struct spu_gang *gang) +{ + int mem_aff, gs, lowest_offset; + struct spu_context *ctx; + struct spu *tmp; + + mem_aff = gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM; + lowest_offset = 0; + gs = 0; + + list_for_each_entry(tmp, &gang->aff_list_head, aff_list) + gs++; + + list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list, + aff_list) { + if (&ctx->aff_list == &gang->aff_list_head) + break; + lowest_offset = ctx->aff_offset; + } + + gang->aff_ref_spu = aff_ref_location(ctx, mem_aff, gs, lowest_offset); +} + +static struct spu *ctx_location(struct spu *ref, int offset, int node) +{ + struct spu *spu; + + spu = NULL; + if (offset >= 0) { + list_for_each_entry(spu, ref->aff_list.prev, aff_list) { + BUG_ON(spu->node != node); + if (offset == 0) + break; + if (sched_spu(spu)) + offset--; + } + } else { + list_for_each_entry_reverse(spu, ref->aff_list.next, aff_list) { + BUG_ON(spu->node != node); + if (offset == 0) + break; + if (sched_spu(spu)) + offset++; + } + } + + return spu; +} + +/* + * affinity_check is called each time a context is going to be scheduled. + * It returns the spu ptr on which the context must run. + */ +static int has_affinity(struct spu_context *ctx) +{ + struct spu_gang *gang = ctx->gang; + + if (list_empty(&ctx->aff_list)) + return 0; + + mutex_lock(&gang->aff_mutex); + if (!gang->aff_ref_spu) { + if (!(gang->aff_flags & AFF_MERGED)) + aff_merge_remaining_ctxs(gang); + if (!(gang->aff_flags & AFF_OFFSETS_SET)) + aff_set_offsets(gang); + aff_set_ref_point_location(gang); + } + mutex_unlock(&gang->aff_mutex); + + return gang->aff_ref_spu != NULL; } /** @@ -263,9 +413,13 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx) { pr_debug("%s: unbind pid=%d SPU=%d NODE=%d\n", __FUNCTION__, spu->pid, spu->number, spu->node); + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); - spu_switch_state(spu, SPU_UTIL_IDLE); - + if (spu->ctx->flags & SPU_CREATE_NOSCHED) + atomic_dec(&cbe_spu_info[spu->node].reserved_spus); + if (!list_empty(&ctx->aff_list)) + if (atomic_dec_and_test(&ctx->gang->aff_sched_count)) + ctx->gang->aff_ref_spu = NULL; spu_switch_notify(spu, NULL); spu_unmap_mappings(ctx); spu_save(&ctx->csa, spu); @@ -278,8 +432,8 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx) spu->dma_callback = NULL; spu_associate_mm(spu, NULL); spu->pid = 0; + spu->tgid = 0; ctx->ops = &spu_backing_ops; - ctx->spu = NULL; spu->flags = 0; spu->ctx = NULL; @@ -287,6 +441,10 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx) (spu->stats.slb_flt - ctx->stats.slb_flt_base); ctx->stats.class2_intr += (spu->stats.class2_intr - ctx->stats.class2_intr_base); + + /* This maps the underlying spu state to idle */ + spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED); + ctx->spu = NULL; } /** @@ -352,18 +510,41 @@ static void spu_prio_wait(struct spu_context *ctx) static struct spu *spu_get_idle(struct spu_context *ctx) { - struct spu *spu = NULL; - int node = cpu_to_node(raw_smp_processor_id()); - int n; + struct spu *spu; + int node, n; + + if (has_affinity(ctx)) { + node = ctx->gang->aff_ref_spu->node; + mutex_lock(&cbe_spu_info[node].list_mutex); + spu = ctx_location(ctx->gang->aff_ref_spu, ctx->aff_offset, node); + if (spu && spu->alloc_state == SPU_FREE) + goto found; + mutex_unlock(&cbe_spu_info[node].list_mutex); + return NULL; + } + + node = cpu_to_node(raw_smp_processor_id()); for (n = 0; n < MAX_NUMNODES; n++, node++) { node = (node < MAX_NUMNODES) ? node : 0; if (!node_allowed(ctx, node)) continue; - spu = spu_alloc_node(node); - if (spu) - break; + + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { + if (spu->alloc_state == SPU_FREE) + goto found; + } + mutex_unlock(&cbe_spu_info[node].list_mutex); } + + return NULL; + + found: + spu->alloc_state = SPU_USED; + mutex_unlock(&cbe_spu_info[node].list_mutex); + pr_debug("Got SPU %d %d\n", spu->number, spu->node); + spu_init_channels(spu); return spu; } @@ -393,15 +574,15 @@ static struct spu *find_victim(struct spu_context *ctx) if (!node_allowed(ctx, node)) continue; - mutex_lock(&spu_prio->active_mutex[node]); - list_for_each_entry(spu, &spu_prio->active_list[node], list) { + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { struct spu_context *tmp = spu->ctx; if (tmp->prio > ctx->prio && (!victim || tmp->prio > victim->prio)) victim = spu->ctx; } - mutex_unlock(&spu_prio->active_mutex[node]); + mutex_unlock(&cbe_spu_info[node].list_mutex); if (victim) { /* @@ -426,7 +607,11 @@ static struct spu *find_victim(struct spu_context *ctx) victim = NULL; goto restart; } - spu_remove_from_active_list(spu); + + mutex_lock(&cbe_spu_info[node].list_mutex); + cbe_spu_info[node].nr_active--; + mutex_unlock(&cbe_spu_info[node].list_mutex); + spu_unbind_context(spu, victim); victim->stats.invol_ctx_switch++; spu->stats.invol_ctx_switch++; @@ -455,8 +640,6 @@ static struct spu *find_victim(struct spu_context *ctx) */ int spu_activate(struct spu_context *ctx, unsigned long flags) { - spuctx_switch_state(ctx, SPUCTX_UTIL_SYSTEM); - do { struct spu *spu; @@ -477,8 +660,12 @@ int spu_activate(struct spu_context *ctx, unsigned long flags) if (!spu && rt_prio(ctx->prio)) spu = find_victim(ctx); if (spu) { + int node = spu->node; + + mutex_lock(&cbe_spu_info[node].list_mutex); spu_bind_context(spu, ctx); - spu_add_to_active_list(spu); + cbe_spu_info[node].nr_active++; + mutex_unlock(&cbe_spu_info[node].list_mutex); return 0; } @@ -500,7 +687,7 @@ static struct spu_context *grab_runnable_context(int prio, int node) int best; spin_lock(&spu_prio->runq_lock); - best = sched_find_first_bit(spu_prio->bitmap); + best = find_first_bit(spu_prio->bitmap, prio); while (best < prio) { struct list_head *rq = &spu_prio->runq[best]; @@ -527,11 +714,17 @@ static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio) if (spu) { new = grab_runnable_context(max_prio, spu->node); if (new || force) { - spu_remove_from_active_list(spu); + int node = spu->node; + + mutex_lock(&cbe_spu_info[node].list_mutex); spu_unbind_context(spu, ctx); + spu->alloc_state = SPU_FREE; + cbe_spu_info[node].nr_active--; + mutex_unlock(&cbe_spu_info[node].list_mutex); + ctx->stats.vol_ctx_switch++; spu->stats.vol_ctx_switch++; - spu_free(spu); + if (new) wake_up(&new->stop_wq); } @@ -550,21 +743,11 @@ static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio) */ void spu_deactivate(struct spu_context *ctx) { - /* - * We must never reach this for a nosched context, - * but handle the case gracefull instead of panicing. - */ - if (ctx->flags & SPU_CREATE_NOSCHED) { - WARN_ON(1); - return; - } - __spu_deactivate(ctx, 1, MAX_PRIO); - spuctx_switch_state(ctx, SPUCTX_UTIL_USER); } /** - * spu_yield - yield a physical spu if others are waiting + * spu_yield - yield a physical spu if others are waiting * @ctx: spu context to yield * * Check if there is a higher priority context waiting and if yes @@ -575,17 +758,12 @@ void spu_yield(struct spu_context *ctx) { if (!(ctx->flags & SPU_CREATE_NOSCHED)) { mutex_lock(&ctx->state_mutex); - if (__spu_deactivate(ctx, 0, MAX_PRIO)) - spuctx_switch_state(ctx, SPUCTX_UTIL_USER); - else { - spuctx_switch_state(ctx, SPUCTX_UTIL_LOADED); - spu_switch_state(ctx->spu, SPU_UTIL_USER); - } + __spu_deactivate(ctx, 0, MAX_PRIO); mutex_unlock(&ctx->state_mutex); } } -static void spusched_tick(struct spu_context *ctx) +static noinline void spusched_tick(struct spu_context *ctx) { if (ctx->flags & SPU_CREATE_NOSCHED) return; @@ -596,7 +774,7 @@ static void spusched_tick(struct spu_context *ctx) return; /* - * Unfortunately active_mutex ranks outside of state_mutex, so + * Unfortunately list_mutex ranks outside of state_mutex, so * we have to trylock here. If we fail give the context another * tick and try again. */ @@ -606,12 +784,11 @@ static void spusched_tick(struct spu_context *ctx) new = grab_runnable_context(ctx->prio + 1, spu->node); if (new) { - - __spu_remove_from_active_list(spu); spu_unbind_context(spu, ctx); ctx->stats.invol_ctx_switch++; spu->stats.invol_ctx_switch++; - spu_free(spu); + spu->alloc_state = SPU_FREE; + cbe_spu_info[spu->node].nr_active--; wake_up(&new->stop_wq); /* * We need to break out of the wait loop in @@ -632,7 +809,7 @@ static void spusched_tick(struct spu_context *ctx) * * Return the number of tasks currently running or waiting to run. * - * Note that we don't take runq_lock / active_mutex here. Reading + * Note that we don't take runq_lock / list_mutex here. Reading * a single 32bit value is atomic on powerpc, and we don't care * about memory ordering issues here. */ @@ -641,7 +818,7 @@ static unsigned long count_active_contexts(void) int nr_active = 0, node; for (node = 0; node < MAX_NUMNODES; node++) - nr_active += spu_prio->nr_active[node]; + nr_active += cbe_spu_info[node].nr_active; nr_active += spu_prio->nr_waiting; return nr_active; @@ -681,19 +858,18 @@ static void spusched_wake(unsigned long data) static int spusched_thread(void *unused) { - struct spu *spu, *next; + struct spu *spu; int node; while (!kthread_should_stop()) { set_current_state(TASK_INTERRUPTIBLE); schedule(); for (node = 0; node < MAX_NUMNODES; node++) { - mutex_lock(&spu_prio->active_mutex[node]); - list_for_each_entry_safe(spu, next, - &spu_prio->active_list[node], - list) - spusched_tick(spu->ctx); - mutex_unlock(&spu_prio->active_mutex[node]); + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) + if (spu->ctx) + spusched_tick(spu->ctx); + mutex_unlock(&cbe_spu_info[node].list_mutex); } } @@ -751,10 +927,9 @@ int __init spu_sched_init(void) INIT_LIST_HEAD(&spu_prio->runq[i]); __clear_bit(i, spu_prio->bitmap); } - __set_bit(MAX_PRIO, spu_prio->bitmap); for (i = 0; i < MAX_NUMNODES; i++) { - mutex_init(&spu_prio->active_mutex[i]); - INIT_LIST_HEAD(&spu_prio->active_list[i]); + mutex_init(&cbe_spu_info[i].list_mutex); + INIT_LIST_HEAD(&cbe_spu_info[i].spus); } spin_lock_init(&spu_prio->runq_lock); @@ -783,9 +958,9 @@ int __init spu_sched_init(void) return err; } -void __exit spu_sched_exit(void) +void spu_sched_exit(void) { - struct spu *spu, *tmp; + struct spu *spu; int node; remove_proc_entry("spu_loadavg", NULL); @@ -794,13 +969,11 @@ void __exit spu_sched_exit(void) kthread_stop(spusched_task); for (node = 0; node < MAX_NUMNODES; node++) { - mutex_lock(&spu_prio->active_mutex[node]); - list_for_each_entry_safe(spu, tmp, &spu_prio->active_list[node], - list) { - list_del_init(&spu->list); - spu_free(spu); - } - mutex_unlock(&spu_prio->active_mutex[node]); + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) + if (spu->alloc_state != SPU_FREE) + spu->alloc_state = SPU_FREE; + mutex_unlock(&cbe_spu_info[node].list_mutex); } kfree(spu_prio); } |