Diffstat (limited to 'drivers/gpu/drm/xe/xe_guc_submit.c')
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_submit.c | 131
1 file changed, 51 insertions(+), 80 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index fbbe6a487bbb..4f5d00aea716 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -224,64 +224,11 @@ static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q)
 		   EXEC_QUEUE_STATE_BANNED));
 }
 
-#ifdef CONFIG_PROVE_LOCKING
-static int alloc_submit_wq(struct xe_guc *guc)
-{
-	int i;
-
-	for (i = 0; i < NUM_SUBMIT_WQ; ++i) {
-		guc->submission_state.submit_wq_pool[i] =
-			alloc_ordered_workqueue("submit_wq", 0);
-		if (!guc->submission_state.submit_wq_pool[i])
-			goto err_free;
-	}
-
-	return 0;
-
-err_free:
-	while (i)
-		destroy_workqueue(guc->submission_state.submit_wq_pool[--i]);
-
-	return -ENOMEM;
-}
-
-static void free_submit_wq(struct xe_guc *guc)
-{
-	int i;
-
-	for (i = 0; i < NUM_SUBMIT_WQ; ++i)
-		destroy_workqueue(guc->submission_state.submit_wq_pool[i]);
-}
-
-static struct workqueue_struct *get_submit_wq(struct xe_guc *guc)
-{
-	int idx = guc->submission_state.submit_wq_idx++ % NUM_SUBMIT_WQ;
-
-	return guc->submission_state.submit_wq_pool[idx];
-}
-#else
-static int alloc_submit_wq(struct xe_guc *guc)
-{
-	return 0;
-}
-
-static void free_submit_wq(struct xe_guc *guc)
-{
-
-}
-
-static struct workqueue_struct *get_submit_wq(struct xe_guc *guc)
-{
-	return NULL;
-}
-#endif
-
 static void guc_submit_fini(struct drm_device *drm, void *arg)
 {
 	struct xe_guc *guc = arg;
 
 	xa_destroy(&guc->submission_state.exec_queue_lookup);
-	free_submit_wq(guc);
 }
 
 static void guc_submit_wedged_fini(void *arg)
@@ -290,9 +237,15 @@ static void guc_submit_wedged_fini(void *arg)
 	struct xe_exec_queue *q;
 	unsigned long index;
 
-	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
-		if (exec_queue_wedged(q))
+	mutex_lock(&guc->submission_state.lock);
+	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
+		if (exec_queue_wedged(q)) {
+			mutex_unlock(&guc->submission_state.lock);
 			xe_exec_queue_put(q);
+			mutex_lock(&guc->submission_state.lock);
+		}
+	}
+	mutex_unlock(&guc->submission_state.lock);
 }
 
 static const struct xe_exec_queue_ops guc_exec_queue_ops;
@@ -337,14 +290,12 @@ int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids)
 	if (err)
 		return err;
 
-	err = alloc_submit_wq(guc);
-	if (err)
-		return err;
-
 	gt->exec_queue_ops = &guc_exec_queue_ops;
 
 	xa_init(&guc->submission_state.exec_queue_lookup);
 
+	init_waitqueue_head(&guc->submission_state.fini_wq);
+
 	primelockdep(guc);
 
 	return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
@@ -361,12 +312,14 @@ static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa
 	xe_guc_id_mgr_release_locked(&guc->submission_state.idm,
 				     q->guc->id, q->width);
+
+	if (xa_empty(&guc->submission_state.exec_queue_lookup))
+		wake_up(&guc->submission_state.fini_wq);
 }
 
 static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
 {
 	int ret;
-	void *ptr;
 	int i;
 
 	/*
@@ -386,12 +339,10 @@ static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
 	q->guc->id = ret;
 
 	for (i = 0; i < q->width; ++i) {
-		ptr = xa_store(&guc->submission_state.exec_queue_lookup,
-			       q->guc->id + i, q, GFP_NOWAIT);
-		if (IS_ERR(ptr)) {
-			ret = PTR_ERR(ptr);
+		ret = xa_err(xa_store(&guc->submission_state.exec_queue_lookup,
+				      q->guc->id + i, q, GFP_NOWAIT));
+		if (ret)
 			goto err_release;
-		}
 	}
 
 	return 0;
@@ -794,8 +745,6 @@ static void guc_exec_queue_free_job(struct drm_sched_job *drm_job)
 {
 	struct xe_sched_job *job = to_xe_sched_job(drm_job);
 
-	xe_exec_queue_update_run_ticks(job->q);
-
 	trace_xe_sched_job_free(job);
 	xe_sched_job_put(job);
 }
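
Note: the alloc_guc_id() hunk above replaces the IS_ERR()/PTR_ERR() handling of xa_store()'s return value with the xa_err() helper. A minimal sketch of that idiom, assuming kernel context; store_entry() is a hypothetical wrapper, not part of this patch:

	#include <linux/xarray.h>

	/*
	 * xa_store() returns the previous entry at the index, or an
	 * error-encoded pointer on failure; xa_err() maps that to 0 on
	 * success or a negative errno such as -ENOMEM, which avoids
	 * keeping a temporary void *ptr just to run IS_ERR()/PTR_ERR().
	 */
	static int store_entry(struct xarray *xa, unsigned long index, void *entry)
	{
		return xa_err(xa_store(xa, index, entry, GFP_KERNEL));
	}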
@@ -965,12 +914,22 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
 static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job)
 {
 	struct xe_gt *gt = guc_to_gt(exec_queue_to_guc(q));
-	u32 ctx_timestamp = xe_lrc_ctx_timestamp(q->lrc[0]);
-	u32 ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]);
+	u32 ctx_timestamp, ctx_job_timestamp;
 	u32 timeout_ms = q->sched_props.job_timeout_ms;
 	u32 diff;
 	u64 running_time_ms;
 
+	if (!xe_sched_job_started(job)) {
+		xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, not started",
+			   xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
+			   q->guc->id);
+
+		return xe_sched_invalidate_job(job, 2);
+	}
+
+	ctx_timestamp = xe_lrc_ctx_timestamp(q->lrc[0]);
+	ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]);
+
 	/*
 	 * Counter wraps at ~223s at the usual 19.2MHz, be paranoid catch
 	 * possible overflows with a high timeout.
@@ -1079,10 +1038,13 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
 
 	/*
 	 * TDR has fired before free job worker. Common if exec queue
-	 * immediately closed after last fence signaled.
+	 * immediately closed after last fence signaled. Add back to pending
+	 * list so job can be freed and kick scheduler ensuring free job is not
+	 * lost.
 	 */
 	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) {
-		guc_exec_queue_free_job(drm_job);
+		xe_sched_add_pending_job(sched, job);
+		xe_sched_submission_start(sched);
 
 		return DRM_GPU_SCHED_STAT_NOMINAL;
 	}
@@ -1095,10 +1057,6 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
 		exec_queue_killed_or_banned_or_wedged(q) ||
 		exec_queue_destroyed(q);
 
-	/* Job hasn't started, can't be timed out */
-	if (!skip_timeout_check && !xe_sched_job_started(job))
-		goto rearm;
-
 	/*
 	 * XXX: Sampling timeout doesn't work in wedged mode as we have to
 	 * modify scheduling state to read timestamp. We could read the
@@ -1268,13 +1226,16 @@ static void __guc_exec_queue_fini_async(struct work_struct *w)
 
 static void guc_exec_queue_fini_async(struct xe_exec_queue *q)
 {
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
+
 	INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async);
 
 	/* We must block on kernel engines so slabs are empty on driver unload */
 	if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q))
 		__guc_exec_queue_fini_async(&q->guc->fini_async);
 	else
-		queue_work(system_wq, &q->guc->fini_async);
+		queue_work(xe->destroy_wq, &q->guc->fini_async);
 }
 
 static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q)
@@ -1452,8 +1413,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
 	timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ?
		  MAX_SCHEDULE_TIMEOUT : msecs_to_jiffies(q->sched_props.job_timeout_ms);
 	err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops,
-			    get_submit_wq(guc),
-			    q->lrc[0]->ring.size / MAX_JOB_SIZE_BYTES, 64,
+			    NULL, q->lrc[0]->ring.size / MAX_JOB_SIZE_BYTES, 64,
 			    timeout, guc_to_gt(guc)->ordered_wq, NULL,
 			    q->name, gt_to_xe(q->gt)->drm.dev);
 	if (err)
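
Note: the guc_exec_queue_fini_async() hunk above stops queueing teardown on the shared system_wq and uses a driver-owned workqueue (xe->destroy_wq, whose setup is outside this file's diff). A driver-owned queue can be drained and destroyed on unload, which system_wq cannot. A rough sketch of the pattern; struct drv and its helpers are illustrative, not from this patch:

	#include <linux/workqueue.h>

	struct drv {
		struct workqueue_struct *destroy_wq;
	};

	static int drv_init(struct drv *d)
	{
		/* Private workqueue for object-teardown work items */
		d->destroy_wq = alloc_workqueue("drv_destroy_wq", 0, 0);
		return d->destroy_wq ? 0 : -ENOMEM;
	}

	static void drv_fini(struct drv *d)
	{
		/* Flushes all queued work, then frees the workqueue */
		destroy_workqueue(d->destroy_wq);
	}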
@@ -1770,8 +1730,13 @@ void xe_guc_submit_stop(struct xe_guc *guc)
 
 	mutex_lock(&guc->submission_state.lock);
 
-	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
+	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
+		/* Prevent redundant attempts to stop parallel queues */
+		if (q->guc->id != index)
+			continue;
+
 		guc_exec_queue_stop(guc, q);
+	}
 
 	mutex_unlock(&guc->submission_state.lock);
 
@@ -1796,6 +1761,7 @@ static void guc_exec_queue_start(struct xe_exec_queue *q)
 	}
 
 	xe_sched_submission_start(sched);
+	xe_sched_submission_resume_tdr(sched);
 }
 
 int xe_guc_submit_start(struct xe_guc *guc)
@@ -1808,8 +1774,13 @@ int xe_guc_submit_start(struct xe_guc *guc)
 
 	mutex_lock(&guc->submission_state.lock);
 	atomic_dec(&guc->submission_state.stopped);
-	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
+	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
+		/* Prevent redundant attempts to start parallel queues */
+		if (q->guc->id != index)
+			continue;
+
 		guc_exec_queue_start(q);
+	}
 
 	mutex_unlock(&guc->submission_state.lock);
 
 	wake_up_all(&guc->ct.wq);
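
Note: the stop/start loops above visit each exec queue exactly once. As alloc_guc_id() shows earlier in this diff, a parallel (width > 1) queue is stored at width consecutive GuC IDs that all map to the same queue, so the loops skip entries whose index is not the queue's first ID. A small sketch of the dedup, assuming kernel context; struct queue and for_each_unique_queue() are hypothetical:

	#include <linux/xarray.h>

	struct queue {
		u32 id;		/* first lookup index the queue occupies */
		u32 width;	/* number of consecutive indices it occupies */
	};

	static void for_each_unique_queue(struct xarray *xa)
	{
		struct queue *q;
		unsigned long index;

		xa_for_each(xa, index, q) {
			/* Slots id+1 .. id+width-1 alias the same queue */
			if (q->id != index)
				continue;
			/* act on q exactly once here */
		}
	}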