diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2018-07-23 15:50:49 +0100 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2018-10-16 12:02:48 +0100 |
commit | 2e8463216f4898522c3cba2fb93f9bd4592a9954 (patch) | |
tree | a62191c017cca043be00ba4340be02f94884925a | |
parent | e238e29df1c3a7c9e63a693ee43e95ce50585953 (diff) |
drm/i915: Serialise concurrent calls to i915_gem_set_wedged()
Make i915_gem_set_wedged() and i915_gem_unset_wedged() behave more
consistently if called concurrently.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 32 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_gpu_error.h | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/selftests/mock_gem_device.c | 1 |
3 files changed, 28 insertions, 9 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index cad418e144f4..a300119e2b5f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3315,10 +3315,15 @@ static void nop_complete_submit_request(struct i915_request *request) void i915_gem_set_wedged(struct drm_i915_private *i915) { + struct i915_gpu_error *error = &i915->gpu_error; struct intel_engine_cs *engine; enum intel_engine_id id; - GEM_TRACE("start\n"); + mutex_lock(&error->wedge_mutex); + if (test_bit(I915_WEDGED, &error->flags)) { + mutex_unlock(&error->wedge_mutex); + return; + } if (GEM_SHOW_DEBUG()) { struct drm_printer p = drm_debug_printer(__func__); @@ -3327,8 +3332,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) intel_engine_dump(engine, &p, "%s\n", engine->name); } - if (test_and_set_bit(I915_WEDGED, &i915->gpu_error.flags)) - goto out; + GEM_TRACE("start\n"); /* * First, stop submission to hw, but do not yet complete requests by @@ -3388,20 +3392,28 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) i915_gem_reset_finish_engine(engine); } -out: + smp_mb__before_atomic(); + set_bit(I915_WEDGED, &error->flags); + GEM_TRACE("end\n"); + mutex_unlock(&error->wedge_mutex); - wake_up_all(&i915->gpu_error.reset_queue); + wake_up_all(&error->reset_queue); } bool i915_gem_unset_wedged(struct drm_i915_private *i915) { + struct i915_gpu_error *error = &i915->gpu_error; struct i915_timeline *tl; + bool ret = false; lockdep_assert_held(&i915->drm.struct_mutex); - if (!test_bit(I915_WEDGED, &i915->gpu_error.flags)) + + if (!test_bit(I915_WEDGED, &error->flags)) return true; + mutex_lock(&error->wedge_mutex); + GEM_TRACE("start\n"); /* @@ -3435,7 +3447,7 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915) */ if (dma_fence_default_wait(&rq->fence, true, MAX_SCHEDULE_TIMEOUT) < 0) - return false; + goto unlock; } i915_retire_requests(i915); GEM_BUG_ON(i915->gt.active_requests); @@ -3458,8 +3470,11 @@ bool 
i915_gem_unset_wedged(struct drm_i915_private *i915) smp_mb__before_atomic(); /* complete takeover before enabling execbuf */ clear_bit(I915_WEDGED, &i915->gpu_error.flags); + ret = true; +unlock: + mutex_unlock(&i915->gpu_error.wedge_mutex); - return true; + return ret; } static void @@ -5790,6 +5805,7 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv) i915_gem_idle_work_handler); init_waitqueue_head(&dev_priv->gpu_error.wait_queue); init_waitqueue_head(&dev_priv->gpu_error.reset_queue); + mutex_init(&dev_priv->gpu_error.wedge_mutex); atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index ff2652bbb0b0..9b61037baa43 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -270,8 +270,8 @@ struct i915_gpu_error { #define I915_RESET_BACKOFF 0 #define I915_RESET_HANDOFF 1 #define I915_RESET_MODESET 2 +#define I915_RESET_ENGINE 3 #define I915_WEDGED (BITS_PER_LONG - 1) -#define I915_RESET_ENGINE (I915_WEDGED - I915_NUM_ENGINES) /** Number of times an engine has been reset */ u32 reset_engine_count[I915_NUM_ENGINES]; @@ -282,6 +282,8 @@ struct i915_gpu_error { /** Reason for the current *global* reset */ const char *reason; + struct mutex wedge_mutex; /* serialises wedging/unwedging */ + /** * Waitqueue to signal when a hang is detected. Used to for waiters * to release the struct_mutex for the reset to procede. 
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index aa4ddae94aca..4a25d2a344f2 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -189,6 +189,7 @@ struct drm_i915_private *mock_gem_device(void) init_waitqueue_head(&i915->gpu_error.wait_queue); init_waitqueue_head(&i915->gpu_error.reset_queue); + mutex_init(&i915->gpu_error.wedge_mutex); i915->wq = alloc_ordered_workqueue("mock", 0); if (!i915->wq) |