summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2018-07-23 15:50:49 +0100
committerChris Wilson <chris@chris-wilson.co.uk>2018-10-16 12:02:48 +0100
commit2e8463216f4898522c3cba2fb93f9bd4592a9954 (patch)
treea62191c017cca043be00ba4340be02f94884925a
parente238e29df1c3a7c9e63a693ee43e95ce50585953 (diff)
drm/i915: Serialise concurrent calls to i915_gem_set_wedged()
Make i915_gem_set_wedged() and i915_gem_unset_wedged() behaviour more consistently if called concurrently. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c32
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.h4
-rw-r--r--drivers/gpu/drm/i915/selftests/mock_gem_device.c1
3 files changed, 28 insertions, 9 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index cad418e144f4..a300119e2b5f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3315,10 +3315,15 @@ static void nop_complete_submit_request(struct i915_request *request)
void i915_gem_set_wedged(struct drm_i915_private *i915)
{
+ struct i915_gpu_error *error = &i915->gpu_error;
struct intel_engine_cs *engine;
enum intel_engine_id id;
- GEM_TRACE("start\n");
+ mutex_lock(&error->wedge_mutex);
+ if (test_bit(I915_WEDGED, &error->flags)) {
+ mutex_unlock(&error->wedge_mutex);
+ return;
+ }
if (GEM_SHOW_DEBUG()) {
struct drm_printer p = drm_debug_printer(__func__);
@@ -3327,8 +3332,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
intel_engine_dump(engine, &p, "%s\n", engine->name);
}
- if (test_and_set_bit(I915_WEDGED, &i915->gpu_error.flags))
- goto out;
+ GEM_TRACE("start\n");
/*
* First, stop submission to hw, but do not yet complete requests by
@@ -3388,20 +3392,28 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
i915_gem_reset_finish_engine(engine);
}
-out:
+ smp_mb__before_atomic();
+ set_bit(I915_WEDGED, &error->flags);
+
GEM_TRACE("end\n");
+ mutex_unlock(&error->wedge_mutex);
- wake_up_all(&i915->gpu_error.reset_queue);
+ wake_up_all(&error->reset_queue);
}
bool i915_gem_unset_wedged(struct drm_i915_private *i915)
{
+ struct i915_gpu_error *error = &i915->gpu_error;
struct i915_timeline *tl;
+ bool ret = false;
lockdep_assert_held(&i915->drm.struct_mutex);
- if (!test_bit(I915_WEDGED, &i915->gpu_error.flags))
+
+ if (!test_bit(I915_WEDGED, &error->flags))
return true;
+ mutex_lock(&error->wedge_mutex);
+
GEM_TRACE("start\n");
/*
@@ -3435,7 +3447,7 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
*/
if (dma_fence_default_wait(&rq->fence, true,
MAX_SCHEDULE_TIMEOUT) < 0)
- return false;
+ goto unlock;
}
i915_retire_requests(i915);
GEM_BUG_ON(i915->gt.active_requests);
@@ -3458,8 +3470,11 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
clear_bit(I915_WEDGED, &i915->gpu_error.flags);
+ ret = true;
+unlock:
+ mutex_unlock(&i915->gpu_error.wedge_mutex);
- return true;
+ return ret;
}
static void
@@ -5790,6 +5805,7 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
i915_gem_idle_work_handler);
init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
+ mutex_init(&dev_priv->gpu_error.wedge_mutex);
atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index ff2652bbb0b0..9b61037baa43 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -270,8 +270,8 @@ struct i915_gpu_error {
#define I915_RESET_BACKOFF 0
#define I915_RESET_HANDOFF 1
#define I915_RESET_MODESET 2
+#define I915_RESET_ENGINE 3
#define I915_WEDGED (BITS_PER_LONG - 1)
-#define I915_RESET_ENGINE (I915_WEDGED - I915_NUM_ENGINES)
/** Number of times an engine has been reset */
u32 reset_engine_count[I915_NUM_ENGINES];
@@ -282,6 +282,8 @@ struct i915_gpu_error {
/** Reason for the current *global* reset */
const char *reason;
+ struct mutex wedge_mutex; /* serialises wedging/unwedging */
+
/**
* Waitqueue to signal when a hang is detected. Used to for waiters
* to release the struct_mutex for the reset to procede.
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index aa4ddae94aca..4a25d2a344f2 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -189,6 +189,7 @@ struct drm_i915_private *mock_gem_device(void)
init_waitqueue_head(&i915->gpu_error.wait_queue);
init_waitqueue_head(&i915->gpu_error.reset_queue);
+ mutex_init(&i915->gpu_error.wedge_mutex);
i915->wq = alloc_ordered_workqueue("mock", 0);
if (!i915->wq)