summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/intel_lrc.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/intel_lrc.c')
-rw-r--r--drivers/gpu/drm/i915/intel_lrc.c624
1 files changed, 365 insertions, 259 deletions
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 0adb879833ff..ebf8023d21e6 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -230,8 +230,6 @@ enum {
static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
struct intel_engine_cs *engine);
-static int intel_lr_context_pin(struct i915_gem_context *ctx,
- struct intel_engine_cs *engine);
static void execlists_init_reg_state(u32 *reg_state,
struct i915_gem_context *ctx,
struct intel_engine_cs *engine,
@@ -275,8 +273,7 @@ logical_ring_init_platform_invariants(struct intel_engine_cs *engine)
struct drm_i915_private *dev_priv = engine->i915;
engine->disable_lite_restore_wa =
- (IS_SKL_REVID(dev_priv, 0, SKL_REVID_B0) ||
- IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) &&
+ IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1) &&
(engine->id == VCS || engine->id == VCS2);
engine->ctx_desc_template = GEN8_CTX_VALID;
@@ -363,10 +360,11 @@ execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state)
static u64 execlists_update_context(struct drm_i915_gem_request *rq)
{
struct intel_context *ce = &rq->ctx->engine[rq->engine->id];
- struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
+ struct i915_hw_ppgtt *ppgtt =
+ rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
u32 *reg_state = ce->lrc_reg_state;
- reg_state[CTX_RING_TAIL+1] = intel_ring_offset(rq->ring, rq->tail);
+ reg_state[CTX_RING_TAIL+1] = rq->tail;
/* True 32b PPGTT with dynamic page allocation: update PDP
* registers and point the unallocated PDPs to scratch page.
@@ -416,7 +414,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
static bool ctx_single_port_submission(const struct i915_gem_context *ctx)
{
return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
- ctx->execlists_force_single_submission);
+ i915_gem_context_force_single_submission(ctx));
}
static bool can_merge_ctx(const struct i915_gem_context *prev,
@@ -433,15 +431,17 @@ static bool can_merge_ctx(const struct i915_gem_context *prev,
static void execlists_dequeue(struct intel_engine_cs *engine)
{
- struct drm_i915_gem_request *cursor, *last;
+ struct drm_i915_gem_request *last;
struct execlist_port *port = engine->execlist_port;
+ unsigned long flags;
+ struct rb_node *rb;
bool submit = false;
last = port->request;
if (last)
/* WaIdleLiteRestore:bdw,skl
* Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
- * as we resubmit the request. See gen8_emit_request()
+ * as we resubmit the request. See gen8_emit_breadcrumb()
* for where we prepare the padding after the end of the
* request.
*/
@@ -470,8 +470,12 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* and context switches) submission.
*/
- spin_lock(&engine->execlist_lock);
- list_for_each_entry(cursor, &engine->execlist_queue, execlist_link) {
+ spin_lock_irqsave(&engine->timeline->lock, flags);
+ rb = engine->execlist_first;
+ while (rb) {
+ struct drm_i915_gem_request *cursor =
+ rb_entry(rb, typeof(*cursor), priotree.node);
+
/* Can we combine this request with the current port? It has to
* be the same context/ringbuffer and not have any exceptions
* (e.g. GVT saying never to combine contexts).
@@ -494,7 +498,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* context (even though a different request) to
* the second port.
*/
- if (ctx_single_port_submission(cursor->ctx))
+ if (ctx_single_port_submission(last->ctx) ||
+ ctx_single_port_submission(cursor->ctx))
break;
GEM_BUG_ON(last->ctx == cursor->ctx);
@@ -502,17 +507,21 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
i915_gem_request_assign(&port->request, last);
port++;
}
+
+ rb = rb_next(rb);
+ rb_erase(&cursor->priotree.node, &engine->execlist_queue);
+ RB_CLEAR_NODE(&cursor->priotree.node);
+ cursor->priotree.priority = INT_MAX;
+
+ __i915_gem_request_submit(cursor);
last = cursor;
submit = true;
}
if (submit) {
- /* Decouple all the requests submitted from the queue */
- engine->execlist_queue.next = &cursor->execlist_link;
- cursor->execlist_link.prev = &engine->execlist_queue;
-
i915_gem_request_assign(&port->request, last);
+ engine->execlist_first = rb;
}
- spin_unlock(&engine->execlist_lock);
+ spin_unlock_irqrestore(&engine->timeline->lock, flags);
if (submit)
execlists_submit_ports(engine);
@@ -523,6 +532,28 @@ static bool execlists_elsp_idle(struct intel_engine_cs *engine)
return !engine->execlist_port[0].request;
}
+/**
+ * intel_execlists_idle() - Determine if all engine submission ports are idle
+ * @dev_priv: i915 device private
+ *
+ * Return true if there are no requests pending on any of the submission ports
+ * of any engines.
+ */
+bool intel_execlists_idle(struct drm_i915_private *dev_priv)
+{
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ if (!i915.enable_execlists)
+ return true;
+
+ for_each_engine(engine, dev_priv, id)
+ if (!execlists_elsp_idle(engine))
+ return false;
+
+ return true;
+}
+
static bool execlists_elsp_ready(struct intel_engine_cs *engine)
{
int port;
@@ -593,129 +624,151 @@ static void intel_lrc_irq_handler(unsigned long data)
intel_uncore_forcewake_put(dev_priv, engine->fw_domains);
}
+static bool insert_request(struct i915_priotree *pt, struct rb_root *root)
+{
+ struct rb_node **p, *rb;
+ bool first = true;
+
+ /* most positive priority is scheduled first, equal priorities fifo */
+ rb = NULL;
+ p = &root->rb_node;
+ while (*p) {
+ struct i915_priotree *pos;
+
+ rb = *p;
+ pos = rb_entry(rb, typeof(*pos), node);
+ if (pt->priority > pos->priority) {
+ p = &rb->rb_left;
+ } else {
+ p = &rb->rb_right;
+ first = false;
+ }
+ }
+ rb_link_node(&pt->node, rb, p);
+ rb_insert_color(&pt->node, root);
+
+ return first;
+}
+
static void execlists_submit_request(struct drm_i915_gem_request *request)
{
struct intel_engine_cs *engine = request->engine;
unsigned long flags;
- spin_lock_irqsave(&engine->execlist_lock, flags);
+ /* Will be called from irq-context when using foreign fences. */
+ spin_lock_irqsave(&engine->timeline->lock, flags);
- list_add_tail(&request->execlist_link, &engine->execlist_queue);
+ if (insert_request(&request->priotree, &engine->execlist_queue))
+ engine->execlist_first = &request->priotree.node;
if (execlists_elsp_idle(engine))
tasklet_hi_schedule(&engine->irq_tasklet);
- spin_unlock_irqrestore(&engine->execlist_lock, flags);
+ spin_unlock_irqrestore(&engine->timeline->lock, flags);
}
-int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request)
+static struct intel_engine_cs *
+pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked)
{
- struct intel_engine_cs *engine = request->engine;
- struct intel_context *ce = &request->ctx->engine[engine->id];
- int ret;
-
- /* Flush enough space to reduce the likelihood of waiting after
- * we start building the request - in which case we will just
- * have to repeat work.
- */
- request->reserved_space += EXECLISTS_REQUEST_SIZE;
+ struct intel_engine_cs *engine;
- if (!ce->state) {
- ret = execlists_context_deferred_alloc(request->ctx, engine);
- if (ret)
- return ret;
+ engine = container_of(pt,
+ struct drm_i915_gem_request,
+ priotree)->engine;
+ if (engine != locked) {
+ if (locked)
+ spin_unlock_irq(&locked->timeline->lock);
+ spin_lock_irq(&engine->timeline->lock);
}
- request->ring = ce->ring;
+ return engine;
+}
- ret = intel_lr_context_pin(request->ctx, engine);
- if (ret)
- return ret;
+static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
+{
+ struct intel_engine_cs *engine = NULL;
+ struct i915_dependency *dep, *p;
+ struct i915_dependency stack;
+ LIST_HEAD(dfs);
- if (i915.enable_guc_submission) {
- /*
- * Check that the GuC has space for the request before
- * going any further, as the i915_add_request() call
- * later on mustn't fail ...
- */
- ret = i915_guc_wq_reserve(request);
- if (ret)
- goto err_unpin;
- }
+ if (prio <= READ_ONCE(request->priotree.priority))
+ return;
- ret = intel_ring_begin(request, 0);
- if (ret)
- goto err_unreserve;
+ /* Need BKL in order to use the temporary link inside i915_dependency */
+ lockdep_assert_held(&request->i915->drm.struct_mutex);
- if (!ce->initialised) {
- ret = engine->init_context(request);
- if (ret)
- goto err_unreserve;
+ stack.signaler = &request->priotree;
+ list_add(&stack.dfs_link, &dfs);
- ce->initialised = true;
+ /* Recursively bump all dependent priorities to match the new request.
+ *
+ * A naive approach would be to use recursion:
+ * static void update_priorities(struct i915_priotree *pt, prio) {
+ * list_for_each_entry(dep, &pt->signalers_list, signal_link)
+ * update_priorities(dep->signal, prio)
+ * insert_request(pt);
+ * }
+ * but that may have unlimited recursion depth and so runs a very
+ * real risk of overunning the kernel stack. Instead, we build
+ * a flat list of all dependencies starting with the current request.
+ * As we walk the list of dependencies, we add all of its dependencies
+ * to the end of the list (this may include an already visited
+ * request) and continue to walk onwards onto the new dependencies. The
+ * end result is a topological list of requests in reverse order, the
+ * last element in the list is the request we must execute first.
+ */
+ list_for_each_entry_safe(dep, p, &dfs, dfs_link) {
+ struct i915_priotree *pt = dep->signaler;
+
+ list_for_each_entry(p, &pt->signalers_list, signal_link)
+ if (prio > READ_ONCE(p->signaler->priority))
+ list_move_tail(&p->dfs_link, &dfs);
+
+ list_safe_reset_next(dep, p, dfs_link);
+ if (!RB_EMPTY_NODE(&pt->node))
+ continue;
+
+ engine = pt_lock_engine(pt, engine);
+
+ /* If it is not already in the rbtree, we can update the
+ * priority inplace and skip over it (and its dependencies)
+ * if it is referenced *again* as we descend the dfs.
+ */
+ if (prio > pt->priority && RB_EMPTY_NODE(&pt->node)) {
+ pt->priority = prio;
+ list_del_init(&dep->dfs_link);
+ }
}
- /* Note that after this point, we have committed to using
- * this request as it is being used to both track the
- * state of engine initialisation and liveness of the
- * golden renderstate above. Think twice before you try
- * to cancel/unwind this request now.
- */
+ /* Fifo and depth-first replacement ensure our deps execute before us */
+ list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
+ struct i915_priotree *pt = dep->signaler;
- request->reserved_space -= EXECLISTS_REQUEST_SIZE;
- return 0;
+ INIT_LIST_HEAD(&dep->dfs_link);
-err_unreserve:
- if (i915.enable_guc_submission)
- i915_guc_wq_unreserve(request);
-err_unpin:
- intel_lr_context_unpin(request->ctx, engine);
- return ret;
-}
+ engine = pt_lock_engine(pt, engine);
-/*
- * intel_logical_ring_advance() - advance the tail and prepare for submission
- * @request: Request to advance the logical ringbuffer of.
- *
- * The tail is updated in our logical ringbuffer struct, not in the actual context. What
- * really happens during submission is that the context and current tail will be placed
- * on a queue waiting for the ELSP to be ready to accept a new context submission. At that
- * point, the tail *inside* the context is updated and the ELSP written to.
- */
-static int
-intel_logical_ring_advance(struct drm_i915_gem_request *request)
-{
- struct intel_ring *ring = request->ring;
- struct intel_engine_cs *engine = request->engine;
+ if (prio <= pt->priority)
+ continue;
- intel_ring_advance(ring);
- request->tail = ring->tail;
+ GEM_BUG_ON(RB_EMPTY_NODE(&pt->node));
- /*
- * Here we add two extra NOOPs as padding to avoid
- * lite restore of a context with HEAD==TAIL.
- *
- * Caller must reserve WA_TAIL_DWORDS for us!
- */
- intel_ring_emit(ring, MI_NOOP);
- intel_ring_emit(ring, MI_NOOP);
- intel_ring_advance(ring);
- request->wa_tail = ring->tail;
+ pt->priority = prio;
+ rb_erase(&pt->node, &engine->execlist_queue);
+ if (insert_request(pt, &engine->execlist_queue))
+ engine->execlist_first = &pt->node;
+ }
- /* We keep the previous context alive until we retire the following
- * request. This ensures that any the context object is still pinned
- * for any residual writes the HW makes into it on the context switch
- * into the next object following the breadcrumb. Otherwise, we may
- * retire the context too early.
- */
- request->previous_context = engine->last_context;
- engine->last_context = request->ctx;
- return 0;
+ if (engine)
+ spin_unlock_irq(&engine->timeline->lock);
+
+ /* XXX Do we need to preempt to make room for us and our deps? */
}
-static int intel_lr_context_pin(struct i915_gem_context *ctx,
- struct intel_engine_cs *engine)
+static int execlists_context_pin(struct intel_engine_cs *engine,
+ struct i915_gem_context *ctx)
{
struct intel_context *ce = &ctx->engine[engine->id];
+ unsigned int flags;
void *vaddr;
int ret;
@@ -724,8 +777,20 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx,
if (ce->pin_count++)
return 0;
- ret = i915_vma_pin(ce->state, 0, GEN8_LR_CONTEXT_ALIGN,
- PIN_OFFSET_BIAS | GUC_WOPCM_TOP | PIN_GLOBAL);
+ if (!ce->state) {
+ ret = execlists_context_deferred_alloc(ctx, engine);
+ if (ret)
+ goto err;
+ }
+ GEM_BUG_ON(!ce->state);
+
+ flags = PIN_GLOBAL;
+ if (ctx->ggtt_offset_bias)
+ flags |= PIN_OFFSET_BIAS | ctx->ggtt_offset_bias;
+ if (i915_gem_context_is_kernel(ctx))
+ flags |= PIN_HIGH;
+
+ ret = i915_vma_pin(ce->state, 0, GEN8_LR_CONTEXT_ALIGN, flags);
if (ret)
goto err;
@@ -735,7 +800,7 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx,
goto unpin_vma;
}
- ret = intel_ring_pin(ce->ring);
+ ret = intel_ring_pin(ce->ring, ctx->ggtt_offset_bias);
if (ret)
goto unpin_map;
@@ -745,13 +810,7 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx,
ce->lrc_reg_state[CTX_RING_BUFFER_START+1] =
i915_ggtt_offset(ce->ring->vma);
- ce->state->obj->dirty = true;
-
- /* Invalidate GuC TLB. */
- if (i915.enable_guc_submission) {
- struct drm_i915_private *dev_priv = ctx->i915;
- I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
- }
+ ce->state->obj->mm.dirty = true;
i915_gem_context_get(ctx);
return 0;
@@ -765,8 +824,8 @@ err:
return ret;
}
-void intel_lr_context_unpin(struct i915_gem_context *ctx,
- struct intel_engine_cs *engine)
+static void execlists_context_unpin(struct intel_engine_cs *engine,
+ struct i915_gem_context *ctx)
{
struct intel_context *ce = &ctx->engine[engine->id];
@@ -784,6 +843,63 @@ void intel_lr_context_unpin(struct i915_gem_context *ctx,
i915_gem_context_put(ctx);
}
+static int execlists_request_alloc(struct drm_i915_gem_request *request)
+{
+ struct intel_engine_cs *engine = request->engine;
+ struct intel_context *ce = &request->ctx->engine[engine->id];
+ int ret;
+
+ GEM_BUG_ON(!ce->pin_count);
+
+ /* Flush enough space to reduce the likelihood of waiting after
+ * we start building the request - in which case we will just
+ * have to repeat work.
+ */
+ request->reserved_space += EXECLISTS_REQUEST_SIZE;
+
+ GEM_BUG_ON(!ce->ring);
+ request->ring = ce->ring;
+
+ if (i915.enable_guc_submission) {
+ /*
+ * Check that the GuC has space for the request before
+ * going any further, as the i915_add_request() call
+ * later on mustn't fail ...
+ */
+ ret = i915_guc_wq_reserve(request);
+ if (ret)
+ goto err;
+ }
+
+ ret = intel_ring_begin(request, 0);
+ if (ret)
+ goto err_unreserve;
+
+ if (!ce->initialised) {
+ ret = engine->init_context(request);
+ if (ret)
+ goto err_unreserve;
+
+ ce->initialised = true;
+ }
+
+ /* Note that after this point, we have committed to using
+ * this request as it is being used to both track the
+ * state of engine initialisation and liveness of the
+ * golden renderstate above. Think twice before you try
+ * to cancel/unwind this request now.
+ */
+
+ request->reserved_space -= EXECLISTS_REQUEST_SIZE;
+ return 0;
+
+err_unreserve:
+ if (i915.enable_guc_submission)
+ i915_guc_wq_unreserve(request);
+err:
+ return ret;
+}
+
static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
{
int ret, i;
@@ -849,19 +965,8 @@ static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine,
uint32_t *batch,
uint32_t index)
{
- struct drm_i915_private *dev_priv = engine->i915;
uint32_t l3sqc4_flush = (0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES);
- /*
- * WaDisableLSQCROPERFforOCL:skl,kbl
- * This WA is implemented in skl_init_clock_gating() but since
- * this batch updates GEN8_L3SQCREG4 with default value we need to
- * set this bit here to retain the WA during flush.
- */
- if (IS_SKL_REVID(dev_priv, 0, SKL_REVID_E0) ||
- IS_KBL_REVID(dev_priv, 0, KBL_REVID_E0))
- l3sqc4_flush |= GEN8_LQSC_RO_PERF_DIS;
-
wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8 |
MI_SRM_LRM_GLOBAL_GTT));
wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4);
@@ -1002,9 +1107,8 @@ static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine,
struct drm_i915_private *dev_priv = engine->i915;
uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
- /* WaDisableCtxRestoreArbitration:skl,bxt */
- if (IS_SKL_REVID(dev_priv, 0, SKL_REVID_D0) ||
- IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1))
+ /* WaDisableCtxRestoreArbitration:bxt */
+ if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1))
wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_DISABLE);
/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt */
@@ -1075,9 +1179,8 @@ static int gen9_init_perctx_bb(struct intel_engine_cs *engine,
{
uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
- /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl,bxt */
- if (IS_SKL_REVID(engine->i915, 0, SKL_REVID_B0) ||
- IS_BXT_REVID(engine->i915, 0, BXT_REVID_A1)) {
+ /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:bxt */
+ if (IS_BXT_REVID(engine->i915, 0, BXT_REVID_A1)) {
wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1));
wa_ctx_emit_reg(batch, index, GEN9_SLICE_COMMON_ECO_CHICKEN0);
wa_ctx_emit(batch, index,
@@ -1104,9 +1207,8 @@ static int gen9_init_perctx_bb(struct intel_engine_cs *engine,
wa_ctx_emit(batch, index, MI_NOOP);
}
- /* WaDisableCtxRestoreArbitration:skl,bxt */
- if (IS_SKL_REVID(engine->i915, 0, SKL_REVID_D0) ||
- IS_BXT_REVID(engine->i915, 0, BXT_REVID_A1))
+ /* WaDisableCtxRestoreArbitration:bxt */
+ if (IS_BXT_REVID(engine->i915, 0, BXT_REVID_A1))
wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_ENABLE);
wa_ctx_emit(batch, index, MI_BATCH_BUFFER_END);
@@ -1120,11 +1222,11 @@ static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *engine, u32 size)
struct i915_vma *vma;
int err;
- obj = i915_gem_object_create(&engine->i915->drm, PAGE_ALIGN(size));
+ obj = i915_gem_object_create(engine->i915, PAGE_ALIGN(size));
if (IS_ERR(obj))
return PTR_ERR(obj);
- vma = i915_vma_create(obj, &engine->i915->ggtt.base, NULL);
+ vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
goto err;
@@ -1218,15 +1320,6 @@ out:
return ret;
}
-static void lrc_init_hws(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv = engine->i915;
-
- I915_WRITE(RING_HWS_PGA(engine->mmio_base),
- engine->status_page.ggtt_offset);
- POSTING_READ(RING_HWS_PGA(engine->mmio_base));
-}
-
static int gen8_init_common_ring(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
@@ -1236,22 +1329,25 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
if (ret)
return ret;
- lrc_init_hws(engine);
-
intel_engine_reset_breadcrumbs(engine);
+ intel_engine_init_hangcheck(engine);
I915_WRITE(RING_HWSTAM(engine->mmio_base), 0xffffffff);
-
I915_WRITE(RING_MODE_GEN7(engine),
_MASKED_BIT_DISABLE(GFX_REPLAY_MODE) |
_MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));
+ I915_WRITE(RING_HWS_PGA(engine->mmio_base),
+ engine->status_page.ggtt_offset);
+ POSTING_READ(RING_HWS_PGA(engine->mmio_base));
DRM_DEBUG_DRIVER("Execlists enabled for %s\n", engine->name);
- intel_engine_init_hangcheck(engine);
-
- if (!execlists_elsp_idle(engine))
+ /* After a GPU reset, we may have requests to replay */
+ if (!execlists_elsp_idle(engine)) {
+ engine->execlist_port[0].count = 0;
+ engine->execlist_port[1].count = 0;
execlists_submit_ports(engine);
+ }
return 0;
}
@@ -1294,7 +1390,20 @@ static void reset_common_ring(struct intel_engine_cs *engine,
{
struct drm_i915_private *dev_priv = engine->i915;
struct execlist_port *port = engine->execlist_port;
- struct intel_context *ce = &request->ctx->engine[engine->id];
+ struct intel_context *ce;
+
+ /* If the request was innocent, we leave the request in the ELSP
+ * and will try to replay it on restarting. The context image may
+ * have been corrupted by the reset, in which case we may have
+ * to service a new GPU hang, but more likely we can continue on
+ * without impact.
+ *
+ * If the request was guilty, we presume the context is corrupt
+ * and have to at least restore the RING register in the context
+ * image back to the expected values to skip over the guilty request.
+ */
+ if (!request || request->fence.error != -EIO)
+ return;
/* We want a simple context + ring to execute the breadcrumb update.
* We cannot rely on the context being intact across the GPU hang,
@@ -1303,6 +1412,7 @@ static void reset_common_ring(struct intel_engine_cs *engine,
* future request will be after userspace has had the opportunity
* to recreate its own state.
*/
+ ce = &request->ctx->engine[engine->id];
execlists_init_reg_state(ce->lrc_reg_state,
request->ctx, engine, ce->ring);
@@ -1326,10 +1436,7 @@ static void reset_common_ring(struct intel_engine_cs *engine,
memset(&port[1], 0, sizeof(port[1]));
}
- /* CS is stopped, and we will resubmit both ports on resume */
GEM_BUG_ON(request->ctx != port[0].request->ctx);
- port[0].count = 0;
- port[1].count = 0;
/* Reset WaIdleLiteRestore:bdw,skl as well */
request->tail = request->wa_tail - WA_TAIL_DWORDS * sizeof(u32);
@@ -1570,39 +1677,35 @@ static void bxt_a_seqno_barrier(struct intel_engine_cs *engine)
* used as a workaround for not being allowed to do lite
* restore with HEAD==TAIL (WaIdleLiteRestore).
*/
-
-static int gen8_emit_request(struct drm_i915_gem_request *request)
+static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *out)
{
- struct intel_ring *ring = request->ring;
- int ret;
-
- ret = intel_ring_begin(request, 6 + WA_TAIL_DWORDS);
- if (ret)
- return ret;
+ *out++ = MI_NOOP;
+ *out++ = MI_NOOP;
+ request->wa_tail = intel_ring_offset(request->ring, out);
+}
+static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request,
+ u32 *out)
+{
/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5));
- intel_ring_emit(ring, (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW);
- intel_ring_emit(ring,
- intel_hws_seqno_address(request->engine) |
- MI_FLUSH_DW_USE_GTT);
- intel_ring_emit(ring, 0);
- intel_ring_emit(ring, request->fence.seqno);
- intel_ring_emit(ring, MI_USER_INTERRUPT);
- intel_ring_emit(ring, MI_NOOP);
- return intel_logical_ring_advance(request);
-}
+ *out++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
+ *out++ = intel_hws_seqno_address(request->engine) | MI_FLUSH_DW_USE_GTT;
+ *out++ = 0;
+ *out++ = request->global_seqno;
+ *out++ = MI_USER_INTERRUPT;
+ *out++ = MI_NOOP;
+ request->tail = intel_ring_offset(request->ring, out);
-static int gen8_emit_request_render(struct drm_i915_gem_request *request)
-{
- struct intel_ring *ring = request->ring;
- int ret;
+ gen8_emit_wa_tail(request, out);
+}
- ret = intel_ring_begin(request, 8 + WA_TAIL_DWORDS);
- if (ret)
- return ret;
+static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS;
+static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request,
+ u32 *out)
+{
/* We're using qword write, seqno should be aligned to 8 bytes. */
BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1);
@@ -1610,21 +1713,24 @@ static int gen8_emit_request_render(struct drm_i915_gem_request *request)
* need a prior CS_STALL, which is emitted by the flush
* following the batch.
*/
- intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
- intel_ring_emit(ring,
- (PIPE_CONTROL_GLOBAL_GTT_IVB |
- PIPE_CONTROL_CS_STALL |
- PIPE_CONTROL_QW_WRITE));
- intel_ring_emit(ring, intel_hws_seqno_address(request->engine));
- intel_ring_emit(ring, 0);
- intel_ring_emit(ring, i915_gem_request_get_seqno(request));
+ *out++ = GFX_OP_PIPE_CONTROL(6);
+ *out++ = (PIPE_CONTROL_GLOBAL_GTT_IVB |
+ PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_QW_WRITE);
+ *out++ = intel_hws_seqno_address(request->engine);
+ *out++ = 0;
+ *out++ = request->global_seqno;
/* We're thrashing one dword of HWS. */
- intel_ring_emit(ring, 0);
- intel_ring_emit(ring, MI_USER_INTERRUPT);
- intel_ring_emit(ring, MI_NOOP);
- return intel_logical_ring_advance(request);
+ *out++ = 0;
+ *out++ = MI_USER_INTERRUPT;
+ *out++ = MI_NOOP;
+ request->tail = intel_ring_offset(request->ring, out);
+
+ gen8_emit_wa_tail(request, out);
}
+static const int gen8_emit_breadcrumb_render_sz = 8 + WA_TAIL_DWORDS;
+
static int gen8_init_rcs_context(struct drm_i915_gem_request *req)
{
int ret;
@@ -1641,7 +1747,7 @@ static int gen8_init_rcs_context(struct drm_i915_gem_request *req)
if (ret)
DRM_ERROR("MOCS failed to program: expect performance issues.\n");
- return i915_gem_render_state_init(req);
+ return i915_gem_render_state_emit(req);
}
/**
@@ -1652,9 +1758,6 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv;
- if (!intel_engine_initialized(engine))
- return;
-
/*
* Tasklet cannot be active at this point due intel_mark_active/idle
* so this is just for documentation.
@@ -1671,24 +1774,28 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
if (engine->cleanup)
engine->cleanup(engine);
- intel_engine_cleanup_common(engine);
-
if (engine->status_page.vma) {
i915_gem_object_unpin_map(engine->status_page.vma->obj);
engine->status_page.vma = NULL;
}
- intel_lr_context_unpin(dev_priv->kernel_context, engine);
+
+ intel_engine_cleanup_common(engine);
lrc_destroy_wa_ctx_obj(engine);
engine->i915 = NULL;
+ dev_priv->engine[engine->id] = NULL;
+ kfree(engine);
}
void intel_execlists_enable_submission(struct drm_i915_private *dev_priv)
{
struct intel_engine_cs *engine;
+ enum intel_engine_id id;
- for_each_engine(engine, dev_priv)
+ for_each_engine(engine, dev_priv, id) {
engine->submit_request = execlists_submit_request;
+ engine->schedule = execlists_schedule;
+ }
}
static void
@@ -1697,9 +1804,17 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
/* Default vfuncs which can be overriden by each engine. */
engine->init_hw = gen8_init_common_ring;
engine->reset_hw = reset_common_ring;
+
+ engine->context_pin = execlists_context_pin;
+ engine->context_unpin = execlists_context_unpin;
+
+ engine->request_alloc = execlists_request_alloc;
+
engine->emit_flush = gen8_emit_flush;
- engine->emit_request = gen8_emit_request;
+ engine->emit_breadcrumb = gen8_emit_breadcrumb;
+ engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_sz;
engine->submit_request = execlists_submit_request;
+ engine->schedule = execlists_schedule;
engine->irq_enable = gen8_logical_ring_enable_irq;
engine->irq_disable = gen8_logical_ring_disable_irq;
@@ -1777,18 +1892,6 @@ logical_ring_init(struct intel_engine_cs *engine)
if (ret)
goto error;
- ret = execlists_context_deferred_alloc(dctx, engine);
- if (ret)
- goto error;
-
- /* As this is the default context, always pin it */
- ret = intel_lr_context_pin(dctx, engine);
- if (ret) {
- DRM_ERROR("Failed to pin context for %s: %d\n",
- engine->name, ret);
- goto error;
- }
-
/* And setup the hardware status page. */
ret = lrc_setup_hws(engine, dctx->engine[engine->id].state);
if (ret) {
@@ -1820,9 +1923,10 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
engine->init_hw = gen8_init_render_ring;
engine->init_context = gen8_init_rcs_context;
engine->emit_flush = gen8_emit_flush_render;
- engine->emit_request = gen8_emit_request_render;
+ engine->emit_breadcrumb = gen8_emit_breadcrumb_render;
+ engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_render_sz;
- ret = intel_engine_create_scratch(engine, 4096);
+ ret = intel_engine_create_scratch(engine, PAGE_SIZE);
if (ret)
return ret;
@@ -1837,12 +1941,7 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
ret);
}
- ret = logical_ring_init(engine);
- if (ret) {
- lrc_destroy_wa_ctx_obj(engine);
- }
-
- return ret;
+ return logical_ring_init(engine);
}
int logical_xcs_ring_init(struct intel_engine_cs *engine)
@@ -1945,7 +2044,7 @@ static void execlists_init_reg_state(u32 *reg_state,
RING_START(engine->mmio_base), 0);
ASSIGN_CTX_REG(reg_state, CTX_RING_BUFFER_CONTROL,
RING_CTL(engine->mmio_base),
- ((ring->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID);
+ RING_CTL_SIZE(ring->size) | RING_VALID);
ASSIGN_CTX_REG(reg_state, CTX_BB_HEAD_U,
RING_BBADDR_UDW(engine->mmio_base), 0);
ASSIGN_CTX_REG(reg_state, CTX_BB_HEAD_L,
@@ -2003,19 +2102,12 @@ static void execlists_init_reg_state(u32 *reg_state,
ASSIGN_CTX_REG(reg_state, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(engine, 0),
0);
- if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
+ if (ppgtt && USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
/* 64b PPGTT (48bit canonical)
* PDP0_DESCRIPTOR contains the base address to PML4 and
* other PDP Descriptors are ignored.
*/
ASSIGN_CTX_PML4(ppgtt, reg_state);
- } else {
- /* 32b PPGTT
- * PDP*_DESCRIPTOR contains the base address of space supported.
- * With dynamic page allocation, PDPs may not be allocated at
- * this point. Point the unallocated PDPs to the scratch page
- */
- execlists_update_context_pdps(ppgtt, reg_state);
}
if (engine->id == RCS) {
@@ -2046,7 +2138,7 @@ populate_lr_context(struct i915_gem_context *ctx,
DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
return ret;
}
- ctx_obj->dirty = true;
+ ctx_obj->mm.dirty = true;
/* The second page of the context object contains some fields which must
* be set up prior to the first execution. */
@@ -2109,18 +2201,19 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
WARN_ON(ce->state);
- context_size = round_up(intel_lr_context_size(engine), 4096);
+ context_size = round_up(intel_lr_context_size(engine),
+ I915_GTT_PAGE_SIZE);
/* One extra page as the sharing data between driver and GuC */
context_size += PAGE_SIZE * LRC_PPHWSP_PN;
- ctx_obj = i915_gem_object_create(&ctx->i915->drm, context_size);
+ ctx_obj = i915_gem_object_create(ctx->i915, context_size);
if (IS_ERR(ctx_obj)) {
DRM_DEBUG_DRIVER("Alloc LRC backing obj failed.\n");
return PTR_ERR(ctx_obj);
}
- vma = i915_vma_create(ctx_obj, &ctx->i915->ggtt.base, NULL);
+ vma = i915_vma_instance(ctx_obj, &ctx->i915->ggtt.base, NULL);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
goto error_deref_obj;
@@ -2153,30 +2246,43 @@ error_deref_obj:
void intel_lr_context_resume(struct drm_i915_private *dev_priv)
{
- struct i915_gem_context *ctx = dev_priv->kernel_context;
struct intel_engine_cs *engine;
+ struct i915_gem_context *ctx;
+ enum intel_engine_id id;
+
+ /* Because we emit WA_TAIL_DWORDS there may be a disparity
+ * between our bookkeeping in ce->ring->head and ce->ring->tail and
+ * that stored in context. As we only write new commands from
+ * ce->ring->tail onwards, everything before that is junk. If the GPU
+ * starts reading from its RING_HEAD from the context, it may try to
+ * execute that junk and die.
+ *
+ * So to avoid that we reset the context images upon resume. For
+ * simplicity, we just zero everything out.
+ */
+ list_for_each_entry(ctx, &dev_priv->context_list, link) {
+ for_each_engine(engine, dev_priv, id) {
+ struct intel_context *ce = &ctx->engine[engine->id];
+ u32 *reg;
- for_each_engine(engine, dev_priv) {
- struct intel_context *ce = &ctx->engine[engine->id];
- void *vaddr;
- uint32_t *reg_state;
-
- if (!ce->state)
- continue;
-
- vaddr = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB);
- if (WARN_ON(IS_ERR(vaddr)))
- continue;
+ if (!ce->state)
+ continue;
- reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
+ reg = i915_gem_object_pin_map(ce->state->obj,
+ I915_MAP_WB);
+ if (WARN_ON(IS_ERR(reg)))
+ continue;
- reg_state[CTX_RING_HEAD+1] = 0;
- reg_state[CTX_RING_TAIL+1] = 0;
+ reg += LRC_STATE_PN * PAGE_SIZE / sizeof(*reg);
+ reg[CTX_RING_HEAD+1] = 0;
+ reg[CTX_RING_TAIL+1] = 0;
- ce->state->obj->dirty = true;
- i915_gem_object_unpin_map(ce->state->obj);
+ ce->state->obj->mm.dirty = true;
+ i915_gem_object_unpin_map(ce->state->obj);
- ce->ring->head = 0;
- ce->ring->tail = 0;
+ ce->ring->head = ce->ring->tail = 0;
+ ce->ring->last_retired_head = -1;
+ intel_ring_update_space(ce->ring);
+ }
}
}