diff options
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/drm/i915/intel_lrc.c | 386 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/intel_ringbuffer.h | 33 |
2 files changed, 312 insertions, 107 deletions
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 7a996436fb4c..9a70f1cfea90 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -803,7 +803,7 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request) struct intel_engine_cs *engine = request->engine; struct i915_guc_client *client = dev_priv->guc.execbuf_client; static const bool fake = false; /* true => only pretend to preempt */ - bool preemptive = false; /* for now */ + bool preemptive; intel_logical_ring_advance(ringbuf); request->tail = ringbuf->tail; @@ -832,6 +832,7 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request) } } + preemptive = (request->scheduler_flags & I915_REQ_SF_PREEMPT) != 0; if (preemptive && dev_priv->guc.preempt_client && !fake) client = dev_priv->guc.preempt_client; @@ -1028,6 +1029,184 @@ int intel_execlists_submission(struct i915_execbuffer_params *params, } /* + * This function stores the specified constant value in the (index)th DWORD + * of the hardware status page (execlist mode only). See separate code for + * legacy mode. + */ +static void +emit_store_dw_index(struct drm_i915_gem_request *req, uint32_t value, + uint32_t index) +{ + struct intel_ringbuffer *ringbuf = req->ringbuf; + uint64_t hwpa = req->engine->status_page.gfx_addr; + + hwpa += index << MI_STORE_DWORD_INDEX_SHIFT; + + intel_logical_ring_emit(ringbuf, MI_STORE_DWORD_IMM_GEN4 | + MI_GLOBAL_GTT); + intel_logical_ring_emit(ringbuf, lower_32_bits(hwpa)); + intel_logical_ring_emit(ringbuf, upper_32_bits(hwpa)); /* GEN8+ */ + intel_logical_ring_emit(ringbuf, value); + + req->engine->gpu_caches_dirty = true; +} + +#if 0 +/* + * This function stores the specified register value in the (index)th DWORD + * of the hardware status page (execlist mode only). See separate code for + * legacy mode. + */ +static void +emit_store_reg_index(struct drm_i915_gem_request *req, i915_reg_t reg, + uint32_t index) +{ + struct intel_ringbuffer *ringbuf = req->ringbuf; + uint64_t hwpa = req->engine->status_page.gfx_addr; + + hwpa += index << MI_STORE_DWORD_INDEX_SHIFT; + + intel_logical_ring_emit(ringbuf, (MI_STORE_REG_MEM+1) | MI_GLOBAL_GTT); + intel_logical_ring_emit_reg(ringbuf, reg); + intel_logical_ring_emit(ringbuf, lower_32_bits(hwpa)); + intel_logical_ring_emit(ringbuf, upper_32_bits(hwpa)); /* GEN8+ */ + + req->engine->gpu_caches_dirty = true; +} +#endif /* 0 */ + +/* + * This function stores the two specified values in the (index)th DWORD + * and the following DWORD of the hardware status page (execlist mode only). + * See separate code for legacy mode. + */ +static int +gen8_emit_flush_qw_store_index(struct drm_i915_gem_request *request, + uint32_t flags, uint32_t index, + uint32_t data1, uint32_t data2) +{ + struct intel_ringbuffer *ringbuf = request->ringbuf; + uint32_t cmd; + int ret; + + cmd = MI_FLUSH_DW; + cmd += 2; /* 64-bit address and data */ + cmd |= MI_FLUSH_DW_OP_STOREDW; /* Store {D,Q}Word as post-op */ + cmd |= MI_FLUSH_DW_STORE_INDEX; /* Address is relative to HWSP */ + cmd |= flags; /* Extra (invalidate) bits */ + + /* The address must be QWord aligned (index must be EVEN) */ + index <<= MI_STORE_DWORD_INDEX_SHIFT; + if (WARN_ON_ONCE(index & 7)) + return -EINVAL; + /* w/a: bit 5 needs to be zero for MI_FLUSH_DW QWord address. */ + if (WARN_ON_ONCE(index & (1 << 5))) + return -EINVAL; + index |= MI_FLUSH_DW_USE_GTT; + + ret = intel_logical_ring_begin(request, 6); + if (ret) + return ret; + + intel_logical_ring_emit(ringbuf, cmd); + intel_logical_ring_emit(ringbuf, index); + intel_logical_ring_emit(ringbuf, 0); /* upper_32_bits(index) */ + intel_logical_ring_emit(ringbuf, data1); + intel_logical_ring_emit(ringbuf, data2); + + intel_logical_ring_emit(ringbuf, MI_NOOP); + + intel_logical_ring_advance(ringbuf); + return 0; +} + +/* + * This function stores the two specified values in the (index)th DWORD + * and the following DWORD of the hardware status page (execlist mode only). + * See separate code for legacy mode. + */ +static int +gen8_emit_pipe_control_qw_store_index(struct drm_i915_gem_request *request, + uint32_t flags, uint32_t index, + uint32_t data1, uint32_t data2) +{ + struct intel_ringbuffer *ringbuf = request->ringbuf; + uint32_t cmd, opts; + int ret; + + cmd = GFX_OP_PIPE_CONTROL(6); + + opts = PIPE_CONTROL_GLOBAL_GTT_IVB; /* Address via GGTT */ + opts |= PIPE_CONTROL_STORE_DATA_INDEX; /* Index into HWSP */ + opts |= PIPE_CONTROL_CS_STALL; /* Stall CS until done */ + opts |= PIPE_CONTROL_QW_WRITE; /* Write QWord */ + opts |= flags; /* Extra flag bits */ + + /* The address must be QWord aligned (index must be EVEN) */ + index <<= MI_STORE_DWORD_INDEX_SHIFT; + if (WARN_ON_ONCE(index & 7)) + return -EINVAL; + /* w/a: bit 5 needs to be zero for MI_FLUSH_DW QWord address. */ + if (WARN_ON_ONCE(index & (1 << 5))) + return -EINVAL; + + ret = intel_logical_ring_begin(request, 6); + if (ret) + return ret; + + intel_logical_ring_emit(ringbuf, cmd); + intel_logical_ring_emit(ringbuf, opts); + intel_logical_ring_emit(ringbuf, index); + intel_logical_ring_emit(ringbuf, 0); /* upper_32_bits(index) */ + intel_logical_ring_emit(ringbuf, data1); + intel_logical_ring_emit(ringbuf, data2); + + intel_logical_ring_advance(ringbuf); + return 0; +} + +/* + * Emit the commands to execute when preparing to start a batch + * + * The GPU will log the seqno of the batch before it starts + * running any of the commands to actually execute that batch + */ +static void +emit_preamble(struct drm_i915_gem_request *req) +{ + struct intel_ringbuffer *ringbuf = req->ringbuf; + uint32_t seqno = i915_gem_request_get_seqno(req); + + WARN_ON(!seqno); + + if (req->scheduler_flags & I915_REQ_SF_PREEMPT) + emit_store_dw_index(req, seqno, I915_PREEMPTIVE_ACTIVE_SEQNO); + else + emit_store_dw_index(req, seqno, I915_BATCH_ACTIVE_SEQNO); + + intel_logical_ring_emit(ringbuf, MI_REPORT_HEAD); + intel_logical_ring_emit(ringbuf, MI_NOOP); + + req->engine->gpu_caches_dirty = true; +} + +static void +emit_relconsts_mode(struct i915_execbuffer_params *params) +{ + if (params->instp_mode != params->ctx->relative_constants_mode) { + struct intel_ringbuffer *ringbuf = params->request->ringbuf; + uint32_t val = params->instp_mask << 16 | params->instp_mode; + + intel_logical_ring_emit(ringbuf, MI_NOOP); + intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1)); + intel_logical_ring_emit_reg(ringbuf, INSTPM); + intel_logical_ring_emit(ringbuf, val); + + params->ctx->relative_constants_mode = params->instp_mode; + } +} + +/* * This is the main function for sending a batch to the engine. * It is called from the scheduler, with the struct_mutex already held. */ @@ -1112,6 +1291,11 @@ int intel_execlists_submission_final(struct i915_execbuffer_params *params) req->head = intel_ring_get_tail(ringbuf); /* + * Log the seqno of the batch we're starting + */ + emit_preamble(req); + + /* * Unconditionally invalidate gpu caches and ensure that we do flush * any residual writes from the previous batch. */ @@ -1119,25 +1303,20 @@ int intel_execlists_submission_final(struct i915_execbuffer_params *params) if (ret) goto err; - if (engine == &dev_priv->engine[RCS] && - params->instp_mode != params->ctx->relative_constants_mode) { - intel_logical_ring_emit(ringbuf, MI_NOOP); - intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1)); - intel_logical_ring_emit_reg(ringbuf, INSTPM); - intel_logical_ring_emit(ringbuf, params->instp_mask << 16 | params->instp_mode); - intel_logical_ring_advance(ringbuf); + if (!(req->scheduler_flags & I915_REQ_SF_PREEMPT)) { + if (engine == &dev_priv->engine[RCS]) + emit_relconsts_mode(params); - params->ctx->relative_constants_mode = params->instp_mode; - } + exec_start = params->batch_obj_vm_offset + + params->args_batch_start_offset; - exec_start = params->batch_obj_vm_offset + - params->args_batch_start_offset; - - ret = engine->emit_bb_start(req, exec_start, params->dispatch_flags); - if (ret) - goto err; + ret = engine->emit_bb_start(req, exec_start, + params->dispatch_flags); + if (ret) + goto err; - trace_i915_gem_ring_dispatch(req, params->dispatch_flags); + trace_i915_gem_ring_dispatch(req, params->dispatch_flags); + } i915_gem_execbuffer_retire_commands(params); @@ -1881,41 +2060,30 @@ static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 invalidate_domains, u32 unused) { - struct intel_ringbuffer *ringbuf = request->ringbuf; - struct intel_engine_cs *engine = ringbuf->engine; - struct drm_device *dev = engine->dev; - struct drm_i915_private *dev_priv = dev->dev_private; - uint32_t cmd; - int ret; + uint32_t flags = 0; - ret = intel_logical_ring_begin(request, 4); - if (ret) - return ret; - - cmd = MI_FLUSH_DW + 1; - - /* We always require a command barrier so that subsequent - * commands, such as breadcrumb interrupts, are strictly ordered - * wrt the contents of the write cache being flushed to memory - * (and thus being coherent from the CPU). + /* + * We always require a command barrier so that subsequent commands, + * such as breadcrumb interrupts, are strictly ordered w.r.t the + * contents of the write cache being flushed to memory (and thus + * being coherent from the CPU). */ - cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW; if (invalidate_domains & I915_GEM_GPU_DOMAINS) { - cmd |= MI_INVALIDATE_TLB; - if (engine == &dev_priv->engine[VCS]) - cmd |= MI_INVALIDATE_BSD; + struct drm_i915_private *dev_priv = request->i915; + + if (request->engine == &dev_priv->engine[VCS]) + flags |= MI_INVALIDATE_BSD; + flags |= MI_INVALIDATE_TLB; } - intel_logical_ring_emit(ringbuf, cmd); - intel_logical_ring_emit(ringbuf, - I915_GEM_HWS_SCRATCH_ADDR | - MI_FLUSH_DW_USE_GTT); - intel_logical_ring_emit(ringbuf, 0); /* upper addr */ - intel_logical_ring_emit(ringbuf, 0); /* value */ - intel_logical_ring_advance(ringbuf); + /* Index must be QWord aligned */ + BUILD_BUG_ON(I915_GEM_HWS_SCRATCH_INDEX & 1); + /* w/a: bit 5 needs to be zero for MI_FLUSH_DW QWord address. */ + BUILD_BUG_ON(I915_GEM_HWS_SCRATCH_INDEX & (1 << (5 - MI_STORE_DWORD_INDEX_SHIFT))); - return 0; + return gen8_emit_flush_qw_store_index(request, flags, + I915_GEM_HWS_SCRATCH_INDEX, 0, 0); } static int gen8_emit_flush_render(struct drm_i915_gem_request *request, @@ -2026,93 +2194,115 @@ static void bxt_a_set_seqno(struct intel_engine_cs *engine, u32 seqno) */ #define WA_TAIL_DWORDS 2 +/* + * Emit the commands that flag the end of execution of a batch. + * + * The GPU will: + * 1) log the seqno of the request we're just completing. + * 2) in the case of a preemptive batch, leave the in-progress sequence + * number set to the same value; otherwise, clear it. We use MI_FLUSH_DW + * to ensure the seqno write completes before the interrupt happens. + * 3) Issue a USER INTERRUPT to notify the driver that the sequence number + * has been updated. + */ + static int gen8_emit_request(struct drm_i915_gem_request *request) { struct intel_ringbuffer *ringbuf = request->ringbuf; - u32 cmd; - u64 addr; + uint32_t seqno = i915_gem_request_get_seqno(request); + uint32_t index = I915_GEM_HWS_INDEX; + uint32_t data2 = 0; int ret; + /* Index must be QWord aligned */ + BUILD_BUG_ON(I915_BATCH_DONE_SEQNO & 1); + BUILD_BUG_ON(I915_PREEMPTIVE_DONE_SEQNO & 1); + + /* w/a: bit 5 needs to be zero for MI_FLUSH_DW QWord address. */ + BUILD_BUG_ON(I915_BATCH_DONE_SEQNO & (1 << (5 - MI_STORE_DWORD_INDEX_SHIFT))); + BUILD_BUG_ON(I915_PREEMPTIVE_DONE_SEQNO & (1 << (5 - MI_STORE_DWORD_INDEX_SHIFT))); + + WARN_ON(!seqno); + + if (request->scheduler_flags & I915_REQ_SF_PREEMPT) { + index = I915_PREEMPTIVE_DONE_SEQNO; + data2 = seqno; + } + + ret = gen8_emit_flush_qw_store_index(request, 0, index, seqno, data2); + if (ret) + return ret; + /* * Reserve space for the instructions below, plus some NOOPs * at the end of each request to be used as a workaround for * not being allowed to do lite restore with HEAD==TAIL * (WaIdleLiteRestore). */ - ret = intel_logical_ring_begin(request, 4 + 2 + WA_TAIL_DWORDS); + ret = intel_logical_ring_begin(request, 2 + WA_TAIL_DWORDS); if (ret) return ret; - cmd = MI_FLUSH_DW; - cmd += 1; /* Gen8+ uses long addresses */ - cmd |= MI_FLUSH_DW_OP_STOREDW; /* Store DWord as post-op */ - cmd |= MI_FLUSH_DW_STORE_INDEX; /* Address is relative to HWSP */ - - // Must be QWord aligned even for DWord write - BUILD_BUG_ON((I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT) & (1 << 2)); - -#if 1 - /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ - // This is true for a QWord write, but not a DWord - BUILD_BUG_ON((I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT) & (1 << 5)); -#endif - - addr = I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT; - addr |= MI_FLUSH_DW_USE_GTT; -// addr += ring->status_page.gfx_addr; - - intel_logical_ring_emit(ringbuf, cmd); - intel_logical_ring_emit(ringbuf, lower_32_bits(addr)); - intel_logical_ring_emit(ringbuf, upper_32_bits(addr)); - intel_logical_ring_emit(ringbuf, i915_gem_request_get_seqno(request)); - intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT); intel_logical_ring_emit(ringbuf, MI_NOOP); return intel_logical_ring_advance_and_submit(request); } +/* + * Emit the commands that flag the end of execution of a batch. + * + * The GPU will: + * 1) log the seqno of the request we're just completing. + * 2) in the case of a preemptive batch, leave the in-progress sequence + * number set to the same value; otherwise, clear it. We use PIPE_CONTROL + * to ensure the seqno write completes before the interrupt happens. + * 3) Issue a USER INTERRUPT to notify the driver that the sequence number + * has been updated. + */ static int gen8_emit_request_render(struct drm_i915_gem_request *request) { struct intel_ringbuffer *ringbuf = request->ringbuf; - u32 cmd, opts; - u64 addr; + uint32_t seqno = i915_gem_request_get_seqno(request); + uint32_t index = I915_GEM_HWS_INDEX; + uint32_t data2 = 0; int ret; + /* Index must be QWord aligned */ + BUILD_BUG_ON(I915_BATCH_DONE_SEQNO & 1); + BUILD_BUG_ON(I915_PREEMPTIVE_DONE_SEQNO & 1); + + /* w/a: bit 5 needs to be zero for MI_FLUSH_DW QWord address. */ + BUILD_BUG_ON(I915_BATCH_DONE_SEQNO & (1 << (5 - MI_STORE_DWORD_INDEX_SHIFT))); + BUILD_BUG_ON(I915_PREEMPTIVE_DONE_SEQNO & (1 << (5 - MI_STORE_DWORD_INDEX_SHIFT))); + + WARN_ON(!seqno); + + if (request->scheduler_flags & I915_REQ_SF_PREEMPT) { + index = I915_PREEMPTIVE_DONE_SEQNO; + data2 = seqno; + } + + /* + * w/a for post sync ops following a GPGPU operation we + * need a prior CS_STALL, which is emitted by the flush + * following the batch. + */ + + ret = gen8_emit_pipe_control_qw_store_index(request, 0, index, seqno, data2); + if (ret) + return ret; + /* * Reserve space for the instructions below, plus some NOOPs * at the end of each request to be used as a workaround for * not being allowed to do lite restore with HEAD==TAIL * (WaIdleLiteRestore). */ - ret = intel_logical_ring_begin(request, 6 + 2 + WA_TAIL_DWORDS); + ret = intel_logical_ring_begin(request, 2 + WA_TAIL_DWORDS); if (ret) return ret; - cmd = GFX_OP_PIPE_CONTROL(6); - - opts = PIPE_CONTROL_GLOBAL_GTT_IVB; /* Address via GGTT */ - opts |= PIPE_CONTROL_STORE_DATA_INDEX; /* Index into HWSP */ - opts |= PIPE_CONTROL_CS_STALL; /* Stall CS until done */ - opts |= PIPE_CONTROL_QW_WRITE; /* Write QWord */ - - // Must be QWord aligned - BUILD_BUG_ON((I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT) & (1 << 2)); - - addr = I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT; -// addr += ring->status_page.gfx_addr; - - /* w/a for post sync ops following a GPGPU operation we - * need a prior CS_STALL, which is emitted by the flush - * following the batch. - */ - intel_logical_ring_emit(ringbuf, cmd); - intel_logical_ring_emit(ringbuf, opts); - intel_logical_ring_emit(ringbuf, lower_32_bits(addr)); - intel_logical_ring_emit(ringbuf, upper_32_bits(addr)); - intel_logical_ring_emit(ringbuf, i915_gem_request_get_seqno(request)); - intel_logical_ring_emit(ringbuf, 0); /* Clear 'in progress' */ - intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT); intel_logical_ring_emit(ringbuf, MI_NOOP); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 66ab1c968978..851ab8daca70 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -458,28 +458,43 @@ intel_write_status_page(struct intel_engine_cs *engine, * chosen carefully to meet those requirements. The list below shows the * currently-known alignment requirements: * - * I915_GEM_SCRATCH_INDEX must be EVEN (QWord aligned) - * I915_GEM_HWS_INDEX must be EVEN (QWord aligned), but also bit 3 - * must be ZERO, so that the resulting address - * has a 0 in bit 5 (see BSpec for limitation - * on MI_FLUSH_DW instruction). + * I915_GEM_HWS_INDEX + * I915_GEM_SCRATCH_INDEX + * must be EVEN (QWord aligned) but ALSO bit 3 must be ZERO, + * so that the resulting address has a 0 in bit 5 (due to H/W + * limitation on MI_FLUSH_DW instruction with QWord data). + * + * I915_BATCH_DONE_SEQNO + * I915_PREEMPTIVE_DONE_SEQNO + * must be EVEN (QWord aligned) but ALSO bit 3 must be ZERO, + * so that the resulting address has a 0 in bit 5 (due to H/W + * limitation on MI_FLUSH_DW instruction with QWord data). + * + * I915_BATCH_ACTIVE_SEQNO + * I915_PREEMPTIVE_ACTIVE_SEQNO + * must each be at the odd address one above the corresponding + * I915_*_DONE_SEQNO value, as they are addressed both as DWords + * in their own right and as half of a QWord containing both the + * DONE and ACTIVE values together. */ /* * Tracking; these are updated by the GPU at the beginning and/or end of every - * batch. One pair for regular buffers, the other for preemptive ones. + * batch. One pair is for regular buffers, the other for preemptive ones. */ #define I915_BATCH_DONE_SEQNO 0x30 /* Completed batch seqno */ #define I915_BATCH_ACTIVE_SEQNO 0x31 /* In progress batch seqno */ #define I915_PREEMPTIVE_DONE_SEQNO 0x32 /* Completed preemptive batch */ #define I915_PREEMPTIVE_ACTIVE_SEQNO 0x33 /* In progress preemptive batch */ +#define I915_GEM_HWS_SCRATCH_INDEX 0x34 /* QWord, uses 0x35 as well */ +#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT) + +/* Beware of addresses 0xX8-0xXF due to MI_FLUSH_DW with QWord bug */ #define I915_GEM_HWS_INDEX I915_BATCH_DONE_SEQNO /* alias */ -#define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT) +//#define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT) //#define I915_GEM_ACTIVE_SEQNO_INDEX I915_BATCH_ACTIVE_SEQNO /* alias */ -#define I915_GEM_HWS_SCRATCH_INDEX 0x40 /* QWord */ -#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT) struct intel_ringbuffer * intel_engine_create_ringbuffer(struct intel_engine_cs *engine, int size); |