diff options
author | Dave Airlie <airlied@redhat.com> | 2019-02-04 15:37:52 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2019-02-04 15:37:58 +1000 |
commit | 2cc3b81dfa7f7de0d647e7f1473de811eef8b0de (patch) | |
tree | aac9ec4195bf41308017d1181db5cbf71afa0582 | |
parent | 148fb2e2e38769d99e3dfafdc03d06eeedcf7ad3 (diff) | |
parent | 46c0cd8c562bc3e4a99cbaa4ba0904b6871b7b4b (diff) |
Merge tag 'drm-intel-next-2019-02-02' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
- Make background color and LUT more robust (Matt)
- Icelake display fixes (Ville, Imre)
- Workarounds fixes and reorg (Tvrtko, Talha)
- Enable fastboot by default on VLV and CHV (Hans)
- Add another PCI ID for Coffee Lake (Rodrigo)
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190202082911.GA6615@intel.com
88 files changed, 4768 insertions, 3429 deletions
diff --git a/drivers/gpu/drm/drm_color_mgmt.c b/drivers/gpu/drm/drm_color_mgmt.c index 968ca7c91ad8..d5d34d0c79c7 100644 --- a/drivers/gpu/drm/drm_color_mgmt.c +++ b/drivers/gpu/drm/drm_color_mgmt.c @@ -474,10 +474,9 @@ EXPORT_SYMBOL(drm_plane_create_color_properties); * * Returns 0 on success, -EINVAL on failure. */ -int drm_color_lut_check(struct drm_property_blob *lut, - uint32_t tests) +int drm_color_lut_check(const struct drm_property_blob *lut, u32 tests) { - struct drm_color_lut *entry; + const struct drm_color_lut *entry; int i; if (!lut || !tests) diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 8300efe60fe1..210d0e8777b6 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -22,6 +22,7 @@ subdir-ccflags-y += $(call cc-disable-warning, unused-but-set-variable) subdir-ccflags-y += $(call cc-disable-warning, sign-compare) subdir-ccflags-y += $(call cc-disable-warning, sometimes-uninitialized) subdir-ccflags-y += $(call cc-disable-warning, initializer-overrides) +subdir-ccflags-y += $(call cc-disable-warning, uninitialized) subdir-ccflags-$(CONFIG_DRM_I915_WERROR) += -Werror # Fine grained warnings disable diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index 4f25b6b7728e..035479e273be 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -342,6 +342,7 @@ static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num, port->dpcd->data_valid = true; port->dpcd->data[DPCD_SINK_COUNT] = 0x1; port->type = type; + port->id = resolution; emulate_monitor_status_change(vgpu); @@ -445,6 +446,36 @@ void intel_gvt_emulate_vblank(struct intel_gvt *gvt) } /** + * intel_vgpu_emulate_hotplug - trigger hotplug event for vGPU + * @vgpu: a vGPU + * @conncted: link state + * + * This function is used to trigger hotplug interrupt for vGPU + * + */ +void intel_vgpu_emulate_hotplug(struct intel_vgpu *vgpu, bool connected) +{ + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + + /* TODO: add more platforms support */ + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (connected) { + vgpu_vreg_t(vgpu, SFUSE_STRAP) |= + SFUSE_STRAP_DDID_DETECTED; + vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTD_HOTPLUG_CPT; + } else { + vgpu_vreg_t(vgpu, SFUSE_STRAP) &= + ~SFUSE_STRAP_DDID_DETECTED; + vgpu_vreg_t(vgpu, SDEISR) &= ~SDE_PORTD_HOTPLUG_CPT; + } + vgpu_vreg_t(vgpu, SDEIIR) |= SDE_PORTD_HOTPLUG_CPT; + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |= + PORTD_HOTPLUG_STATUS_MASK; + intel_vgpu_trigger_virtual_event(vgpu, DP_D_HOTPLUG); + } +} + +/** * intel_vgpu_clean_display - clean vGPU virtual display emulation * @vgpu: a vGPU * diff --git a/drivers/gpu/drm/i915/gvt/display.h b/drivers/gpu/drm/i915/gvt/display.h index ea7c1c525b8c..a87f33e6a23c 100644 --- a/drivers/gpu/drm/i915/gvt/display.h +++ b/drivers/gpu/drm/i915/gvt/display.h @@ -146,18 +146,19 @@ enum intel_vgpu_port_type { GVT_PORT_MAX }; +enum intel_vgpu_edid { + GVT_EDID_1024_768, + GVT_EDID_1920_1200, + GVT_EDID_NUM, +}; + struct intel_vgpu_port { /* per display EDID information */ struct intel_vgpu_edid_data *edid; /* per display DPCD information */ struct intel_vgpu_dpcd_data *dpcd; int type; -}; - -enum intel_vgpu_edid { - GVT_EDID_1024_768, - GVT_EDID_1920_1200, - GVT_EDID_NUM, + enum intel_vgpu_edid id; }; static inline char *vgpu_edid_str(enum intel_vgpu_edid id) @@ -172,6 +173,30 @@ static inline char *vgpu_edid_str(enum intel_vgpu_edid id) } } +static inline unsigned int vgpu_edid_xres(enum intel_vgpu_edid id) +{ + switch (id) { + case GVT_EDID_1024_768: + return 1024; + case GVT_EDID_1920_1200: + return 1920; + default: + return 0; + } +} + +static inline unsigned int vgpu_edid_yres(enum intel_vgpu_edid id) +{ + switch (id) { + case GVT_EDID_1024_768: + return 768; + case GVT_EDID_1920_1200: + return 1200; + default: + return 0; + } +} + void intel_gvt_emulate_vblank(struct intel_gvt *gvt); void intel_gvt_check_vblank_emulation(struct intel_gvt *gvt); diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index 4e8947f33bd0..43f4242062dd 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -185,6 +185,7 @@ static const struct intel_gvt_ops intel_gvt_ops = { .vgpu_query_plane = intel_vgpu_query_plane, .vgpu_get_dmabuf = intel_vgpu_get_dmabuf, .write_protect_handler = intel_vgpu_page_track_handler, + .emulate_hotplug = intel_vgpu_emulate_hotplug, }; static void init_device_info(struct intel_gvt *gvt) diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index fb9cc980e120..8bce09de4b82 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -536,6 +536,8 @@ int intel_vgpu_emulate_cfg_read(struct intel_vgpu *vgpu, unsigned int offset, int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes); +void intel_vgpu_emulate_hotplug(struct intel_vgpu *vgpu, bool connected); + static inline u64 intel_vgpu_get_bar_gpa(struct intel_vgpu *vgpu, int bar) { /* We are 64bit bar. */ @@ -577,6 +579,7 @@ struct intel_gvt_ops { int (*vgpu_get_dmabuf)(struct intel_vgpu *vgpu, unsigned int); int (*write_protect_handler)(struct intel_vgpu *, u64, void *, unsigned int); + void (*emulate_hotplug)(struct intel_vgpu *vgpu, bool connected); }; diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h index 50798868ab15..5e01cc8d9b16 100644 --- a/drivers/gpu/drm/i915/gvt/hypercall.h +++ b/drivers/gpu/drm/i915/gvt/hypercall.h @@ -67,6 +67,7 @@ struct intel_gvt_mpt { int (*set_trap_area)(unsigned long handle, u64 start, u64 end, bool map); int (*set_opregion)(void *vgpu); + int (*set_edid)(void *vgpu, int port_num); int (*get_vfio_device)(void *vgpu); void (*put_vfio_device)(void *vgpu); bool (*is_valid_gfn)(unsigned long handle, unsigned long gfn); diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index f8d44e8f86a6..63eef86a2a85 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -57,6 +57,8 @@ static const struct intel_gvt_ops *intel_gvt_ops; #define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT) #define VFIO_PCI_OFFSET_MASK (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1) +#define EDID_BLOB_OFFSET (PAGE_SIZE/2) + #define OPREGION_SIGNATURE "IntelGraphicsMem" struct vfio_region; @@ -76,6 +78,11 @@ struct vfio_region { void *data; }; +struct vfio_edid_region { + struct vfio_region_gfx_edid vfio_edid_regs; + void *edid_blob; +}; + struct kvmgt_pgfn { gfn_t gfn; struct hlist_node hnode; @@ -427,6 +434,111 @@ static const struct intel_vgpu_regops intel_vgpu_regops_opregion = { .release = intel_vgpu_reg_release_opregion, }; +static int handle_edid_regs(struct intel_vgpu *vgpu, + struct vfio_edid_region *region, char *buf, + size_t count, u16 offset, bool is_write) +{ + struct vfio_region_gfx_edid *regs = ®ion->vfio_edid_regs; + unsigned int data; + + if (offset + count > sizeof(*regs)) + return -EINVAL; + + if (count != 4) + return -EINVAL; + + if (is_write) { + data = *((unsigned int *)buf); + switch (offset) { + case offsetof(struct vfio_region_gfx_edid, link_state): + if (data == VFIO_DEVICE_GFX_LINK_STATE_UP) { + if (!drm_edid_block_valid( + (u8 *)region->edid_blob, + 0, + true, + NULL)) { + gvt_vgpu_err("invalid EDID blob\n"); + return -EINVAL; + } + intel_gvt_ops->emulate_hotplug(vgpu, true); + } else if (data == VFIO_DEVICE_GFX_LINK_STATE_DOWN) + intel_gvt_ops->emulate_hotplug(vgpu, false); + else { + gvt_vgpu_err("invalid EDID link state %d\n", + regs->link_state); + return -EINVAL; + } + regs->link_state = data; + break; + case offsetof(struct vfio_region_gfx_edid, edid_size): + if (data > regs->edid_max_size) { + gvt_vgpu_err("EDID size is bigger than %d!\n", + regs->edid_max_size); + return -EINVAL; + } + regs->edid_size = data; + break; + default: + /* read-only regs */ + gvt_vgpu_err("write read-only EDID region at offset %d\n", + offset); + return -EPERM; + } + } else { + memcpy(buf, (char *)regs + offset, count); + } + + return count; +} + +static int handle_edid_blob(struct vfio_edid_region *region, char *buf, + size_t count, u16 offset, bool is_write) +{ + if (offset + count > region->vfio_edid_regs.edid_size) + return -EINVAL; + + if (is_write) + memcpy(region->edid_blob + offset, buf, count); + else + memcpy(buf, region->edid_blob + offset, count); + + return count; +} + +static size_t intel_vgpu_reg_rw_edid(struct intel_vgpu *vgpu, char *buf, + size_t count, loff_t *ppos, bool iswrite) +{ + int ret; + unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - + VFIO_PCI_NUM_REGIONS; + struct vfio_edid_region *region = + (struct vfio_edid_region *)vgpu->vdev.region[i].data; + loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; + + if (pos < region->vfio_edid_regs.edid_offset) { + ret = handle_edid_regs(vgpu, region, buf, count, pos, iswrite); + } else { + pos -= EDID_BLOB_OFFSET; + ret = handle_edid_blob(region, buf, count, pos, iswrite); + } + + if (ret < 0) + gvt_vgpu_err("failed to access EDID region\n"); + + return ret; +} + +static void intel_vgpu_reg_release_edid(struct intel_vgpu *vgpu, + struct vfio_region *region) +{ + kfree(region->data); +} + +static const struct intel_vgpu_regops intel_vgpu_regops_edid = { + .rw = intel_vgpu_reg_rw_edid, + .release = intel_vgpu_reg_release_edid, +}; + static int intel_vgpu_register_reg(struct intel_vgpu *vgpu, unsigned int type, unsigned int subtype, const struct intel_vgpu_regops *ops, @@ -493,6 +605,36 @@ static int kvmgt_set_opregion(void *p_vgpu) return ret; } +static int kvmgt_set_edid(void *p_vgpu, int port_num) +{ + struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu; + struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num); + struct vfio_edid_region *base; + int ret; + + base = kzalloc(sizeof(*base), GFP_KERNEL); + if (!base) + return -ENOMEM; + + /* TODO: Add multi-port and EDID extension block support */ + base->vfio_edid_regs.edid_offset = EDID_BLOB_OFFSET; + base->vfio_edid_regs.edid_max_size = EDID_SIZE; + base->vfio_edid_regs.edid_size = EDID_SIZE; + base->vfio_edid_regs.max_xres = vgpu_edid_xres(port->id); + base->vfio_edid_regs.max_yres = vgpu_edid_yres(port->id); + base->edid_blob = port->edid->edid_block; + + ret = intel_vgpu_register_reg(vgpu, + VFIO_REGION_TYPE_GFX, + VFIO_REGION_SUBTYPE_GFX_EDID, + &intel_vgpu_regops_edid, EDID_SIZE, + VFIO_REGION_INFO_FLAG_READ | + VFIO_REGION_INFO_FLAG_WRITE | + VFIO_REGION_INFO_FLAG_CAPS, base); + + return ret; +} + static void kvmgt_put_vfio_device(void *vgpu) { if (WARN_ON(!((struct intel_vgpu *)vgpu)->vdev.vfio_device)) @@ -1874,6 +2016,7 @@ static struct intel_gvt_mpt kvmgt_mpt = { .dma_map_guest_page = kvmgt_dma_map_guest_page, .dma_unmap_guest_page = kvmgt_dma_unmap_guest_page, .set_opregion = kvmgt_set_opregion, + .set_edid = kvmgt_set_edid, .get_vfio_device = kvmgt_get_vfio_device, .put_vfio_device = kvmgt_put_vfio_device, .is_valid_gfn = kvmgt_is_valid_gfn, diff --git a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h index 9b4225d44243..5d8b8f228d8f 100644 --- a/drivers/gpu/drm/i915/gvt/mpt.h +++ b/drivers/gpu/drm/i915/gvt/mpt.h @@ -314,6 +314,23 @@ static inline int intel_gvt_hypervisor_set_opregion(struct intel_vgpu *vgpu) } /** + * intel_gvt_hypervisor_set_edid - Set EDID region for guest + * @vgpu: a vGPU + * @port_num: display port number + * + * Returns: + * Zero on success, negative error code if failed. + */ +static inline int intel_gvt_hypervisor_set_edid(struct intel_vgpu *vgpu, + int port_num) +{ + if (!intel_gvt_host.mpt->set_edid) + return 0; + + return intel_gvt_host.mpt->set_edid(vgpu, port_num); +} + +/** * intel_gvt_hypervisor_get_vfio_device - increase vfio device ref count * @vgpu: a vGPU * diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index e1c860f80eb0..720e2b10adaa 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -428,6 +428,12 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, if (ret) goto out_clean_sched_policy; + /*TODO: add more platforms support */ + if (IS_SKYLAKE(gvt->dev_priv) || IS_KABYLAKE(gvt->dev_priv)) + ret = intel_gvt_hypervisor_set_edid(vgpu, PORT_D); + if (ret) + goto out_clean_sched_policy; + return vgpu; out_clean_sched_policy: diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index e80903114ca8..fa2c226fc779 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -160,14 +160,14 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) obj->mm.madv == I915_MADV_DONTNEED ? " purgeable" : ""); if (obj->base.name) seq_printf(m, " (name: %d)", obj->base.name); - list_for_each_entry(vma, &obj->vma_list, obj_link) { + list_for_each_entry(vma, &obj->vma.list, obj_link) { if (i915_vma_is_pinned(vma)) pin_count++; } seq_printf(m, " (pinned x %d)", pin_count); if (obj->pin_global) seq_printf(m, " (global)"); - list_for_each_entry(vma, &obj->vma_list, obj_link) { + list_for_each_entry(vma, &obj->vma.list, obj_link) { if (!drm_mm_node_allocated(&vma->node)) continue; @@ -323,7 +323,7 @@ static int per_file_stats(int id, void *ptr, void *data) if (obj->base.name || obj->base.dma_buf) stats->shared += obj->base.size; - list_for_each_entry(vma, &obj->vma_list, obj_link) { + list_for_each_entry(vma, &obj->vma.list, obj_link) { if (!drm_mm_node_allocated(&vma->node)) continue; @@ -1285,8 +1285,6 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) seq_puts(m, "Wedged\n"); if (test_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags)) seq_puts(m, "Reset in progress: struct_mutex backoff\n"); - if (test_bit(I915_RESET_HANDOFF, &dev_priv->gpu_error.flags)) - seq_puts(m, "Reset in progress: reset handoff to waiter\n"); if (waitqueue_active(&dev_priv->gpu_error.wait_queue)) seq_puts(m, "Waiter holding struct mutex\n"); if (waitqueue_active(&dev_priv->gpu_error.reset_queue)) @@ -1318,37 +1316,16 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) seq_printf(m, "GT active? %s\n", yesno(dev_priv->gt.awake)); for_each_engine(engine, dev_priv, id) { - struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct rb_node *rb; - seq_printf(m, "%s:\n", engine->name); - seq_printf(m, "\tseqno = %x [current %x, last %x]\n", + seq_printf(m, "\tseqno = %x [current %x, last %x], %dms ago\n", engine->hangcheck.seqno, seqno[id], - intel_engine_last_submit(engine)); - seq_printf(m, "\twaiters? %s, fake irq active? %s, stalled? %s, wedged? %s\n", - yesno(intel_engine_has_waiter(engine)), - yesno(test_bit(engine->id, - &dev_priv->gpu_error.missed_irq_rings)), - yesno(engine->hangcheck.stalled), - yesno(engine->hangcheck.wedged)); - - spin_lock_irq(&b->rb_lock); - for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { - struct intel_wait *w = rb_entry(rb, typeof(*w), node); - - seq_printf(m, "\t%s [%d] waiting for %x\n", - w->tsk->comm, w->tsk->pid, w->seqno); - } - spin_unlock_irq(&b->rb_lock); + intel_engine_last_submit(engine), + jiffies_to_msecs(jiffies - + engine->hangcheck.action_timestamp)); seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n", (long long)engine->hangcheck.acthd, (long long)acthd[id]); - seq_printf(m, "\taction = %s(%d) %d ms ago\n", - hangcheck_action_to_str(engine->hangcheck.action), - engine->hangcheck.action, - jiffies_to_msecs(jiffies - - engine->hangcheck.action_timestamp)); if (engine->id == RCS) { seq_puts(m, "\tinstdone read =\n"); @@ -2029,18 +2006,6 @@ static int i915_swizzle_info(struct seq_file *m, void *data) return 0; } -static int count_irq_waiters(struct drm_i915_private *i915) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - int count = 0; - - for_each_engine(engine, i915, id) - count += intel_engine_has_waiter(engine); - - return count; -} - static const char *rps_power_to_str(unsigned int power) { static const char * const strings[] = { @@ -2080,7 +2045,6 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) seq_printf(m, "RPS enabled? %d\n", rps->enabled); seq_printf(m, "GPU busy? %s [%d requests]\n", yesno(dev_priv->gt.awake), dev_priv->gt.active_requests); - seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv)); seq_printf(m, "Boosts outstanding? %d\n", atomic_read(&rps->num_waiters)); seq_printf(m, "Interactive? %d\n", READ_ONCE(rps->power.interactive)); @@ -3912,8 +3876,6 @@ static int i915_wedged_set(void *data, u64 val) { struct drm_i915_private *i915 = data; - struct intel_engine_cs *engine; - unsigned int tmp; /* * There is no safeguard against this debugfs entry colliding @@ -3926,18 +3888,8 @@ i915_wedged_set(void *data, u64 val) if (i915_reset_backoff(&i915->gpu_error)) return -EAGAIN; - for_each_engine_masked(engine, i915, val, tmp) { - engine->hangcheck.seqno = intel_engine_get_seqno(engine); - engine->hangcheck.stalled = true; - } - i915_handle_error(i915, val, I915_ERROR_CAPTURE, "Manually set wedged engine mask = %llx", val); - - wait_on_bit(&i915->gpu_error.flags, - I915_RESET_HANDOFF, - TASK_UNINTERRUPTIBLE); - return 0; } @@ -3945,94 +3897,6 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops, i915_wedged_get, i915_wedged_set, "%llu\n"); -static int -fault_irq_set(struct drm_i915_private *i915, - unsigned long *irq, - unsigned long val) -{ - int err; - - err = mutex_lock_interruptible(&i915->drm.struct_mutex); - if (err) - return err; - - err = i915_gem_wait_for_idle(i915, - I915_WAIT_LOCKED | - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT); - if (err) - goto err_unlock; - - *irq = val; - mutex_unlock(&i915->drm.struct_mutex); - - /* Flush idle worker to disarm irq */ - drain_delayed_work(&i915->gt.idle_work); - - return 0; - -err_unlock: - mutex_unlock(&i915->drm.struct_mutex); - return err; -} - -static int -i915_ring_missed_irq_get(void *data, u64 *val) -{ - struct drm_i915_private *dev_priv = data; - - *val = dev_priv->gpu_error.missed_irq_rings; - return 0; -} - -static int -i915_ring_missed_irq_set(void *data, u64 val) -{ - struct drm_i915_private *i915 = data; - - return fault_irq_set(i915, &i915->gpu_error.missed_irq_rings, val); -} - -DEFINE_SIMPLE_ATTRIBUTE(i915_ring_missed_irq_fops, - i915_ring_missed_irq_get, i915_ring_missed_irq_set, - "0x%08llx\n"); - -static int -i915_ring_test_irq_get(void *data, u64 *val) -{ - struct drm_i915_private *dev_priv = data; - - *val = dev_priv->gpu_error.test_irq_rings; - - return 0; -} - -static int -i915_ring_test_irq_set(void *data, u64 val) -{ - struct drm_i915_private *i915 = data; - - /* GuC keeps the user interrupt permanently enabled for submission */ - if (USES_GUC_SUBMISSION(i915)) - return -ENODEV; - - /* - * From icl, we can no longer individually mask interrupt generation - * from each engine. - */ - if (INTEL_GEN(i915) >= 11) - return -ENODEV; - - val &= INTEL_INFO(i915)->ring_mask; - DRM_DEBUG_DRIVER("Masking interrupts on rings 0x%08llx\n", val); - - return fault_irq_set(i915, &i915->gpu_error.test_irq_rings, val); -} - -DEFINE_SIMPLE_ATTRIBUTE(i915_ring_test_irq_fops, - i915_ring_test_irq_get, i915_ring_test_irq_set, - "0x%08llx\n"); - #define DROP_UNBOUND BIT(0) #define DROP_BOUND BIT(1) #define DROP_RETIRE BIT(2) @@ -4070,7 +3934,8 @@ i915_drop_caches_set(void *data, u64 val) val, val & DROP_ALL); wakeref = intel_runtime_pm_get(i915); - if (val & DROP_RESET_ACTIVE && !intel_engines_are_idle(i915)) + if (val & DROP_RESET_ACTIVE && + wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) i915_gem_set_wedged(i915); /* No need to check and wait for gpu resets, only libdrm auto-restarts @@ -4092,13 +3957,8 @@ i915_drop_caches_set(void *data, u64 val) mutex_unlock(&i915->drm.struct_mutex); } - if (val & DROP_RESET_ACTIVE && - i915_terminally_wedged(&i915->gpu_error)) { + if (val & DROP_RESET_ACTIVE && i915_terminally_wedged(&i915->gpu_error)) i915_handle_error(i915, ALL_ENGINES, 0, NULL); - wait_on_bit(&i915->gpu_error.flags, - I915_RESET_HANDOFF, - TASK_UNINTERRUPTIBLE); - } fs_reclaim_acquire(GFP_KERNEL); if (val & DROP_BOUND) @@ -4800,8 +4660,6 @@ static const struct i915_debugfs_files { } i915_debugfs_files[] = { {"i915_wedged", &i915_wedged_fops}, {"i915_cache_sharing", &i915_cache_sharing_fops}, - {"i915_ring_missed_irq", &i915_ring_missed_irq_fops}, - {"i915_ring_test_irq", &i915_ring_test_irq_fops}, {"i915_gem_drop_caches", &i915_drop_caches_fops}, #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) {"i915_error_state", &i915_error_state_fops}, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 3c111ad09922..534e52e3a8da 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -91,8 +91,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20190124" -#define DRIVER_TIMESTAMP 1548370857 +#define DRIVER_DATE "20190202" +#define DRIVER_TIMESTAMP 1549095268 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and * WARN_ON()) for hw state sanity checks to check for unexpected conditions @@ -1114,6 +1114,7 @@ struct skl_ddb_values { }; struct skl_wm_level { + u16 min_ddb_alloc; u16 plane_res_b; u8 plane_res_l; bool plane_en; @@ -1975,7 +1976,14 @@ struct drm_i915_private { void (*resume)(struct drm_i915_private *); void (*cleanup_engine)(struct intel_engine_cs *engine); - struct list_head timelines; + struct i915_gt_timelines { + struct mutex mutex; /* protects list, tainted by GPU */ + struct list_head active_list; + + /* Pack multiple timelines' seqnos into the same page */ + spinlock_t hwsp_lock; + struct list_head hwsp_free_list; + } timelines; struct list_head active_rings; struct list_head closed_vma; @@ -2345,6 +2353,8 @@ static inline unsigned int i915_sg_segment_size(void) INTEL_INFO(dev_priv)->gt == 3) #define IS_CNL_WITH_PORT_F(dev_priv) (IS_CANNONLAKE(dev_priv) && \ (INTEL_DEVID(dev_priv) & 0x0004) == 0x0004) +#define IS_ICL_WITH_PORT_F(dev_priv) (IS_ICELAKE(dev_priv) && \ + INTEL_DEVID(dev_priv) != 0x8A51) #define IS_ALPHA_SUPPORT(intel_info) ((intel_info)->is_alpha_support) @@ -3001,11 +3011,6 @@ static inline bool i915_reset_backoff(struct i915_gpu_error *error) return unlikely(test_bit(I915_RESET_BACKOFF, &error->flags)); } -static inline bool i915_reset_handoff(struct i915_gpu_error *error) -{ - return unlikely(test_bit(I915_RESET_HANDOFF, &error->flags)); -} - static inline bool i915_terminally_wedged(struct i915_gpu_error *error) { return unlikely(test_bit(I915_WEDGED, &error->flags)); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 44c82a6b9934..e802af64d628 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -247,21 +247,19 @@ int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; + struct i915_ggtt *ggtt = &to_i915(dev)->ggtt; struct drm_i915_gem_get_aperture *args = data; struct i915_vma *vma; u64 pinned; + mutex_lock(&ggtt->vm.mutex); + pinned = ggtt->vm.reserved; - mutex_lock(&dev->struct_mutex); - list_for_each_entry(vma, &ggtt->vm.active_list, vm_link) + list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) if (i915_vma_is_pinned(vma)) pinned += vma->node.size; - list_for_each_entry(vma, &ggtt->vm.inactive_list, vm_link) - if (i915_vma_is_pinned(vma)) - pinned += vma->node.size; - mutex_unlock(&dev->struct_mutex); + + mutex_unlock(&ggtt->vm.mutex); args->aper_size = ggtt->vm.total; args->aper_available_size = args->aper_size - pinned; @@ -441,15 +439,19 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj) if (ret) return ret; - while ((vma = list_first_entry_or_null(&obj->vma_list, - struct i915_vma, - obj_link))) { + spin_lock(&obj->vma.lock); + while (!ret && (vma = list_first_entry_or_null(&obj->vma.list, + struct i915_vma, + obj_link))) { list_move_tail(&vma->obj_link, &still_in_list); + spin_unlock(&obj->vma.lock); + ret = i915_vma_unbind(vma); - if (ret) - break; + + spin_lock(&obj->vma.lock); } - list_splice(&still_in_list, &obj->vma_list); + list_splice(&still_in_list, &obj->vma.list); + spin_unlock(&obj->vma.lock); return ret; } @@ -659,11 +661,6 @@ i915_gem_object_wait(struct drm_i915_gem_object *obj, struct intel_rps_client *rps_client) { might_sleep(); -#if IS_ENABLED(CONFIG_LOCKDEP) - GEM_BUG_ON(debug_locks && - !!lockdep_is_held(&obj->base.dev->struct_mutex) != - !!(flags & I915_WAIT_LOCKED)); -#endif GEM_BUG_ON(timeout < 0); timeout = i915_gem_object_wait_reservation(obj->resv, @@ -1539,23 +1536,21 @@ err: static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) { - struct drm_i915_private *i915; + struct drm_i915_private *i915 = to_i915(obj->base.dev); struct list_head *list; struct i915_vma *vma; GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + mutex_lock(&i915->ggtt.vm.mutex); for_each_ggtt_vma(vma, obj) { - if (i915_vma_is_active(vma)) - continue; - if (!drm_mm_node_allocated(&vma->node)) continue; - list_move_tail(&vma->vm_link, &vma->vm->inactive_list); + list_move_tail(&vma->vm_link, &vma->vm->bound_list); } + mutex_unlock(&i915->ggtt.vm.mutex); - i915 = to_i915(obj->base.dev); spin_lock(&i915->mm.obj_lock); list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list; list_move_tail(&obj->mm.link, list); @@ -2878,6 +2873,14 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj, return 0; } +static bool match_ring(struct i915_request *rq) +{ + struct drm_i915_private *dev_priv = rq->i915; + u32 ring = I915_READ(RING_START(rq->engine->mmio_base)); + + return ring == i915_ggtt_offset(rq->ring->vma); +} + struct i915_request * i915_gem_find_active_request(struct intel_engine_cs *engine) { @@ -2897,9 +2900,16 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) */ spin_lock_irqsave(&engine->timeline.lock, flags); list_for_each_entry(request, &engine->timeline.requests, link) { - if (__i915_request_completed(request, request->global_seqno)) + if (i915_request_completed(request)) continue; + if (!i915_request_started(request)) + break; + + /* More than one preemptible request may match! */ + if (!match_ring(request)) + break; + active = request; break; } @@ -3229,33 +3239,6 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) return ret; } -static long wait_for_timeline(struct i915_timeline *tl, - unsigned int flags, long timeout) -{ - struct i915_request *rq; - - rq = i915_gem_active_get_unlocked(&tl->last_request); - if (!rq) - return timeout; - - /* - * "Race-to-idle". - * - * Switching to the kernel context is often used a synchronous - * step prior to idling, e.g. in suspend for flushing all - * current operations to memory before sleeping. These we - * want to complete as quickly as possible to avoid prolonged - * stalls, so allow the gpu to boost to maximum clocks. - */ - if (flags & I915_WAIT_FOR_IDLE_BOOST) - gen6_rps_boost(rq, NULL); - - timeout = i915_request_wait(rq, flags, timeout); - i915_request_put(rq); - - return timeout; -} - static int wait_for_engines(struct drm_i915_private *i915) { if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) { @@ -3269,6 +3252,52 @@ static int wait_for_engines(struct drm_i915_private *i915) return 0; } +static long +wait_for_timelines(struct drm_i915_private *i915, + unsigned int flags, long timeout) +{ + struct i915_gt_timelines *gt = &i915->gt.timelines; + struct i915_timeline *tl; + + if (!READ_ONCE(i915->gt.active_requests)) + return timeout; + + mutex_lock(>->mutex); + list_for_each_entry(tl, >->active_list, link) { + struct i915_request *rq; + + rq = i915_gem_active_get_unlocked(&tl->last_request); + if (!rq) + continue; + + mutex_unlock(>->mutex); + + /* + * "Race-to-idle". + * + * Switching to the kernel context is often used a synchronous + * step prior to idling, e.g. in suspend for flushing all + * current operations to memory before sleeping. These we + * want to complete as quickly as possible to avoid prolonged + * stalls, so allow the gpu to boost to maximum clocks. + */ + if (flags & I915_WAIT_FOR_IDLE_BOOST) + gen6_rps_boost(rq, NULL); + + timeout = i915_request_wait(rq, flags, timeout); + i915_request_put(rq); + if (timeout < 0) + return timeout; + + /* restart after reacquiring the lock */ + mutex_lock(>->mutex); + tl = list_entry(>->active_list, typeof(*tl), link); + } + mutex_unlock(>->mutex); + + return timeout; +} + int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags, long timeout) { @@ -3280,17 +3309,15 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, if (!READ_ONCE(i915->gt.awake)) return 0; + timeout = wait_for_timelines(i915, flags, timeout); + if (timeout < 0) + return timeout; + if (flags & I915_WAIT_LOCKED) { - struct i915_timeline *tl; int err; lockdep_assert_held(&i915->drm.struct_mutex); - list_for_each_entry(tl, &i915->gt.timelines, link) { - timeout = wait_for_timeline(tl, flags, timeout); - if (timeout < 0) - return timeout; - } if (GEM_SHOW_DEBUG() && !timeout) { /* Presume that timeout was non-zero to begin with! */ dev_warn(&i915->drm.pdev->dev, @@ -3304,17 +3331,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, i915_retire_requests(i915); GEM_BUG_ON(i915->gt.active_requests); - } else { - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine(engine, i915, id) { - struct i915_timeline *tl = &engine->timeline; - - timeout = wait_for_timeline(tl, flags, timeout); - if (timeout < 0) - return timeout; - } } return 0; @@ -3500,7 +3516,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, * reading an invalid PTE on older architectures. */ restart: - list_for_each_entry(vma, &obj->vma_list, obj_link) { + list_for_each_entry(vma, &obj->vma.list, obj_link) { if (!drm_mm_node_allocated(&vma->node)) continue; @@ -3578,7 +3594,7 @@ restart: */ } - list_for_each_entry(vma, &obj->vma_list, obj_link) { + list_for_each_entry(vma, &obj->vma.list, obj_link) { if (!drm_mm_node_allocated(&vma->node)) continue; @@ -3588,7 +3604,7 @@ restart: } } - list_for_each_entry(vma, &obj->vma_list, obj_link) + list_for_each_entry(vma, &obj->vma.list, obj_link) vma->node.color = cache_level; i915_gem_object_set_cache_coherency(obj, cache_level); obj->cache_dirty = true; /* Always invalidate stale cachelines */ @@ -4164,7 +4180,9 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, { mutex_init(&obj->mm.lock); - INIT_LIST_HEAD(&obj->vma_list); + spin_lock_init(&obj->vma.lock); + INIT_LIST_HEAD(&obj->vma.list); + INIT_LIST_HEAD(&obj->lut_list); INIT_LIST_HEAD(&obj->batch_pool_link); @@ -4330,14 +4348,13 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, mutex_lock(&i915->drm.struct_mutex); GEM_BUG_ON(i915_gem_object_is_active(obj)); - list_for_each_entry_safe(vma, vn, - &obj->vma_list, obj_link) { + list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) { GEM_BUG_ON(i915_vma_is_active(vma)); vma->flags &= ~I915_VMA_PIN_MASK; i915_vma_destroy(vma); } - GEM_BUG_ON(!list_empty(&obj->vma_list)); - GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree)); + GEM_BUG_ON(!list_empty(&obj->vma.list)); + GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree)); /* This serializes freeing with the shrinker. Since the free * is delayed, first by RCU then by the workqueue, we want the @@ -4495,8 +4512,6 @@ void i915_gem_sanitize(struct drm_i915_private *i915) GEM_TRACE("\n"); - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(i915); intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); @@ -4522,6 +4537,7 @@ void i915_gem_sanitize(struct drm_i915_private *i915) intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); intel_runtime_pm_put(i915, wakeref); + mutex_lock(&i915->drm.struct_mutex); i915_gem_contexts_lost(i915); mutex_unlock(&i915->drm.struct_mutex); } @@ -4536,6 +4552,8 @@ int i915_gem_suspend(struct drm_i915_private *i915) wakeref = intel_runtime_pm_get(i915); intel_suspend_gt_powersave(i915); + flush_workqueue(i915->wq); + mutex_lock(&i915->drm.struct_mutex); /* @@ -4565,11 +4583,9 @@ int i915_gem_suspend(struct drm_i915_private *i915) i915_retire_requests(i915); /* ensure we flush after wedging */ mutex_unlock(&i915->drm.struct_mutex); + i915_reset_flush(i915); - intel_uc_suspend(i915); - - cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); - cancel_delayed_work_sync(&i915->gt.retire_work); + drain_delayed_work(&i915->gt.retire_work); /* * As the idle_work is rearming if it detects a race, play safe and @@ -4577,6 +4593,8 @@ int i915_gem_suspend(struct drm_i915_private *i915) */ drain_delayed_work(&i915->gt.idle_work); + intel_uc_suspend(i915); + /* * Assert that we successfully flushed all the work and * reset the GPU back to its idle, low power state. @@ -5013,6 +5031,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv) dev_priv->gt.cleanup_engine = intel_engine_cleanup; } + i915_timelines_init(dev_priv); + ret = i915_gem_init_userptr(dev_priv); if (ret) return ret; @@ -5135,8 +5155,10 @@ err_unlock: err_uc_misc: intel_uc_fini_misc(dev_priv); - if (ret != -EIO) + if (ret != -EIO) { i915_gem_cleanup_userptr(dev_priv); + i915_timelines_fini(dev_priv); + } if (ret == -EIO) { mutex_lock(&dev_priv->drm.struct_mutex); @@ -5187,6 +5209,7 @@ void i915_gem_fini(struct drm_i915_private *dev_priv) intel_uc_fini_misc(dev_priv); i915_gem_cleanup_userptr(dev_priv); + i915_timelines_fini(dev_priv); i915_gem_drain_freed_objects(dev_priv); @@ -5289,7 +5312,6 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv) if (!dev_priv->priorities) goto err_dependencies; - INIT_LIST_HEAD(&dev_priv->gt.timelines); INIT_LIST_HEAD(&dev_priv->gt.active_rings); INIT_LIST_HEAD(&dev_priv->gt.closed_vma); @@ -5333,7 +5355,6 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv) GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list)); GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count)); WARN_ON(dev_priv->mm.object_count); - WARN_ON(!list_empty(&dev_priv->gt.timelines)); kmem_cache_destroy(dev_priv->priorities); kmem_cache_destroy(dev_priv->dependencies); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 93e84751370f..6faf1f6faab5 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -327,6 +327,9 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) { ce->gem_context = ctx; + + INIT_LIST_HEAD(&ce->signal_link); + INIT_LIST_HEAD(&ce->signals); } static struct i915_gem_context * diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 3769438228f6..6ba40ff6b91f 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -164,6 +164,8 @@ struct i915_gem_context { struct intel_context { struct i915_gem_context *gem_context; struct intel_engine_cs *active; + struct list_head signal_link; + struct list_head signals; struct i915_vma *state; struct intel_ring *ring; u32 *lrc_reg_state; diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index f6855401f247..68d74c50ac39 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -126,31 +126,25 @@ i915_gem_evict_something(struct i915_address_space *vm, struct drm_i915_private *dev_priv = vm->i915; struct drm_mm_scan scan; struct list_head eviction_list; - struct list_head *phases[] = { - &vm->inactive_list, - &vm->active_list, - NULL, - }, **phase; struct i915_vma *vma, *next; struct drm_mm_node *node; enum drm_mm_insert_mode mode; + struct i915_vma *active; int ret; lockdep_assert_held(&vm->i915->drm.struct_mutex); trace_i915_gem_evict(vm, min_size, alignment, flags); /* - * The goal is to evict objects and amalgamate space in LRU order. - * The oldest idle objects reside on the inactive list, which is in - * retirement order. The next objects to retire are those in flight, - * on the active list, again in retirement order. + * The goal is to evict objects and amalgamate space in rough LRU order. + * Since both active and inactive objects reside on the same list, + * in a mix of creation and last scanned order, as we process the list + * we sort it into inactive/active, which keeps the active portion + * in a rough MRU order. * * The retirement sequence is thus: - * 1. Inactive objects (already retired) - * 2. Active objects (will stall on unbinding) - * - * On each list, the oldest objects lie at the HEAD with the freshest - * object on the TAIL. + * 1. Inactive objects (already retired, random order) + * 2. Active objects (will stall on unbinding, oldest scanned first) */ mode = DRM_MM_INSERT_BEST; if (flags & PIN_HIGH) @@ -169,17 +163,46 @@ i915_gem_evict_something(struct i915_address_space *vm, */ if (!(flags & PIN_NONBLOCK)) i915_retire_requests(dev_priv); - else - phases[1] = NULL; search_again: + active = NULL; INIT_LIST_HEAD(&eviction_list); - phase = phases; - do { - list_for_each_entry(vma, *phase, vm_link) - if (mark_free(&scan, vma, flags, &eviction_list)) - goto found; - } while (*++phase); + list_for_each_entry_safe(vma, next, &vm->bound_list, vm_link) { + /* + * We keep this list in a rough least-recently scanned order + * of active elements (inactive elements are cheap to reap). + * New entries are added to the end, and we move anything we + * scan to the end. The assumption is that the working set + * of applications is either steady state (and thanks to the + * userspace bo cache it almost always is) or volatile and + * frequently replaced after a frame, which are self-evicting! + * Given that assumption, the MRU order of the scan list is + * fairly static, and keeping it in least-recently scan order + * is suitable. + * + * To notice when we complete one full cycle, we record the + * first active element seen, before moving it to the tail. + */ + if (i915_vma_is_active(vma)) { + if (vma == active) { + if (flags & PIN_NONBLOCK) + break; + + active = ERR_PTR(-EAGAIN); + } + + if (active != ERR_PTR(-EAGAIN)) { + if (!active) + active = vma; + + list_move_tail(&vma->vm_link, &vm->bound_list); + continue; + } + } + + if (mark_free(&scan, vma, flags, &eviction_list)) + goto found; + } /* Nothing found, clean up and bail out! */ list_for_each_entry_safe(vma, next, &eviction_list, evict_link) { @@ -388,11 +411,6 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, */ int i915_gem_evict_vm(struct i915_address_space *vm) { - struct list_head *phases[] = { - &vm->inactive_list, - &vm->active_list, - NULL - }, **phase; struct list_head eviction_list; struct i915_vma *vma, *next; int ret; @@ -412,16 +430,15 @@ int i915_gem_evict_vm(struct i915_address_space *vm) } INIT_LIST_HEAD(&eviction_list); - phase = phases; - do { - list_for_each_entry(vma, *phase, vm_link) { - if (i915_vma_is_pinned(vma)) - continue; + mutex_lock(&vm->mutex); + list_for_each_entry(vma, &vm->bound_list, vm_link) { + if (i915_vma_is_pinned(vma)) + continue; - __i915_vma_pin(vma); - list_add(&vma->evict_link, &eviction_list); - } - } while (*++phase); + __i915_vma_pin(vma); + list_add(&vma->evict_link, &eviction_list); + } + mutex_unlock(&vm->mutex); ret = 0; list_for_each_entry_safe(vma, next, &eviction_list, evict_link) { diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index f250109e1f66..8eedf7cac493 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1976,6 +1976,18 @@ static int eb_submit(struct i915_execbuffer *eb) return err; } + /* + * After we completed waiting for other engines (using HW semaphores) + * then we can signal that this request/batch is ready to run. This + * allows us to determine if the batch is still waiting on the GPU + * or actually running by checking the breadcrumb. + */ + if (eb->engine->emit_init_breadcrumb) { + err = eb->engine->emit_init_breadcrumb(eb->request); + if (err) + return err; + } + err = eb->engine->emit_bb_start(eb->request, eb->batch->node.start + eb->batch_start_offset, diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.h b/drivers/gpu/drm/i915/i915_gem_fence_reg.h index 99a31ded4dfd..09dcaf14121b 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.h +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.h @@ -50,4 +50,3 @@ struct drm_i915_fence_reg { }; #endif - diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 9081e3bc5a59..49b00996a15e 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -491,9 +491,8 @@ static void i915_address_space_init(struct i915_address_space *vm, int subclass) stash_init(&vm->free_pages); - INIT_LIST_HEAD(&vm->active_list); - INIT_LIST_HEAD(&vm->inactive_list); INIT_LIST_HEAD(&vm->unbound_list); + INIT_LIST_HEAD(&vm->bound_list); } static void i915_address_space_fini(struct i915_address_space *vm) @@ -1932,7 +1931,10 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size) vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */ INIT_LIST_HEAD(&vma->obj_link); + + mutex_lock(&vma->vm->mutex); list_add(&vma->vm_link, &vma->vm->unbound_list); + mutex_unlock(&vma->vm->mutex); return vma; } @@ -2111,8 +2113,7 @@ void i915_ppgtt_close(struct i915_address_space *vm) static void ppgtt_destroy_vma(struct i915_address_space *vm) { struct list_head *phases[] = { - &vm->active_list, - &vm->inactive_list, + &vm->bound_list, &vm->unbound_list, NULL, }, **phase; @@ -2135,8 +2136,7 @@ void i915_ppgtt_release(struct kref *kref) ppgtt_destroy_vma(&ppgtt->vm); - GEM_BUG_ON(!list_empty(&ppgtt->vm.active_list)); - GEM_BUG_ON(!list_empty(&ppgtt->vm.inactive_list)); + GEM_BUG_ON(!list_empty(&ppgtt->vm.bound_list)); GEM_BUG_ON(!list_empty(&ppgtt->vm.unbound_list)); ppgtt->vm.cleanup(&ppgtt->vm); @@ -2801,8 +2801,7 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) mutex_lock(&dev_priv->drm.struct_mutex); i915_gem_fini_aliasing_ppgtt(dev_priv); - GEM_BUG_ON(!list_empty(&ggtt->vm.active_list)); - list_for_each_entry_safe(vma, vn, &ggtt->vm.inactive_list, vm_link) + list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) WARN_ON(i915_vma_unbind(vma)); if (drm_mm_node_allocated(&ggtt->error_capture)) @@ -3508,32 +3507,39 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) i915_check_and_clear_faults(dev_priv); + mutex_lock(&ggtt->vm.mutex); + /* First fill our portion of the GTT with scratch pages */ ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total); - ggtt->vm.closed = true; /* skip rewriting PTE on VMA unbind */ /* clflush objects bound into the GGTT and rebind them. */ - GEM_BUG_ON(!list_empty(&ggtt->vm.active_list)); - list_for_each_entry_safe(vma, vn, &ggtt->vm.inactive_list, vm_link) { + list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) { struct drm_i915_gem_object *obj = vma->obj; if (!(vma->flags & I915_VMA_GLOBAL_BIND)) continue; + mutex_unlock(&ggtt->vm.mutex); + if (!i915_vma_unbind(vma)) - continue; + goto lock; WARN_ON(i915_vma_bind(vma, obj ? obj->cache_level : 0, PIN_UPDATE)); if (obj) WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false)); + +lock: + mutex_lock(&ggtt->vm.mutex); } ggtt->vm.closed = false; i915_ggtt_invalidate(dev_priv); + mutex_unlock(&ggtt->vm.mutex); + if (INTEL_GEN(dev_priv) >= 8) { struct intel_ppat *ppat = &dev_priv->ppat; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 9229b03d629b..03ade71b8d9a 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -39,6 +39,7 @@ #include <linux/pagevec.h> #include "i915_request.h" +#include "i915_reset.h" #include "i915_selftest.h" #include "i915_timeline.h" @@ -298,32 +299,12 @@ struct i915_address_space { struct i915_page_directory_pointer *scratch_pdp; /* GEN8+ & 48b PPGTT */ /** - * List of objects currently involved in rendering. - * - * Includes buffers having the contents of their GPU caches - * flushed, not necessarily primitives. last_read_req - * represents when the rendering involved will be completed. - * - * A reference is held on the buffer while on this list. + * List of vma currently bound. */ - struct list_head active_list; + struct list_head bound_list; /** - * LRU list of objects which are not in the ringbuffer and - * are ready to unbind, but are still in the GTT. - * - * last_read_req is NULL while an object is in this list. - * - * A reference is not held on the buffer while on this list, - * as merely being GTT-bound shouldn't prevent its being - * freed, and we'll pull it off the list in the free path. - */ - struct list_head inactive_list; - - /** - * List of vma that have been unbound. - * - * A reference is not held on the buffer while on this list. + * List of vma that are not unbound. */ struct list_head unbound_list; @@ -661,19 +642,19 @@ int i915_gem_gtt_insert(struct i915_address_space *vm, /* Flags used by pin/bind&friends. */ #define PIN_NONBLOCK BIT_ULL(0) -#define PIN_MAPPABLE BIT_ULL(1) -#define PIN_ZONE_4G BIT_ULL(2) -#define PIN_NONFAULT BIT_ULL(3) -#define PIN_NOEVICT BIT_ULL(4) - -#define PIN_MBZ BIT_ULL(5) /* I915_VMA_PIN_OVERFLOW */ -#define PIN_GLOBAL BIT_ULL(6) /* I915_VMA_GLOBAL_BIND */ -#define PIN_USER BIT_ULL(7) /* I915_VMA_LOCAL_BIND */ -#define PIN_UPDATE BIT_ULL(8) - -#define PIN_HIGH BIT_ULL(9) -#define PIN_OFFSET_BIAS BIT_ULL(10) -#define PIN_OFFSET_FIXED BIT_ULL(11) +#define PIN_NONFAULT BIT_ULL(1) +#define PIN_NOEVICT BIT_ULL(2) +#define PIN_MAPPABLE BIT_ULL(3) +#define PIN_ZONE_4G BIT_ULL(4) +#define PIN_HIGH BIT_ULL(5) +#define PIN_OFFSET_BIAS BIT_ULL(6) +#define PIN_OFFSET_FIXED BIT_ULL(7) + +#define PIN_MBZ BIT_ULL(8) /* I915_VMA_PIN_OVERFLOW */ +#define PIN_GLOBAL BIT_ULL(9) /* I915_VMA_GLOBAL_BIND */ +#define PIN_USER BIT_ULL(10) /* I915_VMA_LOCAL_BIND */ +#define PIN_UPDATE BIT_ULL(11) + #define PIN_OFFSET_MASK (-I915_GTT_PAGE_SIZE) #endif diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index cb1b0144d274..73fec917d097 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -87,24 +87,33 @@ struct drm_i915_gem_object { const struct drm_i915_gem_object_ops *ops; - /** - * @vma_list: List of VMAs backed by this object - * - * The VMA on this list are ordered by type, all GGTT vma are placed - * at the head and all ppGTT vma are placed at the tail. The different - * types of GGTT vma are unordered between themselves, use the - * @vma_tree (which has a defined order between all VMA) to find an - * exact match. - */ - struct list_head vma_list; - /** - * @vma_tree: Ordered tree of VMAs backed by this object - * - * All VMA created for this object are placed in the @vma_tree for - * fast retrieval via a binary search in i915_vma_instance(). - * They are also added to @vma_list for easy iteration. - */ - struct rb_root vma_tree; + struct { + /** + * @vma.lock: protect the list/tree of vmas + */ + spinlock_t lock; + + /** + * @vma.list: List of VMAs backed by this object + * + * The VMA on this list are ordered by type, all GGTT vma are + * placed at the head and all ppGTT vma are placed at the tail. + * The different types of GGTT vma are unordered between + * themselves, use the @vma.tree (which has a defined order + * between all VMA) to quickly find an exact match. + */ + struct list_head list; + + /** + * @vma.tree: Ordered tree of VMAs backed by this object + * + * All VMA created for this object are placed in the @vma.tree + * for fast retrieval via a binary search in + * i915_vma_instance(). They are also added to @vma.list for + * easy iteration. + */ + struct rb_root tree; + } vma; /** * @lut_list: List of vma lookup entries in use for this object. diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 8ceecb026910..6da795c7e62e 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -461,12 +461,20 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr I915_SHRINK_VMAPS); /* We also want to clear any cached iomaps as they wrap vmap */ + mutex_lock(&i915->ggtt.vm.mutex); list_for_each_entry_safe(vma, next, - &i915->ggtt.vm.inactive_list, vm_link) { + &i915->ggtt.vm.bound_list, vm_link) { unsigned long count = vma->node.size >> PAGE_SHIFT; - if (vma->iomap && i915_vma_unbind(vma) == 0) + + if (!vma->iomap || i915_vma_is_active(vma)) + continue; + + mutex_unlock(&i915->ggtt.vm.mutex); + if (i915_vma_unbind(vma) == 0) freed_pages += count; + mutex_lock(&i915->ggtt.vm.mutex); } + mutex_unlock(&i915->ggtt.vm.mutex); out: shrinker_unlock(i915, unlock); diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 9df615eea2d8..74a9661479ca 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -701,7 +701,10 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv vma->pages = obj->mm.pages; vma->flags |= I915_VMA_GLOBAL_BIND; __i915_vma_set_map_and_fenceable(vma); - list_move_tail(&vma->vm_link, &ggtt->vm.inactive_list); + + mutex_lock(&ggtt->vm.mutex); + list_move_tail(&vma->vm_link, &ggtt->vm.bound_list); + mutex_unlock(&ggtt->vm.mutex); spin_lock(&dev_priv->mm.obj_lock); list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 1f8e80e31b49..6e2e5ed2bd0a 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -447,9 +447,14 @@ static void error_print_request(struct drm_i915_error_state_buf *m, if (!erq->seqno) return; - err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, prio %d, emitted %dms, start %08x, head %08x, tail %08x\n", + err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x%s%s, prio %d, emitted %dms, start %08x, head %08x, tail %08x\n", prefix, erq->pid, erq->ban_score, - erq->context, erq->seqno, erq->sched_attr.priority, + erq->context, erq->seqno, + test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &erq->flags) ? "!" : "", + test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, + &erq->flags) ? "+" : "", + erq->sched_attr.priority, jiffies_to_msecs(erq->jiffies - epoch), erq->start, erq->head, erq->tail); } @@ -530,13 +535,9 @@ static void error_print_engine(struct drm_i915_error_state_buf *m, } err_printf(m, " seqno: 0x%08x\n", ee->seqno); err_printf(m, " last_seqno: 0x%08x\n", ee->last_seqno); - err_printf(m, " waiting: %s\n", yesno(ee->waiting)); err_printf(m, " ring->head: 0x%08x\n", ee->cpu_ring_head); err_printf(m, " ring->tail: 0x%08x\n", ee->cpu_ring_tail); - err_printf(m, " hangcheck stall: %s\n", yesno(ee->hangcheck_stalled)); - err_printf(m, " hangcheck action: %s\n", - hangcheck_action_to_str(ee->hangcheck_action)); - err_printf(m, " hangcheck action timestamp: %dms (%lu%s)\n", + err_printf(m, " hangcheck timestamp: %dms (%lu%s)\n", jiffies_to_msecs(ee->hangcheck_timestamp - epoch), ee->hangcheck_timestamp, ee->hangcheck_timestamp == epoch ? "; epoch" : ""); @@ -684,15 +685,15 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, jiffies_to_msecs(error->capture - error->epoch)); for (i = 0; i < ARRAY_SIZE(error->engine); i++) { - if (error->engine[i].hangcheck_stalled && - error->engine[i].context.pid) { - err_printf(m, "Active process (on ring %s): %s [%d], score %d%s\n", - engine_name(m->i915, i), - error->engine[i].context.comm, - error->engine[i].context.pid, - error->engine[i].context.ban_score, - bannable(&error->engine[i].context)); - } + if (!error->engine[i].context.pid) + continue; + + err_printf(m, "Active process (on ring %s): %s [%d], score %d%s\n", + engine_name(m->i915, i), + error->engine[i].context.comm, + error->engine[i].context.pid, + error->engine[i].context.ban_score, + bannable(&error->engine[i].context)); } err_printf(m, "Reset count: %u\n", error->reset_count); err_printf(m, "Suspend count: %u\n", error->suspend_count); @@ -722,8 +723,6 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, err_printf(m, "FORCEWAKE: 0x%08x\n", error->forcewake); err_printf(m, "DERRMR: 0x%08x\n", error->derrmr); err_printf(m, "CCID: 0x%08x\n", error->ccid); - err_printf(m, "Missed interrupts: 0x%08lx\n", - m->i915->gpu_error.missed_irq_rings); for (i = 0; i < error->nfence; i++) err_printf(m, " fence[%d] = %08llx\n", i, error->fence[i]); @@ -807,21 +806,6 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, error->epoch); } - if (IS_ERR(ee->waiters)) { - err_printf(m, "%s --- ? waiters [unable to acquire spinlock]\n", - m->i915->engine[i]->name); - } else if (ee->num_waiters) { - err_printf(m, "%s --- %d waiters\n", - m->i915->engine[i]->name, - ee->num_waiters); - for (j = 0; j < ee->num_waiters; j++) { - err_printf(m, " seqno 0x%08x for %s [%d]\n", - ee->waiters[j].seqno, - ee->waiters[j].comm, - ee->waiters[j].pid); - } - } - print_error_obj(m, m->i915->engine[i], "ringbuffer", ee->ringbuffer); @@ -1003,8 +987,6 @@ void __i915_gpu_state_free(struct kref *error_ref) i915_error_object_free(ee->wa_ctx); kfree(ee->requests); - if (!IS_ERR_OR_NULL(ee->waiters)) - kfree(ee->waiters); } for (i = 0; i < ARRAY_SIZE(error->active_bo); i++) @@ -1124,7 +1106,9 @@ static void capture_bo(struct drm_i915_error_buffer *err, static u32 capture_error_bo(struct drm_i915_error_buffer *err, int count, struct list_head *head, - bool pinned_only) + unsigned int flags) +#define ACTIVE_ONLY BIT(0) +#define PINNED_ONLY BIT(1) { struct i915_vma *vma; int i = 0; @@ -1133,7 +1117,10 @@ static u32 capture_error_bo(struct drm_i915_error_buffer *err, if (!vma->obj) continue; - if (pinned_only && !i915_vma_is_pinned(vma)) + if (flags & ACTIVE_ONLY && !i915_vma_is_active(vma)) + continue; + + if (flags & PINNED_ONLY && !i915_vma_is_pinned(vma)) continue; capture_bo(err++, vma); @@ -1144,7 +1131,8 @@ static u32 capture_error_bo(struct drm_i915_error_buffer *err, return i; } -/* Generate a semi-unique error code. The code is not meant to have meaning, The +/* + * Generate a semi-unique error code. The code is not meant to have meaning, The * code's only purpose is to try to prevent false duplicated bug reports by * grossly estimating a GPU error state. * @@ -1153,29 +1141,23 @@ static u32 capture_error_bo(struct drm_i915_error_buffer *err, * * It's only a small step better than a random number in its current form. */ -static u32 i915_error_generate_code(struct drm_i915_private *dev_priv, - struct i915_gpu_state *error, - int *engine_id) +static u32 i915_error_generate_code(struct i915_gpu_state *error, + unsigned long engine_mask) { - u32 error_code = 0; - int i; - - /* IPEHR would be an ideal way to detect errors, as it's the gross + /* + * IPEHR would be an ideal way to detect errors, as it's the gross * measure of "the command that hung." However, has some very common * synchronization commands which almost always appear in the case * strictly a client bug. Use instdone to differentiate those some. */ - for (i = 0; i < I915_NUM_ENGINES; i++) { - if (error->engine[i].hangcheck_stalled) { - if (engine_id) - *engine_id = i; + if (engine_mask) { + struct drm_i915_error_engine *ee = + &error->engine[ffs(engine_mask)]; - return error->engine[i].ipehr ^ - error->engine[i].instdone.instdone; - } + return ee->ipehr ^ ee->instdone.instdone; } - return error_code; + return 0; } static void gem_record_fences(struct i915_gpu_state *error) @@ -1208,59 +1190,6 @@ static void gen6_record_semaphore_state(struct intel_engine_cs *engine, I915_READ(RING_SYNC_2(engine->mmio_base)); } -static void error_record_engine_waiters(struct intel_engine_cs *engine, - struct drm_i915_error_engine *ee) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct drm_i915_error_waiter *waiter; - struct rb_node *rb; - int count; - - ee->num_waiters = 0; - ee->waiters = NULL; - - if (RB_EMPTY_ROOT(&b->waiters)) - return; - - if (!spin_trylock_irq(&b->rb_lock)) { - ee->waiters = ERR_PTR(-EDEADLK); - return; - } - - count = 0; - for (rb = rb_first(&b->waiters); rb != NULL; rb = rb_next(rb)) - count++; - spin_unlock_irq(&b->rb_lock); - - waiter = NULL; - if (count) - waiter = kmalloc_array(count, - sizeof(struct drm_i915_error_waiter), - GFP_ATOMIC); - if (!waiter) - return; - - if (!spin_trylock_irq(&b->rb_lock)) { - kfree(waiter); - ee->waiters = ERR_PTR(-EDEADLK); - return; - } - - ee->waiters = waiter; - for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { - struct intel_wait *w = rb_entry(rb, typeof(*w), node); - - strcpy(waiter->comm, w->tsk->comm); - waiter->pid = w->tsk->pid; - waiter->seqno = w->seqno; - waiter++; - - if (++ee->num_waiters == count) - break; - } - spin_unlock_irq(&b->rb_lock); -} - static void error_record_engine_registers(struct i915_gpu_state *error, struct intel_engine_cs *engine, struct drm_i915_error_engine *ee) @@ -1296,7 +1225,6 @@ static void error_record_engine_registers(struct i915_gpu_state *error, intel_engine_get_instdone(engine, &ee->instdone); - ee->waiting = intel_engine_has_waiter(engine); ee->instpm = I915_READ(RING_INSTPM(engine->mmio_base)); ee->acthd = intel_engine_get_active_head(engine); ee->seqno = intel_engine_get_seqno(engine); @@ -1338,9 +1266,8 @@ static void error_record_engine_registers(struct i915_gpu_state *error, } ee->idle = intel_engine_is_idle(engine); - ee->hangcheck_timestamp = engine->hangcheck.action_timestamp; - ee->hangcheck_action = engine->hangcheck.action; - ee->hangcheck_stalled = engine->hangcheck.stalled; + if (!ee->idle) + ee->hangcheck_timestamp = engine->hangcheck.action_timestamp; ee->reset_count = i915_reset_engine_count(&dev_priv->gpu_error, engine); @@ -1371,6 +1298,7 @@ static void record_request(struct i915_request *request, { struct i915_gem_context *ctx = request->gem_context; + erq->flags = request->fence.flags; erq->context = ctx->hw_id; erq->sched_attr = request->sched.attr; erq->ban_score = atomic_read(&ctx->ban_score); @@ -1546,7 +1474,6 @@ static void gem_record_rings(struct i915_gpu_state *error) ee->engine_id = i; error_record_engine_registers(error, engine, ee); - error_record_engine_waiters(engine, ee); error_record_engine_execlists(engine, ee); request = i915_gem_find_active_request(engine); @@ -1610,14 +1537,17 @@ static void gem_capture_vm(struct i915_gpu_state *error, int count; count = 0; - list_for_each_entry(vma, &vm->active_list, vm_link) - count++; + list_for_each_entry(vma, &vm->bound_list, vm_link) + if (i915_vma_is_active(vma)) + count++; active_bo = NULL; if (count) active_bo = kcalloc(count, sizeof(*active_bo), GFP_ATOMIC); if (active_bo) - count = capture_error_bo(active_bo, count, &vm->active_list, false); + count = capture_error_bo(active_bo, + count, &vm->bound_list, + ACTIVE_ONLY); else count = 0; @@ -1655,28 +1585,20 @@ static void capture_pinned_buffers(struct i915_gpu_state *error) struct i915_address_space *vm = &error->i915->ggtt.vm; struct drm_i915_error_buffer *bo; struct i915_vma *vma; - int count_inactive, count_active; - - count_inactive = 0; - list_for_each_entry(vma, &vm->inactive_list, vm_link) - count_inactive++; + int count; - count_active = 0; - list_for_each_entry(vma, &vm->active_list, vm_link) - count_active++; + count = 0; + list_for_each_entry(vma, &vm->bound_list, vm_link) + count++; bo = NULL; - if (count_inactive + count_active) - bo = kcalloc(count_inactive + count_active, - sizeof(*bo), GFP_ATOMIC); + if (count) + bo = kcalloc(count, sizeof(*bo), GFP_ATOMIC); if (!bo) return; - count_inactive = capture_error_bo(bo, count_inactive, - &vm->active_list, true); - count_active = capture_error_bo(bo + count_inactive, count_active, - &vm->inactive_list, true); - error->pinned_bo_count = count_inactive + count_active; + error->pinned_bo_count = + capture_error_bo(bo, count, &vm->bound_list, PINNED_ONLY); error->pinned_bo = bo; } @@ -1783,31 +1705,35 @@ static void capture_reg_state(struct i915_gpu_state *error) error->pgtbl_er = I915_READ(PGTBL_ER); } -static void i915_error_capture_msg(struct drm_i915_private *dev_priv, - struct i915_gpu_state *error, - u32 engine_mask, - const char *error_msg) +static const char * +error_msg(struct i915_gpu_state *error, unsigned long engines, const char *msg) { - u32 ecode; - int engine_id = -1, len; + int len; + int i; - ecode = i915_error_generate_code(dev_priv, error, &engine_id); + for (i = 0; i < ARRAY_SIZE(error->engine); i++) + if (!error->engine[i].context.pid) + engines &= ~BIT(i); len = scnprintf(error->error_msg, sizeof(error->error_msg), - "GPU HANG: ecode %d:%d:0x%08x", - INTEL_GEN(dev_priv), engine_id, ecode); - - if (engine_id != -1 && error->engine[engine_id].context.pid) + "GPU HANG: ecode %d:%lx:0x%08x", + INTEL_GEN(error->i915), engines, + i915_error_generate_code(error, engines)); + if (engines) { + /* Just show the first executing process, more is confusing */ + i = ffs(engines); len += scnprintf(error->error_msg + len, sizeof(error->error_msg) - len, ", in %s [%d]", - error->engine[engine_id].context.comm, - error->engine[engine_id].context.pid); + error->engine[i].context.comm, + error->engine[i].context.pid); + } + if (msg) + len += scnprintf(error->error_msg + len, + sizeof(error->error_msg) - len, + ", %s", msg); - scnprintf(error->error_msg + len, sizeof(error->error_msg) - len, - ", reason: %s, action: %s", - error_msg, - engine_mask ? "reset" : "continue"); + return error->error_msg; } static void capture_gen_state(struct i915_gpu_state *error) @@ -1847,7 +1773,7 @@ static unsigned long capture_find_epoch(const struct i915_gpu_state *error) for (i = 0; i < ARRAY_SIZE(error->engine); i++) { const struct drm_i915_error_engine *ee = &error->engine[i]; - if (ee->hangcheck_stalled && + if (ee->hangcheck_timestamp && time_before(ee->hangcheck_timestamp, epoch)) epoch = ee->hangcheck_timestamp; } @@ -1921,7 +1847,7 @@ i915_capture_gpu_state(struct drm_i915_private *i915) * i915_capture_error_state - capture an error record for later analysis * @i915: i915 device * @engine_mask: the mask of engines triggering the hang - * @error_msg: a message to insert into the error capture header + * @msg: a message to insert into the error capture header * * Should be called when an error is detected (either a hang or an error * interrupt) to capture error state from the time of the error. Fills @@ -1929,8 +1855,8 @@ i915_capture_gpu_state(struct drm_i915_private *i915) * to pick up. */ void i915_capture_error_state(struct drm_i915_private *i915, - u32 engine_mask, - const char *error_msg) + unsigned long engine_mask, + const char *msg) { static bool warned; struct i915_gpu_state *error; @@ -1946,8 +1872,7 @@ void i915_capture_error_state(struct drm_i915_private *i915, if (IS_ERR(error)) return; - i915_error_capture_msg(i915, error, engine_mask, error_msg); - DRM_INFO("%s\n", error->error_msg); + dev_info(i915->drm.dev, "%s\n", error_msg(error, engine_mask, msg)); if (!error->simulated) { spin_lock_irqsave(&i915->gpu_error.lock, flags); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 604291f7762d..53b1f22dd365 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -82,11 +82,7 @@ struct i915_gpu_state { int engine_id; /* Software tracked state */ bool idle; - bool waiting; - int num_waiters; unsigned long hangcheck_timestamp; - bool hangcheck_stalled; - enum intel_engine_hangcheck_action hangcheck_action; struct i915_address_space *vm; int num_requests; u32 reset_count; @@ -149,6 +145,7 @@ struct i915_gpu_state { struct drm_i915_error_object *default_state; struct drm_i915_error_request { + unsigned long flags; long jiffies; pid_t pid; u32 context; @@ -161,12 +158,6 @@ struct i915_gpu_state { } *requests, execlist[EXECLIST_MAX_PORTS]; unsigned int num_ports; - struct drm_i915_error_waiter { - char comm[TASK_COMM_LEN]; - pid_t pid; - u32 seqno; - } *waiters; - struct { u32 gfx_mode; union { @@ -197,6 +188,8 @@ struct i915_gpu_state { struct scatterlist *sgl, *fit; }; +struct i915_gpu_restart; + struct i915_gpu_error { /* For hangcheck timer */ #define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */ @@ -211,8 +204,6 @@ struct i915_gpu_error { atomic_t pending_fb_pin; - unsigned long missed_irq_rings; - /** * State variable controlling the reset flow and count * @@ -247,15 +238,6 @@ struct i915_gpu_error { * i915_mutex_lock_interruptible()?). I915_RESET_BACKOFF serves a * secondary role in preventing two concurrent global reset attempts. * - * #I915_RESET_HANDOFF - To perform the actual GPU reset, we need the - * struct_mutex. We try to acquire the struct_mutex in the reset worker, - * but it may be held by some long running waiter (that we cannot - * interrupt without causing trouble). Once we are ready to do the GPU - * reset, we set the I915_RESET_HANDOFF bit and wakeup any waiters. If - * they already hold the struct_mutex and want to participate they can - * inspect the bit and do the reset directly, otherwise the worker - * waits for the struct_mutex. - * * #I915_RESET_ENGINE[num_engines] - Since the driver doesn't need to * acquire the struct_mutex to reset an engine, we need an explicit * flag to prevent two concurrent reset attempts in the same engine. @@ -269,20 +251,13 @@ struct i915_gpu_error { */ unsigned long flags; #define I915_RESET_BACKOFF 0 -#define I915_RESET_HANDOFF 1 -#define I915_RESET_MODESET 2 -#define I915_RESET_ENGINE 3 +#define I915_RESET_MODESET 1 +#define I915_RESET_ENGINE 2 #define I915_WEDGED (BITS_PER_LONG - 1) /** Number of times an engine has been reset */ u32 reset_engine_count[I915_NUM_ENGINES]; - /** Set of stalled engines with guilty requests, in the current reset */ - u32 stalled_mask; - - /** Reason for the current *global* reset */ - const char *reason; - struct mutex wedge_mutex; /* serialises wedging/unwedging */ /** @@ -297,8 +272,7 @@ struct i915_gpu_error { */ wait_queue_head_t reset_queue; - /* For missed irq/seqno simulation. */ - unsigned long test_irq_rings; + struct i915_gpu_restart *restart; }; struct drm_i915_error_state_buf { @@ -320,7 +294,7 @@ void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...); struct i915_gpu_state *i915_capture_gpu_state(struct drm_i915_private *i915); void i915_capture_error_state(struct drm_i915_private *dev_priv, - u32 engine_mask, + unsigned long engine_mask, const char *error_msg); static inline struct i915_gpu_state * diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 7e56611b3d60..441d2674b272 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -823,11 +823,26 @@ static void i915_enable_asle_pipestat(struct drm_i915_private *dev_priv) static u32 i915_get_vblank_counter(struct drm_device *dev, unsigned int pipe) { struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_vblank_crtc *vblank = &dev->vblank[pipe]; + const struct drm_display_mode *mode = &vblank->hwmode; i915_reg_t high_frame, low_frame; u32 high1, high2, low, pixel, vbl_start, hsync_start, htotal; - const struct drm_display_mode *mode = &dev->vblank[pipe].hwmode; unsigned long irqflags; + /* + * On i965gm TV output the frame counter only works up to + * the point when we enable the TV encoder. After that the + * frame counter ceases to work and reads zero. We need a + * vblank wait before enabling the TV encoder and so we + * have to enable vblank interrupts while the frame counter + * is still in a working state. However the core vblank code + * does not like us returning non-zero frame counter values + * when we've told it that we don't have a working frame + * counter. Thus we must stop non-zero values leaking out. + */ + if (!vblank->max_vblank_count) + return 0; + htotal = mode->crtc_htotal; hsync_start = mode->crtc_hsync_start; vbl_start = mode->crtc_vblank_start; @@ -999,6 +1014,9 @@ static bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, int position; int vbl_start, vbl_end, hsync_start, htotal, vtotal; unsigned long irqflags; + bool use_scanline_counter = INTEL_GEN(dev_priv) >= 5 || + IS_G4X(dev_priv) || IS_GEN(dev_priv, 2) || + mode->private_flags & I915_MODE_FLAG_USE_SCANLINE_COUNTER; if (WARN_ON(!mode->crtc_clock)) { DRM_DEBUG_DRIVER("trying to get scanoutpos for disabled " @@ -1031,7 +1049,7 @@ static bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, if (stime) *stime = ktime_get(); - if (IS_GEN(dev_priv, 2) || IS_G4X(dev_priv) || INTEL_GEN(dev_priv) >= 5) { + if (use_scanline_counter) { /* No obvious pixelcount register. Only query vertical * scanout position from Display scan line register. */ @@ -1091,7 +1109,7 @@ static bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, else position += vtotal - vbl_end; - if (IS_GEN(dev_priv, 2) || IS_G4X(dev_priv) || INTEL_GEN(dev_priv) >= 5) { + if (use_scanline_counter) { *vpos = position; *hpos = 0; } else { @@ -1153,68 +1171,6 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv) return; } -static void notify_ring(struct intel_engine_cs *engine) -{ - const u32 seqno = intel_engine_get_seqno(engine); - struct i915_request *rq = NULL; - struct task_struct *tsk = NULL; - struct intel_wait *wait; - - if (unlikely(!engine->breadcrumbs.irq_armed)) - return; - - rcu_read_lock(); - - spin_lock(&engine->breadcrumbs.irq_lock); - wait = engine->breadcrumbs.irq_wait; - if (wait) { - /* - * We use a callback from the dma-fence to submit - * requests after waiting on our own requests. To - * ensure minimum delay in queuing the next request to - * hardware, signal the fence now rather than wait for - * the signaler to be woken up. We still wake up the - * waiter in order to handle the irq-seqno coherency - * issues (we may receive the interrupt before the - * seqno is written, see __i915_request_irq_complete()) - * and to handle coalescing of multiple seqno updates - * and many waiters. - */ - if (i915_seqno_passed(seqno, wait->seqno)) { - struct i915_request *waiter = wait->request; - - if (waiter && - !i915_request_signaled(waiter) && - intel_wait_check_request(wait, waiter)) - rq = i915_request_get(waiter); - - tsk = wait->tsk; - } - - engine->breadcrumbs.irq_count++; - } else { - if (engine->breadcrumbs.irq_armed) - __intel_engine_disarm_breadcrumbs(engine); - } - spin_unlock(&engine->breadcrumbs.irq_lock); - - if (rq) { - spin_lock(&rq->lock); - dma_fence_signal_locked(&rq->fence); - GEM_BUG_ON(!i915_request_completed(rq)); - spin_unlock(&rq->lock); - - i915_request_put(rq); - } - - if (tsk && tsk->state & TASK_NORMAL) - wake_up_process(tsk); - - rcu_read_unlock(); - - trace_intel_engine_notify(engine, wait); -} - static void vlv_c0_read(struct drm_i915_private *dev_priv, struct intel_rps_ei *ei) { @@ -1459,20 +1415,20 @@ static void ilk_gt_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir) { if (gt_iir & GT_RENDER_USER_INTERRUPT) - notify_ring(dev_priv->engine[RCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]); if (gt_iir & ILK_BSD_USER_INTERRUPT) - notify_ring(dev_priv->engine[VCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[VCS]); } static void snb_gt_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir) { if (gt_iir & GT_RENDER_USER_INTERRUPT) - notify_ring(dev_priv->engine[RCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]); if (gt_iir & GT_BSD_USER_INTERRUPT) - notify_ring(dev_priv->engine[VCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[VCS]); if (gt_iir & GT_BLT_USER_INTERRUPT) - notify_ring(dev_priv->engine[BCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[BCS]); if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT | GT_BSD_CS_ERROR_INTERRUPT | @@ -1492,7 +1448,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir) tasklet = true; if (iir & GT_RENDER_USER_INTERRUPT) { - notify_ring(engine); + intel_engine_breadcrumbs_irq(engine); tasklet |= USES_GUC_SUBMISSION(engine->i915); } @@ -1838,7 +1794,7 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir) if (HAS_VEBOX(dev_priv)) { if (pm_iir & PM_VEBOX_USER_INTERRUPT) - notify_ring(dev_priv->engine[VECS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[VECS]); if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT) DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir); @@ -4262,7 +4218,7 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg) I915_WRITE16(IIR, iir); if (iir & I915_USER_INTERRUPT) - notify_ring(dev_priv->engine[RCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]); if (iir & I915_MASTER_ERROR_INTERRUPT) i8xx_error_irq_handler(dev_priv, eir, eir_stuck); @@ -4370,7 +4326,7 @@ static irqreturn_t i915_irq_handler(int irq, void *arg) I915_WRITE(IIR, iir); if (iir & I915_USER_INTERRUPT) - notify_ring(dev_priv->engine[RCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]); if (iir & I915_MASTER_ERROR_INTERRUPT) i9xx_error_irq_handler(dev_priv, eir, eir_stuck); @@ -4515,10 +4471,10 @@ static irqreturn_t i965_irq_handler(int irq, void *arg) I915_WRITE(IIR, iir); if (iir & I915_USER_INTERRUPT) - notify_ring(dev_priv->engine[RCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]); if (iir & I915_BSD_USER_INTERRUPT) - notify_ring(dev_priv->engine[VCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[VCS]); if (iir & I915_MASTER_ERROR_INTERRUPT) i9xx_error_irq_handler(dev_priv, eir, eir_stuck); @@ -4581,16 +4537,10 @@ void intel_irq_init(struct drm_i915_private *dev_priv) if (INTEL_GEN(dev_priv) >= 8) rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; - if (IS_GEN(dev_priv, 2)) { - /* Gen2 doesn't have a hardware frame counter */ - dev->max_vblank_count = 0; - } else if (IS_G4X(dev_priv) || INTEL_GEN(dev_priv) >= 5) { - dev->max_vblank_count = 0xffffffff; /* full 32 bit counter */ + if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) dev->driver->get_vblank_counter = g4x_get_vblank_counter; - } else { + else if (INTEL_GEN(dev_priv) >= 3) dev->driver->get_vblank_counter = i915_get_vblank_counter; - dev->max_vblank_count = 0xffffff; /* only 24 bits of frame count */ - } /* * Opt out of the vblank disable timer on everything except gen2. diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 9f0539bdaa39..b5be0abbba35 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -97,8 +97,10 @@ i915_param_named_unsafe(disable_power_well, int, 0400, i915_param_named_unsafe(enable_ips, int, 0600, "Enable IPS (default: true)"); -i915_param_named(fastboot, bool, 0600, - "Try to skip unnecessary mode sets at boot time (default: false)"); +i915_param_named(fastboot, int, 0600, + "Try to skip unnecessary mode sets at boot time " + "(0=disabled, 1=enabled) " + "Default: -1 (use per-chip default)"); i915_param_named_unsafe(prefault_disable, bool, 0600, "Disable page prefaulting for pread/pwrite/reloc (default:false). " diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 6efcf330bdab..3f14e9881a0d 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -63,10 +63,10 @@ struct drm_printer; param(int, edp_vswing, 0) \ param(int, reset, 2) \ param(unsigned int, inject_load_failure, 0) \ + param(int, fastboot, -1) \ /* leave bools at the end to not create holes */ \ param(bool, alpha_support, IS_ENABLED(CONFIG_DRM_I915_ALPHA_SUPPORT)) \ param(bool, enable_hangcheck, true) \ - param(bool, fastboot, false) \ param(bool, prefault_disable, false) \ param(bool, load_detect_test, false) \ param(bool, force_reset_modeset_test, false) \ diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 44c23ac60347..5d05572c9ff4 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -69,9 +69,15 @@ #define BDW_COLORS \ .color = { .degamma_lut_size = 512, .gamma_lut_size = 512 } #define CHV_COLORS \ - .color = { .degamma_lut_size = 65, .gamma_lut_size = 257 } + .color = { .degamma_lut_size = 65, .gamma_lut_size = 257, \ + .degamma_lut_tests = DRM_COLOR_LUT_NON_DECREASING, \ + .gamma_lut_tests = DRM_COLOR_LUT_NON_DECREASING, \ + } #define GLK_COLORS \ - .color = { .degamma_lut_size = 0, .gamma_lut_size = 1024 } + .color = { .degamma_lut_size = 0, .gamma_lut_size = 1024, \ + .degamma_lut_tests = DRM_COLOR_LUT_NON_DECREASING | \ + DRM_COLOR_LUT_EQUAL_CHANNELS, \ + } /* Keep in gen based order, and chronological order within a gen */ @@ -707,6 +713,7 @@ static const struct pci_device_id pciidlist[] = { INTEL_AML_KBL_GT2_IDS(&intel_kabylake_gt2_info), INTEL_CFL_S_GT1_IDS(&intel_coffeelake_gt1_info), INTEL_CFL_S_GT2_IDS(&intel_coffeelake_gt2_info), + INTEL_CFL_H_GT1_IDS(&intel_coffeelake_gt1_info), INTEL_CFL_H_GT2_IDS(&intel_coffeelake_gt2_info), INTEL_CFL_U_GT2_IDS(&intel_coffeelake_gt2_info), INTEL_CFL_U_GT3_IDS(&intel_coffeelake_gt3_info), diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index f4e447437d75..ede54fdc1676 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2801,6 +2801,9 @@ enum i915_power_well_id { #define GEN6_RCS_PWR_FSM _MMIO(0x22ac) #define GEN9_RCS_FE_FSM2 _MMIO(0x22a4) +#define GEN10_CACHE_MODE_SS _MMIO(0xe420) +#define FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4) + /* Fuse readout registers for GT */ #define HSW_PAVP_FUSE1 _MMIO(0x911C) #define HSW_F1_EU_DIS_SHIFT 16 @@ -4895,6 +4898,7 @@ enum { # define TV_OVERSAMPLE_NONE (2 << 18) /* Selects 8x oversampling */ # define TV_OVERSAMPLE_8X (3 << 18) +# define TV_OVERSAMPLE_MASK (3 << 18) /* Selects progressive mode rather than interlaced */ # define TV_PROGRESSIVE (1 << 17) /* Sets the colorburst to PAL mode. Required for non-M PAL modes. */ @@ -5709,6 +5713,12 @@ enum { #define PIPEMISC_DITHER_TYPE_SP (0 << 2) #define PIPEMISC(pipe) _MMIO_PIPE2(pipe, _PIPE_MISC_A) +/* Skylake+ pipe bottom (background) color */ +#define _SKL_BOTTOM_COLOR_A 0x70034 +#define SKL_BOTTOM_COLOR_GAMMA_ENABLE (1 << 31) +#define SKL_BOTTOM_COLOR_CSC_ENABLE (1 << 30) +#define SKL_BOTTOM_COLOR(pipe) _MMIO_PIPE2(pipe, _SKL_BOTTOM_COLOR_A) + #define VLV_DPFLIPSTAT _MMIO(VLV_DISPLAY_BASE + 0x70028) #define PIPEB_LINE_COMPARE_INT_EN (1 << 29) #define PIPEB_HLINE_INT_EN (1 << 28) @@ -9553,7 +9563,7 @@ enum skl_power_gate { #define _MG_PLL3_ENABLE 0x46038 #define _MG_PLL4_ENABLE 0x4603C /* Bits are the same as DPLL0_ENABLE */ -#define MG_PLL_ENABLE(port) _MMIO_PORT((port) - PORT_C, _MG_PLL1_ENABLE, \ +#define MG_PLL_ENABLE(tc_port) _MMIO_PORT((tc_port), _MG_PLL1_ENABLE, \ _MG_PLL2_ENABLE) #define _MG_REFCLKIN_CTL_PORT1 0x16892C @@ -9562,9 +9572,9 @@ enum skl_power_gate { #define _MG_REFCLKIN_CTL_PORT4 0x16B92C #define MG_REFCLKIN_CTL_OD_2_MUX(x) ((x) << 8) #define MG_REFCLKIN_CTL_OD_2_MUX_MASK (0x7 << 8) -#define MG_REFCLKIN_CTL(port) _MMIO_PORT((port) - PORT_C, \ - _MG_REFCLKIN_CTL_PORT1, \ - _MG_REFCLKIN_CTL_PORT2) +#define MG_REFCLKIN_CTL(tc_port) _MMIO_PORT((tc_port), \ + _MG_REFCLKIN_CTL_PORT1, \ + _MG_REFCLKIN_CTL_PORT2) #define _MG_CLKTOP2_CORECLKCTL1_PORT1 0x1688D8 #define _MG_CLKTOP2_CORECLKCTL1_PORT2 0x1698D8 @@ -9574,9 +9584,9 @@ enum skl_power_gate { #define MG_CLKTOP2_CORECLKCTL1_B_DIVRATIO_MASK (0xff << 16) #define MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO(x) ((x) << 8) #define MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO_MASK (0xff << 8) -#define MG_CLKTOP2_CORECLKCTL1(port) _MMIO_PORT((port) - PORT_C, \ - _MG_CLKTOP2_CORECLKCTL1_PORT1, \ - _MG_CLKTOP2_CORECLKCTL1_PORT2) +#define MG_CLKTOP2_CORECLKCTL1(tc_port) _MMIO_PORT((tc_port), \ + _MG_CLKTOP2_CORECLKCTL1_PORT1, \ + _MG_CLKTOP2_CORECLKCTL1_PORT2) #define _MG_CLKTOP2_HSCLKCTL_PORT1 0x1688D4 #define _MG_CLKTOP2_HSCLKCTL_PORT2 0x1698D4 @@ -9594,9 +9604,9 @@ enum skl_power_gate { #define MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO(x) ((x) << 8) #define MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_SHIFT 8 #define MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_MASK (0xf << 8) -#define MG_CLKTOP2_HSCLKCTL(port) _MMIO_PORT((port) - PORT_C, \ - _MG_CLKTOP2_HSCLKCTL_PORT1, \ - _MG_CLKTOP2_HSCLKCTL_PORT2) +#define MG_CLKTOP2_HSCLKCTL(tc_port) _MMIO_PORT((tc_port), \ + _MG_CLKTOP2_HSCLKCTL_PORT1, \ + _MG_CLKTOP2_HSCLKCTL_PORT2) #define _MG_PLL_DIV0_PORT1 0x168A00 #define _MG_PLL_DIV0_PORT2 0x169A00 @@ -9608,8 +9618,8 @@ enum skl_power_gate { #define MG_PLL_DIV0_FBDIV_FRAC(x) ((x) << 8) #define MG_PLL_DIV0_FBDIV_INT_MASK (0xff << 0) #define MG_PLL_DIV0_FBDIV_INT(x) ((x) << 0) -#define MG_PLL_DIV0(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_DIV0_PORT1, \ - _MG_PLL_DIV0_PORT2) +#define MG_PLL_DIV0(tc_port) _MMIO_PORT((tc_port), _MG_PLL_DIV0_PORT1, \ + _MG_PLL_DIV0_PORT2) #define _MG_PLL_DIV1_PORT1 0x168A04 #define _MG_PLL_DIV1_PORT2 0x169A04 @@ -9623,8 +9633,8 @@ enum skl_power_gate { #define MG_PLL_DIV1_NDIVRATIO(x) ((x) << 4) #define MG_PLL_DIV1_FBPREDIV_MASK (0xf << 0) #define MG_PLL_DIV1_FBPREDIV(x) ((x) << 0) -#define MG_PLL_DIV1(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_DIV1_PORT1, \ - _MG_PLL_DIV1_PORT2) +#define MG_PLL_DIV1(tc_port) _MMIO_PORT((tc_port), _MG_PLL_DIV1_PORT1, \ + _MG_PLL_DIV1_PORT2) #define _MG_PLL_LF_PORT1 0x168A08 #define _MG_PLL_LF_PORT2 0x169A08 @@ -9636,8 +9646,8 @@ enum skl_power_gate { #define MG_PLL_LF_GAINCTRL(x) ((x) << 16) #define MG_PLL_LF_INT_COEFF(x) ((x) << 8) #define MG_PLL_LF_PROP_COEFF(x) ((x) << 0) -#define MG_PLL_LF(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_LF_PORT1, \ - _MG_PLL_LF_PORT2) +#define MG_PLL_LF(tc_port) _MMIO_PORT((tc_port), _MG_PLL_LF_PORT1, \ + _MG_PLL_LF_PORT2) #define _MG_PLL_FRAC_LOCK_PORT1 0x168A0C #define _MG_PLL_FRAC_LOCK_PORT2 0x169A0C @@ -9649,9 +9659,9 @@ enum skl_power_gate { #define MG_PLL_FRAC_LOCK_DCODITHEREN (1 << 10) #define MG_PLL_FRAC_LOCK_FEEDFWRDCAL_EN (1 << 8) #define MG_PLL_FRAC_LOCK_FEEDFWRDGAIN(x) ((x) << 0) -#define MG_PLL_FRAC_LOCK(port) _MMIO_PORT((port) - PORT_C, \ - _MG_PLL_FRAC_LOCK_PORT1, \ - _MG_PLL_FRAC_LOCK_PORT2) +#define MG_PLL_FRAC_LOCK(tc_port) _MMIO_PORT((tc_port), \ + _MG_PLL_FRAC_LOCK_PORT1, \ + _MG_PLL_FRAC_LOCK_PORT2) #define _MG_PLL_SSC_PORT1 0x168A10 #define _MG_PLL_SSC_PORT2 0x169A10 @@ -9663,8 +9673,8 @@ enum skl_power_gate { #define MG_PLL_SSC_STEPNUM(x) ((x) << 10) #define MG_PLL_SSC_FLLEN (1 << 9) #define MG_PLL_SSC_STEPSIZE(x) ((x) << 0) -#define MG_PLL_SSC(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_SSC_PORT1, \ - _MG_PLL_SSC_PORT2) +#define MG_PLL_SSC(tc_port) _MMIO_PORT((tc_port), _MG_PLL_SSC_PORT1, \ + _MG_PLL_SSC_PORT2) #define _MG_PLL_BIAS_PORT1 0x168A14 #define _MG_PLL_BIAS_PORT2 0x169A14 @@ -9683,8 +9693,8 @@ enum skl_power_gate { #define MG_PLL_BIAS_VREF_RDAC_MASK (0x7 << 5) #define MG_PLL_BIAS_IREFTRIM(x) ((x) << 0) #define MG_PLL_BIAS_IREFTRIM_MASK (0x1f << 0) -#define MG_PLL_BIAS(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_BIAS_PORT1, \ - _MG_PLL_BIAS_PORT2) +#define MG_PLL_BIAS(tc_port) _MMIO_PORT((tc_port), _MG_PLL_BIAS_PORT1, \ + _MG_PLL_BIAS_PORT2) #define _MG_PLL_TDC_COLDST_BIAS_PORT1 0x168A18 #define _MG_PLL_TDC_COLDST_BIAS_PORT2 0x169A18 @@ -9695,9 +9705,9 @@ enum skl_power_gate { #define MG_PLL_TDC_COLDST_COLDSTART (1 << 16) #define MG_PLL_TDC_TDCOVCCORR_EN (1 << 2) #define MG_PLL_TDC_TDCSEL(x) ((x) << 0) -#define MG_PLL_TDC_COLDST_BIAS(port) _MMIO_PORT((port) - PORT_C, \ - _MG_PLL_TDC_COLDST_BIAS_PORT1, \ - _MG_PLL_TDC_COLDST_BIAS_PORT2) +#define MG_PLL_TDC_COLDST_BIAS(tc_port) _MMIO_PORT((tc_port), \ + _MG_PLL_TDC_COLDST_BIAS_PORT1, \ + _MG_PLL_TDC_COLDST_BIAS_PORT2) #define _CNL_DPLL0_CFGCR0 0x6C000 #define _CNL_DPLL1_CFGCR0 0x6C080 diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index f941e40fd373..9ed5baf157a3 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -60,7 +60,7 @@ static bool i915_fence_signaled(struct dma_fence *fence) static bool i915_fence_enable_signaling(struct dma_fence *fence) { - return intel_engine_enable_signaling(to_request(fence), true); + return i915_request_enable_breadcrumb(to_request(fence)); } static signed long i915_fence_wait(struct dma_fence *fence, @@ -182,10 +182,11 @@ static void free_capture_list(struct i915_request *request) static void __retire_engine_request(struct intel_engine_cs *engine, struct i915_request *rq) { - GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d\n", + GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d:%d\n", __func__, engine->name, rq->fence.context, rq->fence.seqno, rq->global_seqno, + hwsp_seqno(rq), intel_engine_get_seqno(engine)); GEM_BUG_ON(!i915_request_completed(rq)); @@ -198,10 +199,11 @@ static void __retire_engine_request(struct intel_engine_cs *engine, spin_unlock(&engine->timeline.lock); spin_lock(&rq->lock); + i915_request_mark_complete(rq); if (!i915_request_signaled(rq)) dma_fence_signal_locked(&rq->fence); if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags)) - intel_engine_cancel_signaling(rq); + i915_request_cancel_breadcrumb(rq); if (rq->waitboost) { GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters)); atomic_dec(&rq->i915->gt_pm.rps.num_waiters); @@ -244,10 +246,11 @@ static void i915_request_retire(struct i915_request *request) { struct i915_gem_active *active, *next; - GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n", + GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n", request->engine->name, request->fence.context, request->fence.seqno, request->global_seqno, + hwsp_seqno(request), intel_engine_get_seqno(request->engine)); lockdep_assert_held(&request->i915->drm.struct_mutex); @@ -307,10 +310,11 @@ void i915_request_retire_upto(struct i915_request *rq) struct intel_ring *ring = rq->ring; struct i915_request *tmp; - GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n", + GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n", rq->engine->name, rq->fence.context, rq->fence.seqno, rq->global_seqno, + hwsp_seqno(rq), intel_engine_get_seqno(rq->engine)); lockdep_assert_held(&rq->i915->drm.struct_mutex); @@ -329,7 +333,7 @@ void i915_request_retire_upto(struct i915_request *rq) static u32 timeline_get_seqno(struct i915_timeline *tl) { - return ++tl->seqno; + return tl->seqno += 1 + tl->has_initial_breadcrumb; } static void move_to_timeline(struct i915_request *request, @@ -355,10 +359,11 @@ void __i915_request_submit(struct i915_request *request) struct intel_engine_cs *engine = request->engine; u32 seqno; - GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d\n", + GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d:%d\n", engine->name, request->fence.context, request->fence.seqno, engine->timeline.seqno + 1, + hwsp_seqno(request), intel_engine_get_seqno(engine)); GEM_BUG_ON(!irqs_disabled()); @@ -372,20 +377,21 @@ void __i915_request_submit(struct i915_request *request) /* We may be recursing from the signal callback of another i915 fence */ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); + GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); + set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags); request->global_seqno = seqno; - if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) - intel_engine_enable_signaling(request, false); + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) && + !i915_request_enable_breadcrumb(request)) + intel_engine_queue_breadcrumbs(engine); spin_unlock(&request->lock); - engine->emit_breadcrumb(request, - request->ring->vaddr + request->postfix); + engine->emit_fini_breadcrumb(request, + request->ring->vaddr + request->postfix); /* Transfer from per-context onto the global per-engine timeline */ move_to_timeline(request, &engine->timeline); trace_i915_request_execute(request); - - wake_up_all(&request->execute); } void i915_request_submit(struct i915_request *request) @@ -405,10 +411,11 @@ void __i915_request_unsubmit(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; - GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d\n", + GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d:%d\n", engine->name, request->fence.context, request->fence.seqno, request->global_seqno, + hwsp_seqno(request), intel_engine_get_seqno(engine)); GEM_BUG_ON(!irqs_disabled()); @@ -427,7 +434,9 @@ void __i915_request_unsubmit(struct i915_request *request) spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); request->global_seqno = 0; if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) - intel_engine_cancel_signaling(request); + i915_request_cancel_breadcrumb(request); + GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); + clear_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags); spin_unlock(&request->lock); /* Transfer back from the global per-engine timeline to per-context */ @@ -616,6 +625,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) rq->ring = ce->ring; rq->timeline = ce->ring->timeline; GEM_BUG_ON(rq->timeline == &engine->timeline); + rq->hwsp_seqno = rq->timeline->hwsp_seqno; spin_lock_init(&rq->lock); dma_fence_init(&rq->fence, @@ -626,13 +636,11 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) /* We bump the ref for the fence chain */ i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify); - init_waitqueue_head(&rq->execute); i915_sched_node_init(&rq->sched); /* No zalloc, must clear what we need by hand */ rq->global_seqno = 0; - rq->signaling.wait.seqno = 0; rq->file_priv = NULL; rq->batch = NULL; rq->capture_list = NULL; @@ -650,7 +658,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) * around inside i915_request_add() there is sufficient space at * the beginning of the ring as well. */ - rq->reserved_space = 2 * engine->emit_breadcrumb_sz * sizeof(u32); + rq->reserved_space = 2 * engine->emit_fini_breadcrumb_dw * sizeof(u32); /* * Record the position of the start of the request so that @@ -901,7 +909,7 @@ void i915_request_add(struct i915_request *request) * GPU processing the request, we never over-estimate the * position of the ring's HEAD. */ - cs = intel_ring_begin(request, engine->emit_breadcrumb_sz); + cs = intel_ring_begin(request, engine->emit_fini_breadcrumb_dw); GEM_BUG_ON(IS_ERR(cs)); request->postfix = intel_ring_offset(request, cs); @@ -1023,13 +1031,10 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu) return this_cpu != cpu; } -static bool __i915_spin_request(const struct i915_request *rq, - u32 seqno, int state, unsigned long timeout_us) +static bool __i915_spin_request(const struct i915_request * const rq, + int state, unsigned long timeout_us) { - struct intel_engine_cs *engine = rq->engine; - unsigned int irq, cpu; - - GEM_BUG_ON(!seqno); + unsigned int cpu; /* * Only wait for the request if we know it is likely to complete. @@ -1037,12 +1042,12 @@ static bool __i915_spin_request(const struct i915_request *rq, * We don't track the timestamps around requests, nor the average * request length, so we do not have a good indicator that this * request will complete within the timeout. What we do know is the - * order in which requests are executed by the engine and so we can - * tell if the request has started. If the request hasn't started yet, - * it is a fair assumption that it will not complete within our - * relatively short timeout. + * order in which requests are executed by the context and so we can + * tell if the request has been started. If the request is not even + * running yet, it is a fair assumption that it will not complete + * within our relatively short timeout. */ - if (!intel_engine_has_started(engine, seqno)) + if (!i915_request_is_running(rq)) return false; /* @@ -1056,20 +1061,10 @@ static bool __i915_spin_request(const struct i915_request *rq, * takes to sleep on a request, on the order of a microsecond. */ - irq = READ_ONCE(engine->breadcrumbs.irq_count); timeout_us += local_clock_us(&cpu); do { - if (intel_engine_has_completed(engine, seqno)) - return seqno == i915_request_global_seqno(rq); - - /* - * Seqno are meant to be ordered *before* the interrupt. If - * we see an interrupt without a corresponding seqno advance, - * assume we won't see one in the near future but require - * the engine->seqno_barrier() to fixup coherency. - */ - if (READ_ONCE(engine->breadcrumbs.irq_count) != irq) - break; + if (i915_request_completed(rq)) + return true; if (signal_pending_state(state, current)) break; @@ -1083,16 +1078,16 @@ static bool __i915_spin_request(const struct i915_request *rq, return false; } -static bool __i915_wait_request_check_and_reset(struct i915_request *request) -{ - struct i915_gpu_error *error = &request->i915->gpu_error; +struct request_wait { + struct dma_fence_cb cb; + struct task_struct *tsk; +}; - if (likely(!i915_reset_handoff(error))) - return false; +static void request_wait_wake(struct dma_fence *fence, struct dma_fence_cb *cb) +{ + struct request_wait *wait = container_of(cb, typeof(*wait), cb); - __set_current_state(TASK_RUNNING); - i915_reset(request->i915, error->stalled_mask, error->reason); - return true; + wake_up_process(wait->tsk); } /** @@ -1120,17 +1115,9 @@ long i915_request_wait(struct i915_request *rq, { const int state = flags & I915_WAIT_INTERRUPTIBLE ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; - wait_queue_head_t *errq = &rq->i915->gpu_error.wait_queue; - DEFINE_WAIT_FUNC(reset, default_wake_function); - DEFINE_WAIT_FUNC(exec, default_wake_function); - struct intel_wait wait; + struct request_wait wait; might_sleep(); -#if IS_ENABLED(CONFIG_LOCKDEP) - GEM_BUG_ON(debug_locks && - !!lockdep_is_held(&rq->i915->drm.struct_mutex) != - !!(flags & I915_WAIT_LOCKED)); -#endif GEM_BUG_ON(timeout < 0); if (i915_request_completed(rq)) @@ -1141,57 +1128,23 @@ long i915_request_wait(struct i915_request *rq, trace_i915_request_wait_begin(rq, flags); - add_wait_queue(&rq->execute, &exec); - if (flags & I915_WAIT_LOCKED) - add_wait_queue(errq, &reset); + /* Optimistic short spin before touching IRQs */ + if (__i915_spin_request(rq, state, 5)) + goto out; - intel_wait_init(&wait); if (flags & I915_WAIT_PRIORITY) i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT); -restart: - do { - set_current_state(state); - if (intel_wait_update_request(&wait, rq)) - break; - - if (flags & I915_WAIT_LOCKED && - __i915_wait_request_check_and_reset(rq)) - continue; - - if (signal_pending_state(state, current)) { - timeout = -ERESTARTSYS; - goto complete; - } - - if (!timeout) { - timeout = -ETIME; - goto complete; - } - - timeout = io_schedule_timeout(timeout); - } while (1); - - GEM_BUG_ON(!intel_wait_has_seqno(&wait)); - GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); - - /* Optimistic short spin before touching IRQs */ - if (__i915_spin_request(rq, wait.seqno, state, 5)) - goto complete; + wait.tsk = current; + if (dma_fence_add_callback(&rq->fence, &wait.cb, request_wait_wake)) + goto out; - set_current_state(state); - if (intel_engine_add_wait(rq->engine, &wait)) - /* - * In order to check that we haven't missed the interrupt - * as we enabled it, we need to kick ourselves to do a - * coherent check on the seqno before we sleep. - */ - goto wakeup; + for (;;) { + set_current_state(state); - if (flags & I915_WAIT_LOCKED) - __i915_wait_request_check_and_reset(rq); + if (i915_request_completed(rq)) + break; - for (;;) { if (signal_pending_state(state, current)) { timeout = -ERESTARTSYS; break; @@ -1203,50 +1156,13 @@ restart: } timeout = io_schedule_timeout(timeout); - - if (intel_wait_complete(&wait) && - intel_wait_check_request(&wait, rq)) - break; - - set_current_state(state); - -wakeup: - if (i915_request_completed(rq)) - break; - - /* - * If the GPU is hung, and we hold the lock, reset the GPU - * and then check for completion. On a full reset, the engine's - * HW seqno will be advanced passed us and we are complete. - * If we do a partial reset, we have to wait for the GPU to - * resume and update the breadcrumb. - * - * If we don't hold the mutex, we can just wait for the worker - * to come along and update the breadcrumb (either directly - * itself, or indirectly by recovering the GPU). - */ - if (flags & I915_WAIT_LOCKED && - __i915_wait_request_check_and_reset(rq)) - continue; - - /* Only spin if we know the GPU is processing this request */ - if (__i915_spin_request(rq, wait.seqno, state, 2)) - break; - - if (!intel_wait_check_request(&wait, rq)) { - intel_engine_remove_wait(rq->engine, &wait); - goto restart; - } } - - intel_engine_remove_wait(rq->engine, &wait); -complete: __set_current_state(TASK_RUNNING); - if (flags & I915_WAIT_LOCKED) - remove_wait_queue(errq, &reset); - remove_wait_queue(&rq->execute, &exec); - trace_i915_request_wait_end(rq); + dma_fence_remove_callback(&rq->fence, &wait.cb); + +out: + trace_i915_request_wait_end(rq); return timeout; } diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index c0f084ca4f29..3cffb96203b9 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -38,23 +38,34 @@ struct drm_i915_gem_object; struct i915_request; struct i915_timeline; -struct intel_wait { - struct rb_node node; - struct task_struct *tsk; - struct i915_request *request; - u32 seqno; -}; - -struct intel_signal_node { - struct intel_wait wait; - struct list_head link; -}; - struct i915_capture_list { struct i915_capture_list *next; struct i915_vma *vma; }; +enum { + /* + * I915_FENCE_FLAG_ACTIVE - this request is currently submitted to HW. + * + * Set by __i915_request_submit() on handing over to HW, and cleared + * by __i915_request_unsubmit() if we preempt this request. + * + * Finally cleared for consistency on retiring the request, when + * we know the HW is no longer running this request. + * + * See i915_request_is_active() + */ + I915_FENCE_FLAG_ACTIVE = DMA_FENCE_FLAG_USER_BITS, + + /* + * I915_FENCE_FLAG_SIGNAL - this request is currently on signal_list + * + * Internal bookkeeping used by the breadcrumb code to track when + * a request is on the various signal_list. + */ + I915_FENCE_FLAG_SIGNAL, +}; + /** * Request queue structure. * @@ -97,7 +108,7 @@ struct i915_request { struct intel_context *hw_context; struct intel_ring *ring; struct i915_timeline *timeline; - struct intel_signal_node signaling; + struct list_head signal_link; /* * The rcu epoch of when this request was allocated. Used to judiciously @@ -116,7 +127,6 @@ struct i915_request { */ struct i915_sw_fence submit; wait_queue_entry_t submitq; - wait_queue_head_t execute; /* * A list of everyone we wait upon, and everyone who waits upon us. @@ -130,6 +140,13 @@ struct i915_request { struct i915_sched_node sched; struct i915_dependency dep; + /* + * A convenience pointer to the current breadcrumb value stored in + * the HW status page (or our timeline's local equivalent). The full + * path would be rq->hw_context->ring->timeline->hwsp_seqno. + */ + const u32 *hwsp_seqno; + /** * GEM sequence number associated with this request on the * global execution timeline. It is zero when the request is not @@ -248,7 +265,7 @@ i915_request_put(struct i915_request *rq) * that it has passed the global seqno and the global seqno is unchanged * after the read, it is indeed complete). */ -static u32 +static inline u32 i915_request_global_seqno(const struct i915_request *request) { return READ_ONCE(request->global_seqno); @@ -270,6 +287,10 @@ void i915_request_skip(struct i915_request *request, int error); void __i915_request_unsubmit(struct i915_request *request); void i915_request_unsubmit(struct i915_request *request); +/* Note: part of the intel_breadcrumbs family */ +bool i915_request_enable_breadcrumb(struct i915_request *request); +void i915_request_cancel_breadcrumb(struct i915_request *request); + long i915_request_wait(struct i915_request *rq, unsigned int flags, long timeout) @@ -282,13 +303,14 @@ long i915_request_wait(struct i915_request *rq, static inline bool i915_request_signaled(const struct i915_request *rq) { + /* The request may live longer than its HWSP, so check flags first! */ return test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags); } -static inline bool intel_engine_has_started(struct intel_engine_cs *engine, - u32 seqno); -static inline bool intel_engine_has_completed(struct intel_engine_cs *engine, - u32 seqno); +static inline bool i915_request_is_active(const struct i915_request *rq) +{ + return test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); +} /** * Returns true if seq1 is later than seq2. @@ -298,6 +320,40 @@ static inline bool i915_seqno_passed(u32 seq1, u32 seq2) return (s32)(seq1 - seq2) >= 0; } +static inline u32 __hwsp_seqno(const struct i915_request *rq) +{ + return READ_ONCE(*rq->hwsp_seqno); +} + +/** + * hwsp_seqno - the current breadcrumb value in the HW status page + * @rq: the request, to chase the relevant HW status page + * + * The emphasis in naming here is that hwsp_seqno() is not a property of the + * request, but an indication of the current HW state (associated with this + * request). Its value will change as the GPU executes more requests. + * + * Returns the current breadcrumb value in the associated HW status page (or + * the local timeline's equivalent) for this request. The request itself + * has the associated breadcrumb value of rq->fence.seqno, when the HW + * status page has that breadcrumb or later, this request is complete. + */ +static inline u32 hwsp_seqno(const struct i915_request *rq) +{ + u32 seqno; + + rcu_read_lock(); /* the HWSP may be freed at runtime */ + seqno = __hwsp_seqno(rq); + rcu_read_unlock(); + + return seqno; +} + +static inline bool __i915_request_has_started(const struct i915_request *rq) +{ + return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno - 1); +} + /** * i915_request_started - check if the request has begun being executed * @rq: the request @@ -309,32 +365,40 @@ static inline bool i915_seqno_passed(u32 seq1, u32 seq2) */ static inline bool i915_request_started(const struct i915_request *rq) { - u32 seqno; + if (i915_request_signaled(rq)) + return true; - seqno = i915_request_global_seqno(rq); - if (!seqno) /* not yet submitted to HW */ - return false; - - return intel_engine_has_started(rq->engine, seqno); + /* Remember: started but may have since been preempted! */ + return __i915_request_has_started(rq); } -static inline bool -__i915_request_completed(const struct i915_request *rq, u32 seqno) +/** + * i915_request_is_running - check if the request may actually be executing + * @rq: the request + * + * Returns true if the request is currently submitted to hardware, has passed + * its start point (i.e. the context is setup and not busywaiting). Note that + * it may no longer be running by the time the function returns! + */ +static inline bool i915_request_is_running(const struct i915_request *rq) { - GEM_BUG_ON(!seqno); - return intel_engine_has_completed(rq->engine, seqno) && - seqno == i915_request_global_seqno(rq); + if (!i915_request_is_active(rq)) + return false; + + return __i915_request_has_started(rq); } static inline bool i915_request_completed(const struct i915_request *rq) { - u32 seqno; + if (i915_request_signaled(rq)) + return true; - seqno = i915_request_global_seqno(rq); - if (!seqno) - return false; + return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno); +} - return __i915_request_completed(rq, seqno); +static inline void i915_request_mark_complete(struct i915_request *rq) +{ + rq->hwsp_seqno = (u32 *)&rq->fence.seqno; /* decouple from HWSP */ } void i915_retire_requests(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c index 342d9ee42601..4462007a681c 100644 --- a/drivers/gpu/drm/i915/i915_reset.c +++ b/drivers/gpu/drm/i915/i915_reset.c @@ -5,6 +5,7 @@ */ #include <linux/sched/mm.h> +#include <linux/stop_machine.h> #include "i915_drv.h" #include "i915_gpu_error.h" @@ -12,27 +13,33 @@ #include "intel_guc.h" +#define RESET_MAX_RETRIES 3 + +/* XXX How to handle concurrent GGTT updates using tiling registers? */ +#define RESET_UNDER_STOP_MACHINE 0 + static void engine_skip_context(struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; struct i915_gem_context *hung_ctx = rq->gem_context; struct i915_timeline *timeline = rq->timeline; - unsigned long flags; + lockdep_assert_held(&engine->timeline.lock); GEM_BUG_ON(timeline == &engine->timeline); - spin_lock_irqsave(&engine->timeline.lock, flags); spin_lock(&timeline->lock); - list_for_each_entry_continue(rq, &engine->timeline.requests, link) - if (rq->gem_context == hung_ctx) - i915_request_skip(rq, -EIO); + if (i915_request_is_active(rq)) { + list_for_each_entry_continue(rq, + &engine->timeline.requests, link) + if (rq->gem_context == hung_ctx) + i915_request_skip(rq, -EIO); + } list_for_each_entry(rq, &timeline->requests, link) i915_request_skip(rq, -EIO); spin_unlock(&timeline->lock); - spin_unlock_irqrestore(&engine->timeline.lock, flags); } static void client_mark_guilty(struct drm_i915_file_private *file_priv, @@ -59,7 +66,7 @@ static void client_mark_guilty(struct drm_i915_file_private *file_priv, } } -static void context_mark_guilty(struct i915_gem_context *ctx) +static bool context_mark_guilty(struct i915_gem_context *ctx) { unsigned int score; bool banned, bannable; @@ -72,7 +79,7 @@ static void context_mark_guilty(struct i915_gem_context *ctx) /* Cool contexts don't accumulate client ban score */ if (!bannable) - return; + return false; if (banned) { DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, banned\n", @@ -83,6 +90,8 @@ static void context_mark_guilty(struct i915_gem_context *ctx) if (!IS_ERR_OR_NULL(ctx->file_priv)) client_mark_guilty(ctx->file_priv, ctx); + + return banned; } static void context_mark_innocent(struct i915_gem_context *ctx) @@ -90,6 +99,21 @@ static void context_mark_innocent(struct i915_gem_context *ctx) atomic_inc(&ctx->active_count); } +void i915_reset_request(struct i915_request *rq, bool guilty) +{ + lockdep_assert_held(&rq->engine->timeline.lock); + GEM_BUG_ON(i915_request_completed(rq)); + + if (guilty) { + i915_request_skip(rq, -EIO); + if (context_mark_guilty(rq->gem_context)) + engine_skip_context(rq); + } else { + dma_fence_set_error(&rq->fence, -EAGAIN); + context_mark_innocent(rq->gem_context); + } +} + static void gen3_stop_engine(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; @@ -144,14 +168,14 @@ static int i915_do_reset(struct drm_i915_private *i915, /* Assert reset for at least 20 usec, and wait for acknowledgement. */ pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); - usleep_range(50, 200); - err = wait_for(i915_in_reset(pdev), 500); + udelay(50); + err = wait_for_atomic(i915_in_reset(pdev), 50); /* Clear the reset request. */ pci_write_config_byte(pdev, I915_GDRST, 0); - usleep_range(50, 200); + udelay(50); if (!err) - err = wait_for(!i915_in_reset(pdev), 500); + err = wait_for_atomic(!i915_in_reset(pdev), 50); return err; } @@ -171,7 +195,7 @@ static int g33_do_reset(struct drm_i915_private *i915, struct pci_dev *pdev = i915->drm.pdev; pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); - return wait_for(g4x_reset_complete(pdev), 500); + return wait_for_atomic(g4x_reset_complete(pdev), 50); } static int g4x_do_reset(struct drm_i915_private *dev_priv, @@ -182,13 +206,13 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv, int ret; /* WaVcpClkGateDisableForMediaReset:ctg,elk */ - I915_WRITE(VDECCLK_GATE_D, - I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE); - POSTING_READ(VDECCLK_GATE_D); + I915_WRITE_FW(VDECCLK_GATE_D, + I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE); + POSTING_READ_FW(VDECCLK_GATE_D); pci_write_config_byte(pdev, I915_GDRST, GRDOM_MEDIA | GRDOM_RESET_ENABLE); - ret = wait_for(g4x_reset_complete(pdev), 500); + ret = wait_for_atomic(g4x_reset_complete(pdev), 50); if (ret) { DRM_DEBUG_DRIVER("Wait for media reset failed\n"); goto out; @@ -196,7 +220,7 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv, pci_write_config_byte(pdev, I915_GDRST, GRDOM_RENDER | GRDOM_RESET_ENABLE); - ret = wait_for(g4x_reset_complete(pdev), 500); + ret = wait_for_atomic(g4x_reset_complete(pdev), 50); if (ret) { DRM_DEBUG_DRIVER("Wait for render reset failed\n"); goto out; @@ -205,9 +229,9 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv, out: pci_write_config_byte(pdev, I915_GDRST, 0); - I915_WRITE(VDECCLK_GATE_D, - I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE); - POSTING_READ(VDECCLK_GATE_D); + I915_WRITE_FW(VDECCLK_GATE_D, + I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE); + POSTING_READ_FW(VDECCLK_GATE_D); return ret; } @@ -218,27 +242,29 @@ static int ironlake_do_reset(struct drm_i915_private *dev_priv, { int ret; - I915_WRITE(ILK_GDSR, ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE); - ret = intel_wait_for_register(dev_priv, - ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0, - 500); + I915_WRITE_FW(ILK_GDSR, ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE); + ret = __intel_wait_for_register_fw(dev_priv, ILK_GDSR, + ILK_GRDOM_RESET_ENABLE, 0, + 5000, 0, + NULL); if (ret) { DRM_DEBUG_DRIVER("Wait for render reset failed\n"); goto out; } - I915_WRITE(ILK_GDSR, ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE); - ret = intel_wait_for_register(dev_priv, - ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0, - 500); + I915_WRITE_FW(ILK_GDSR, ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE); + ret = __intel_wait_for_register_fw(dev_priv, ILK_GDSR, + ILK_GRDOM_RESET_ENABLE, 0, + 5000, 0, + NULL); if (ret) { DRM_DEBUG_DRIVER("Wait for media reset failed\n"); goto out; } out: - I915_WRITE(ILK_GDSR, 0); - POSTING_READ(ILK_GDSR); + I915_WRITE_FW(ILK_GDSR, 0); + POSTING_READ_FW(ILK_GDSR); return ret; } @@ -527,32 +553,21 @@ static reset_func intel_get_gpu_reset(struct drm_i915_private *i915) int intel_gpu_reset(struct drm_i915_private *i915, unsigned int engine_mask) { - reset_func reset = intel_get_gpu_reset(i915); + const int retries = engine_mask == ALL_ENGINES ? RESET_MAX_RETRIES : 1; + reset_func reset; + int ret = -ETIMEDOUT; int retry; - int ret; - /* - * We want to perform per-engine reset from atomic context (e.g. - * softirq), which imposes the constraint that we cannot sleep. - * However, experience suggests that spending a bit of time waiting - * for a reset helps in various cases, so for a full-device reset - * we apply the opposite rule and wait if we want to. As we should - * always follow up a failed per-engine reset with a full device reset, - * being a little faster, stricter and more error prone for the - * atomic case seems an acceptable compromise. - * - * Unfortunately this leads to a bimodal routine, when the goal was - * to have a single reset function that worked for resetting any - * number of engines simultaneously. - */ - might_sleep_if(engine_mask == ALL_ENGINES); + reset = intel_get_gpu_reset(i915); + if (!reset) + return -ENODEV; /* * If the power well sleeps during the reset, the reset * request may be dropped and never completes (causing -EIO). */ intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); - for (retry = 0; retry < 3; retry++) { + for (retry = 0; ret == -ETIMEDOUT && retry < retries; retry++) { /* * We stop engines, otherwise we might get failed reset and a * dead gpu (on elk). Also as modern gpu as kbl can suffer @@ -569,15 +584,10 @@ int intel_gpu_reset(struct drm_i915_private *i915, unsigned int engine_mask) */ i915_stop_engines(i915, engine_mask); - ret = -ENODEV; - if (reset) { - GEM_TRACE("engine_mask=%x\n", engine_mask); - ret = reset(i915, engine_mask, retry); - } - if (ret != -ETIMEDOUT || engine_mask != ALL_ENGINES) - break; - - cond_resched(); + GEM_TRACE("engine_mask=%x\n", engine_mask); + preempt_disable(); + ret = reset(i915, engine_mask, retry); + preempt_enable(); } intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); @@ -586,6 +596,9 @@ int intel_gpu_reset(struct drm_i915_private *i915, unsigned int engine_mask) bool intel_has_gpu_reset(struct drm_i915_private *i915) { + if (USES_GUC(i915)) + return false; + return intel_get_gpu_reset(i915); } @@ -613,11 +626,8 @@ int intel_reset_guc(struct drm_i915_private *i915) * Ensure irq handler finishes, and not run again. * Also return the active request so that we only search for it once. */ -static struct i915_request * -reset_prepare_engine(struct intel_engine_cs *engine) +static void reset_prepare_engine(struct intel_engine_cs *engine) { - struct i915_request *rq; - /* * During the reset sequence, we must prevent the engine from * entering RC6. As the context state is undefined until we restart @@ -626,162 +636,85 @@ reset_prepare_engine(struct intel_engine_cs *engine) * GPU state upon resume, i.e. fail to restart after a reset. */ intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL); - - rq = engine->reset.prepare(engine); - if (rq && rq->fence.error == -EIO) - rq = ERR_PTR(-EIO); /* Previous reset failed! */ - - return rq; + engine->reset.prepare(engine); } -static int reset_prepare(struct drm_i915_private *i915) +static void reset_prepare(struct drm_i915_private *i915) { struct intel_engine_cs *engine; - struct i915_request *rq; enum intel_engine_id id; - int err = 0; - - for_each_engine(engine, i915, id) { - rq = reset_prepare_engine(engine); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - continue; - } - engine->hangcheck.active_request = rq; - } + for_each_engine(engine, i915, id) + reset_prepare_engine(engine); - i915_gem_revoke_fences(i915); intel_uc_sanitize(i915); - - return err; } -/* Returns the request if it was guilty of the hang */ -static struct i915_request * -reset_request(struct intel_engine_cs *engine, - struct i915_request *rq, - bool stalled) +static int gt_reset(struct drm_i915_private *i915, unsigned int stalled_mask) { + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err; + /* - * The guilty request will get skipped on a hung engine. - * - * Users of client default contexts do not rely on logical - * state preserved between batches so it is safe to execute - * queued requests following the hang. Non default contexts - * rely on preserved state, so skipping a batch loses the - * evolution of the state and it needs to be considered corrupted. - * Executing more queued batches on top of corrupted state is - * risky. But we take the risk by trying to advance through - * the queued requests in order to make the client behaviour - * more predictable around resets, by not throwing away random - * amount of batches it has prepared for execution. Sophisticated - * clients can use gem_reset_stats_ioctl and dma fence status - * (exported via sync_file info ioctl on explicit fences) to observe - * when it loses the context state and should rebuild accordingly. - * - * The context ban, and ultimately the client ban, mechanism are safety - * valves if client submission ends up resulting in nothing more than - * subsequent hangs. + * Everything depends on having the GTT running, so we need to start + * there. */ + err = i915_ggtt_enable_hw(i915); + if (err) + return err; - if (i915_request_completed(rq)) { - GEM_TRACE("%s pardoned global=%d (fence %llx:%lld), current %d\n", - engine->name, rq->global_seqno, - rq->fence.context, rq->fence.seqno, - intel_engine_get_seqno(engine)); - stalled = false; - } - - if (stalled) { - context_mark_guilty(rq->gem_context); - i915_request_skip(rq, -EIO); + for_each_engine(engine, i915, id) + intel_engine_reset(engine, stalled_mask & ENGINE_MASK(id)); - /* If this context is now banned, skip all pending requests. */ - if (i915_gem_context_is_banned(rq->gem_context)) - engine_skip_context(rq); - } else { - /* - * Since this is not the hung engine, it may have advanced - * since the hang declaration. Double check by refinding - * the active request at the time of the reset. - */ - rq = i915_gem_find_active_request(engine); - if (rq) { - unsigned long flags; - - context_mark_innocent(rq->gem_context); - dma_fence_set_error(&rq->fence, -EAGAIN); - - /* Rewind the engine to replay the incomplete rq */ - spin_lock_irqsave(&engine->timeline.lock, flags); - rq = list_prev_entry(rq, link); - if (&rq->link == &engine->timeline.requests) - rq = NULL; - spin_unlock_irqrestore(&engine->timeline.lock, flags); - } - } + i915_gem_restore_fences(i915); - return rq; + return err; } -static void reset_engine(struct intel_engine_cs *engine, - struct i915_request *rq, - bool stalled) +static void reset_finish_engine(struct intel_engine_cs *engine) { - if (rq) - rq = reset_request(engine, rq, stalled); - - /* Setup the CS to resume from the breadcrumb of the hung request */ - engine->reset.reset(engine, rq); + engine->reset.finish(engine); + intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL); } -static void gt_reset(struct drm_i915_private *i915, unsigned int stalled_mask) +struct i915_gpu_restart { + struct work_struct work; + struct drm_i915_private *i915; +}; + +static void restart_work(struct work_struct *work) { + struct i915_gpu_restart *arg = container_of(work, typeof(*arg), work); + struct drm_i915_private *i915 = arg->i915; struct intel_engine_cs *engine; enum intel_engine_id id; + intel_wakeref_t wakeref; - lockdep_assert_held(&i915->drm.struct_mutex); - - i915_retire_requests(i915); + wakeref = intel_runtime_pm_get(i915); + mutex_lock(&i915->drm.struct_mutex); + WRITE_ONCE(i915->gpu_error.restart, NULL); for_each_engine(engine, i915, id) { - struct intel_context *ce; - - reset_engine(engine, - engine->hangcheck.active_request, - stalled_mask & ENGINE_MASK(id)); - ce = fetch_and_zero(&engine->last_retired_context); - if (ce) - intel_context_unpin(ce); + struct i915_request *rq; /* * Ostensibily, we always want a context loaded for powersaving, * so if the engine is idle after the reset, send a request * to load our scratch kernel_context. - * - * More mysteriously, if we leave the engine idle after a reset, - * the next userspace batch may hang, with what appears to be - * an incoherent read by the CS (presumably stale TLB). An - * empty request appears sufficient to paper over the glitch. */ - if (intel_engine_is_idle(engine)) { - struct i915_request *rq; + if (!intel_engine_is_idle(engine)) + continue; - rq = i915_request_alloc(engine, i915->kernel_context); - if (!IS_ERR(rq)) - i915_request_add(rq); - } + rq = i915_request_alloc(engine, i915->kernel_context); + if (!IS_ERR(rq)) + i915_request_add(rq); } - i915_gem_restore_fences(i915); -} - -static void reset_finish_engine(struct intel_engine_cs *engine) -{ - engine->reset.finish(engine); + mutex_unlock(&i915->drm.struct_mutex); + intel_runtime_pm_put(i915, wakeref); - intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL); + kfree(arg); } static void reset_finish(struct drm_i915_private *i915) @@ -789,27 +722,49 @@ static void reset_finish(struct drm_i915_private *i915) struct intel_engine_cs *engine; enum intel_engine_id id; - lockdep_assert_held(&i915->drm.struct_mutex); - - for_each_engine(engine, i915, id) { - engine->hangcheck.active_request = NULL; + for_each_engine(engine, i915, id) reset_finish_engine(engine); +} + +static void reset_restart(struct drm_i915_private *i915) +{ + struct i915_gpu_restart *arg; + + /* + * Following the reset, ensure that we always reload context for + * powersaving, and to correct engine->last_retired_context. Since + * this requires us to submit a request, queue a worker to do that + * task for us to evade any locking here. + */ + if (READ_ONCE(i915->gpu_error.restart)) + return; + + arg = kmalloc(sizeof(*arg), GFP_KERNEL); + if (arg) { + arg->i915 = i915; + INIT_WORK(&arg->work, restart_work); + + WRITE_ONCE(i915->gpu_error.restart, arg); + queue_work(i915->wq, &arg->work); } } static void nop_submit_request(struct i915_request *request) { + struct intel_engine_cs *engine = request->engine; unsigned long flags; GEM_TRACE("%s fence %llx:%lld -> -EIO\n", - request->engine->name, - request->fence.context, request->fence.seqno); + engine->name, request->fence.context, request->fence.seqno); dma_fence_set_error(&request->fence, -EIO); - spin_lock_irqsave(&request->engine->timeline.lock, flags); + spin_lock_irqsave(&engine->timeline.lock, flags); __i915_request_submit(request); - intel_engine_write_global_seqno(request->engine, request->global_seqno); - spin_unlock_irqrestore(&request->engine->timeline.lock, flags); + i915_request_mark_complete(request); + intel_engine_write_global_seqno(engine, request->global_seqno); + spin_unlock_irqrestore(&engine->timeline.lock, flags); + + intel_engine_queue_breadcrumbs(engine); } void i915_gem_set_wedged(struct drm_i915_private *i915) @@ -864,7 +819,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) for_each_engine(engine, i915, id) { reset_finish_engine(engine); - intel_engine_wakeup(engine); + intel_engine_signal_breadcrumbs(engine); } smp_mb__before_atomic(); @@ -882,8 +837,6 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915) struct i915_timeline *tl; bool ret = false; - lockdep_assert_held(&i915->drm.struct_mutex); - if (!test_bit(I915_WEDGED, &error->flags)) return true; @@ -904,11 +857,12 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915) * * No more can be submitted until we reset the wedged bit. */ - list_for_each_entry(tl, &i915->gt.timelines, link) { + mutex_lock(&i915->gt.timelines.mutex); + list_for_each_entry(tl, &i915->gt.timelines.active_list, link) { struct i915_request *rq; + long timeout; - rq = i915_gem_active_peek(&tl->last_request, - &i915->drm.struct_mutex); + rq = i915_gem_active_get_unlocked(&tl->last_request); if (!rq) continue; @@ -923,12 +877,15 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915) * and when the seqno passes the fence, the signaler * then signals the fence waking us up). */ - if (dma_fence_default_wait(&rq->fence, true, - MAX_SCHEDULE_TIMEOUT) < 0) + timeout = dma_fence_default_wait(&rq->fence, true, + MAX_SCHEDULE_TIMEOUT); + i915_request_put(rq); + if (timeout < 0) { + mutex_unlock(&i915->gt.timelines.mutex); goto unlock; + } } - i915_retire_requests(i915); - GEM_BUG_ON(i915->gt.active_requests); + mutex_unlock(&i915->gt.timelines.mutex); intel_engines_sanitize(i915, false); @@ -942,7 +899,6 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915) * context and do not require stop_machine(). */ intel_engines_reset_default_submission(i915); - i915_gem_contexts_lost(i915); GEM_TRACE("end\n"); @@ -955,6 +911,52 @@ unlock: return ret; } +struct __i915_reset { + struct drm_i915_private *i915; + unsigned int stalled_mask; +}; + +static int __i915_reset__BKL(void *data) +{ + struct __i915_reset *arg = data; + int err; + + err = intel_gpu_reset(arg->i915, ALL_ENGINES); + if (err) + return err; + + return gt_reset(arg->i915, arg->stalled_mask); +} + +#if RESET_UNDER_STOP_MACHINE +/* + * XXX An alternative to using stop_machine would be to park only the + * processes that have a GGTT mmap. By remote parking the threads (SIGSTOP) + * we should be able to prevent their memmory accesses via the lost fence + * registers over the course of the reset without the potential recursive + * of mutexes between the pagefault handler and reset. + * + * See igt/gem_mmap_gtt/hang + */ +#define __do_reset(fn, arg) stop_machine(fn, arg, NULL) +#else +#define __do_reset(fn, arg) fn(arg) +#endif + +static int do_reset(struct drm_i915_private *i915, unsigned int stalled_mask) +{ + struct __i915_reset arg = { i915, stalled_mask }; + int err, i; + + err = __do_reset(__i915_reset__BKL, &arg); + for (i = 0; err && i < RESET_MAX_RETRIES; i++) { + msleep(100); + err = __do_reset(__i915_reset__BKL, &arg); + } + + return err; +} + /** * i915_reset - reset chip after a hang * @i915: #drm_i915_private to reset @@ -980,31 +982,22 @@ void i915_reset(struct drm_i915_private *i915, { struct i915_gpu_error *error = &i915->gpu_error; int ret; - int i; GEM_TRACE("flags=%lx\n", error->flags); might_sleep(); - lockdep_assert_held(&i915->drm.struct_mutex); assert_rpm_wakelock_held(i915); GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags)); - if (!test_bit(I915_RESET_HANDOFF, &error->flags)) - return; - /* Clear any previous failed attempts at recovery. Time to try again. */ if (!i915_gem_unset_wedged(i915)) - goto wakeup; + return; if (reason) dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason); error->reset_count++; - ret = reset_prepare(i915); - if (ret) { - dev_err(i915->drm.dev, "GPU recovery failed\n"); - goto taint; - } + reset_prepare(i915); if (!intel_has_gpu_reset(i915)) { if (i915_modparams.reset) @@ -1014,32 +1007,11 @@ void i915_reset(struct drm_i915_private *i915, goto error; } - for (i = 0; i < 3; i++) { - ret = intel_gpu_reset(i915, ALL_ENGINES); - if (ret == 0) - break; - - msleep(100); - } - if (ret) { + if (do_reset(i915, stalled_mask)) { dev_err(i915->drm.dev, "Failed to reset chip\n"); goto taint; } - /* Ok, now get things going again... */ - - /* - * Everything depends on having the GTT running, so we need to start - * there. - */ - ret = i915_ggtt_enable_hw(i915); - if (ret) { - DRM_ERROR("Failed to re-enable GGTT following reset (%d)\n", - ret); - goto error; - } - - gt_reset(i915, stalled_mask); intel_overlay_reset(i915); /* @@ -1061,9 +1033,8 @@ void i915_reset(struct drm_i915_private *i915, finish: reset_finish(i915); -wakeup: - clear_bit(I915_RESET_HANDOFF, &error->flags); - wake_up_bit(&error->flags, I915_RESET_HANDOFF); + if (!i915_terminally_wedged(error)) + reset_restart(i915); return; taint: @@ -1082,7 +1053,6 @@ taint: add_taint(TAINT_WARN, LOCKDEP_STILL_OK); error: i915_gem_set_wedged(i915); - i915_retire_requests(i915); goto finish; } @@ -1108,18 +1078,12 @@ static inline int intel_gt_reset_engine(struct drm_i915_private *i915, int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) { struct i915_gpu_error *error = &engine->i915->gpu_error; - struct i915_request *active_request; int ret; GEM_TRACE("%s flags=%lx\n", engine->name, error->flags); GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags)); - active_request = reset_prepare_engine(engine); - if (IS_ERR_OR_NULL(active_request)) { - /* Either the previous reset failed, or we pardon the reset. */ - ret = PTR_ERR(active_request); - goto out; - } + reset_prepare_engine(engine); if (msg) dev_notice(engine->i915->drm.dev, @@ -1143,7 +1107,7 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) * active request and can drop it, adjust head to skip the offending * request to resume executing remaining requests in the queue. */ - reset_engine(engine, active_request, true); + intel_engine_reset(engine, true); /* * The engine and its registers (and workarounds in case of render) @@ -1180,30 +1144,7 @@ static void i915_reset_device(struct drm_i915_private *i915, i915_wedge_on_timeout(&w, i915, 5 * HZ) { intel_prepare_reset(i915); - error->reason = reason; - error->stalled_mask = engine_mask; - - /* Signal that locked waiters should reset the GPU */ - smp_mb__before_atomic(); - set_bit(I915_RESET_HANDOFF, &error->flags); - wake_up_all(&error->wait_queue); - - /* - * Wait for anyone holding the lock to wakeup, without - * blocking indefinitely on struct_mutex. - */ - do { - if (mutex_trylock(&i915->drm.struct_mutex)) { - i915_reset(i915, engine_mask, reason); - mutex_unlock(&i915->drm.struct_mutex); - } - } while (wait_on_bit_timeout(&error->flags, - I915_RESET_HANDOFF, - TASK_UNINTERRUPTIBLE, - 1)); - - error->stalled_mask = 0; - error->reason = NULL; + i915_reset(i915, engine_mask, reason); intel_finish_reset(i915); } @@ -1359,6 +1300,25 @@ out: intel_runtime_pm_put(i915, wakeref); } +bool i915_reset_flush(struct drm_i915_private *i915) +{ + int err; + + cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); + + flush_workqueue(i915->wq); + GEM_BUG_ON(READ_ONCE(i915->gpu_error.restart)); + + mutex_lock(&i915->drm.struct_mutex); + err = i915_gem_wait_for_idle(i915, + I915_WAIT_LOCKED | + I915_WAIT_FOR_IDLE_BOOST, + MAX_SCHEDULE_TIMEOUT); + mutex_unlock(&i915->drm.struct_mutex); + + return !err; +} + static void i915_wedge_me(struct work_struct *work) { struct i915_wedge_me *w = container_of(work, typeof(*w), work.work); diff --git a/drivers/gpu/drm/i915/i915_reset.h b/drivers/gpu/drm/i915/i915_reset.h index b6a519bde67d..f2d347f319df 100644 --- a/drivers/gpu/drm/i915/i915_reset.h +++ b/drivers/gpu/drm/i915/i915_reset.h @@ -29,6 +29,9 @@ void i915_reset(struct drm_i915_private *i915, int i915_reset_engine(struct intel_engine_cs *engine, const char *reason); +void i915_reset_request(struct i915_request *rq, bool guilty); +bool i915_reset_flush(struct drm_i915_private *i915); + bool intel_has_gpu_reset(struct drm_i915_private *i915); bool intel_has_reset_engine(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 340faea6c08a..d01683167c77 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -127,8 +127,7 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb) return rb_entry(rb, struct i915_priolist, node); } -static void assert_priolists(struct intel_engine_execlists * const execlists, - long queue_priority) +static void assert_priolists(struct intel_engine_execlists * const execlists) { struct rb_node *rb; long last_prio, i; @@ -139,7 +138,7 @@ static void assert_priolists(struct intel_engine_execlists * const execlists, GEM_BUG_ON(rb_first_cached(&execlists->queue) != rb_first(&execlists->queue.rb_root)); - last_prio = (queue_priority >> I915_USER_PRIORITY_SHIFT) + 1; + last_prio = (INT_MAX >> I915_USER_PRIORITY_SHIFT) + 1; for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { const struct i915_priolist *p = to_priolist(rb); @@ -166,7 +165,7 @@ i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio) int idx, i; lockdep_assert_held(&engine->timeline.lock); - assert_priolists(execlists, INT_MAX); + assert_priolists(execlists); /* buckets sorted from highest [in slot 0] to lowest priority */ idx = I915_PRIORITY_COUNT - (prio & I915_PRIORITY_MASK) - 1; @@ -239,6 +238,18 @@ sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked) return engine; } +static bool inflight(const struct i915_request *rq, + const struct intel_engine_cs *engine) +{ + const struct i915_request *active; + + if (!i915_request_is_active(rq)) + return false; + + active = port_request(engine->execlists.port); + return active->hw_context == rq->hw_context; +} + static void __i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr) { @@ -328,6 +339,7 @@ static void __i915_schedule(struct i915_request *rq, INIT_LIST_HEAD(&dep->dfs_link); engine = sched_lock_engine(node, engine); + lockdep_assert_held(&engine->timeline.lock); /* Recheck after acquiring the engine->timeline.lock */ if (prio <= node->attr.priority || node_signaled(node)) @@ -353,20 +365,19 @@ static void __i915_schedule(struct i915_request *rq, continue; } - if (prio <= engine->execlists.queue_priority) + if (prio <= engine->execlists.queue_priority_hint) continue; + engine->execlists.queue_priority_hint = prio; + /* * If we are already the currently executing context, don't * bother evaluating if we should preempt ourselves. */ - if (node_to_request(node)->global_seqno && - i915_seqno_passed(port_request(engine->execlists.port)->global_seqno, - node_to_request(node)->global_seqno)) + if (inflight(node_to_request(node), engine)) continue; /* Defer (tasklet) submission until after all of our updates. */ - engine->execlists.queue_priority = prio; tasklet_hi_schedule(&engine->execlists.tasklet); } diff --git a/drivers/gpu/drm/i915/i915_selftest.h b/drivers/gpu/drm/i915/i915_selftest.h index a73472dd12fd..207e21b478f2 100644 --- a/drivers/gpu/drm/i915/i915_selftest.h +++ b/drivers/gpu/drm/i915/i915_selftest.h @@ -31,6 +31,7 @@ struct i915_selftest { unsigned long timeout_jiffies; unsigned int timeout_ms; unsigned int random_seed; + char *filter; int mock; int live; }; diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c index 4667cc08c416..5ea3af393ffe 100644 --- a/drivers/gpu/drm/i915/i915_timeline.c +++ b/drivers/gpu/drm/i915/i915_timeline.c @@ -9,25 +9,155 @@ #include "i915_timeline.h" #include "i915_syncmap.h" -void i915_timeline_init(struct drm_i915_private *i915, - struct i915_timeline *timeline, - const char *name) +struct i915_timeline_hwsp { + struct i915_vma *vma; + struct list_head free_link; + u64 free_bitmap; +}; + +static inline struct i915_timeline_hwsp * +i915_timeline_hwsp(const struct i915_timeline *tl) +{ + return tl->hwsp_ggtt->private; +} + +static struct i915_vma *__hwsp_alloc(struct drm_i915_private *i915) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); + + vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); + if (IS_ERR(vma)) + i915_gem_object_put(obj); + + return vma; +} + +static struct i915_vma * +hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline) { - lockdep_assert_held(&i915->drm.struct_mutex); + struct drm_i915_private *i915 = timeline->i915; + struct i915_gt_timelines *gt = &i915->gt.timelines; + struct i915_timeline_hwsp *hwsp; + + BUILD_BUG_ON(BITS_PER_TYPE(u64) * CACHELINE_BYTES > PAGE_SIZE); + + spin_lock(>->hwsp_lock); + + /* hwsp_free_list only contains HWSP that have available cachelines */ + hwsp = list_first_entry_or_null(>->hwsp_free_list, + typeof(*hwsp), free_link); + if (!hwsp) { + struct i915_vma *vma; + + spin_unlock(>->hwsp_lock); + + hwsp = kmalloc(sizeof(*hwsp), GFP_KERNEL); + if (!hwsp) + return ERR_PTR(-ENOMEM); + + vma = __hwsp_alloc(i915); + if (IS_ERR(vma)) { + kfree(hwsp); + return vma; + } + + vma->private = hwsp; + hwsp->vma = vma; + hwsp->free_bitmap = ~0ull; + + spin_lock(>->hwsp_lock); + list_add(&hwsp->free_link, >->hwsp_free_list); + } + + GEM_BUG_ON(!hwsp->free_bitmap); + *cacheline = __ffs64(hwsp->free_bitmap); + hwsp->free_bitmap &= ~BIT_ULL(*cacheline); + if (!hwsp->free_bitmap) + list_del(&hwsp->free_link); + + spin_unlock(>->hwsp_lock); + + GEM_BUG_ON(hwsp->vma->private != hwsp); + return hwsp->vma; +} + +static void hwsp_free(struct i915_timeline *timeline) +{ + struct i915_gt_timelines *gt = &timeline->i915->gt.timelines; + struct i915_timeline_hwsp *hwsp; + + hwsp = i915_timeline_hwsp(timeline); + if (!hwsp) /* leave global HWSP alone! */ + return; + + spin_lock(>->hwsp_lock); + + /* As a cacheline becomes available, publish the HWSP on the freelist */ + if (!hwsp->free_bitmap) + list_add_tail(&hwsp->free_link, >->hwsp_free_list); + + hwsp->free_bitmap |= BIT_ULL(timeline->hwsp_offset / CACHELINE_BYTES); + + /* And if no one is left using it, give the page back to the system */ + if (hwsp->free_bitmap == ~0ull) { + i915_vma_put(hwsp->vma); + list_del(&hwsp->free_link); + kfree(hwsp); + } + + spin_unlock(>->hwsp_lock); +} + +int i915_timeline_init(struct drm_i915_private *i915, + struct i915_timeline *timeline, + const char *name, + struct i915_vma *hwsp) +{ + void *vaddr; /* * Ideally we want a set of engines on a single leaf as we expect * to mostly be tracking synchronisation between engines. It is not * a huge issue if this is not the case, but we may want to mitigate * any page crossing penalties if they become an issue. + * + * Called during early_init before we know how many engines there are. */ BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES); + timeline->i915 = i915; timeline->name = name; + timeline->pin_count = 0; + timeline->has_initial_breadcrumb = !hwsp; - list_add(&timeline->link, &i915->gt.timelines); + timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR; + if (!hwsp) { + unsigned int cacheline; + + hwsp = hwsp_alloc(timeline, &cacheline); + if (IS_ERR(hwsp)) + return PTR_ERR(hwsp); + + timeline->hwsp_offset = cacheline * CACHELINE_BYTES; + } + timeline->hwsp_ggtt = i915_vma_get(hwsp); + + vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + hwsp_free(timeline); + i915_vma_put(hwsp); + return PTR_ERR(vaddr); + } - /* Called during early_init before we know how many engines there are */ + timeline->hwsp_seqno = + memset(vaddr + timeline->hwsp_offset, 0, CACHELINE_BYTES); timeline->fence_context = dma_fence_context_alloc(1); @@ -37,6 +167,40 @@ void i915_timeline_init(struct drm_i915_private *i915, INIT_LIST_HEAD(&timeline->requests); i915_syncmap_init(&timeline->sync); + + return 0; +} + +void i915_timelines_init(struct drm_i915_private *i915) +{ + struct i915_gt_timelines *gt = &i915->gt.timelines; + + mutex_init(>->mutex); + INIT_LIST_HEAD(>->active_list); + + spin_lock_init(>->hwsp_lock); + INIT_LIST_HEAD(>->hwsp_free_list); + + /* via i915_gem_wait_for_idle() */ + i915_gem_shrinker_taints_mutex(i915, >->mutex); +} + +static void timeline_add_to_active(struct i915_timeline *tl) +{ + struct i915_gt_timelines *gt = &tl->i915->gt.timelines; + + mutex_lock(>->mutex); + list_add(&tl->link, >->active_list); + mutex_unlock(>->mutex); +} + +static void timeline_remove_from_active(struct i915_timeline *tl) +{ + struct i915_gt_timelines *gt = &tl->i915->gt.timelines; + + mutex_lock(>->mutex); + list_del(&tl->link); + mutex_unlock(>->mutex); } /** @@ -51,11 +215,11 @@ void i915_timeline_init(struct drm_i915_private *i915, */ void i915_timelines_park(struct drm_i915_private *i915) { + struct i915_gt_timelines *gt = &i915->gt.timelines; struct i915_timeline *timeline; - lockdep_assert_held(&i915->drm.struct_mutex); - - list_for_each_entry(timeline, &i915->gt.timelines, link) { + mutex_lock(>->mutex); + list_for_each_entry(timeline, >->active_list, link) { /* * All known fences are completed so we can scrap * the current sync point tracking and start afresh, @@ -64,32 +228,87 @@ void i915_timelines_park(struct drm_i915_private *i915) */ i915_syncmap_free(&timeline->sync); } + mutex_unlock(>->mutex); } void i915_timeline_fini(struct i915_timeline *timeline) { + GEM_BUG_ON(timeline->pin_count); GEM_BUG_ON(!list_empty(&timeline->requests)); i915_syncmap_free(&timeline->sync); + hwsp_free(timeline); - list_del(&timeline->link); + i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj); + i915_vma_put(timeline->hwsp_ggtt); } struct i915_timeline * -i915_timeline_create(struct drm_i915_private *i915, const char *name) +i915_timeline_create(struct drm_i915_private *i915, + const char *name, + struct i915_vma *global_hwsp) { struct i915_timeline *timeline; + int err; timeline = kzalloc(sizeof(*timeline), GFP_KERNEL); if (!timeline) return ERR_PTR(-ENOMEM); - i915_timeline_init(i915, timeline, name); + err = i915_timeline_init(i915, timeline, name, global_hwsp); + if (err) { + kfree(timeline); + return ERR_PTR(err); + } + kref_init(&timeline->kref); return timeline; } +int i915_timeline_pin(struct i915_timeline *tl) +{ + int err; + + if (tl->pin_count++) + return 0; + GEM_BUG_ON(!tl->pin_count); + + err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH); + if (err) + goto unpin; + + tl->hwsp_offset = + i915_ggtt_offset(tl->hwsp_ggtt) + + offset_in_page(tl->hwsp_offset); + + timeline_add_to_active(tl); + + return 0; + +unpin: + tl->pin_count = 0; + return err; +} + +void i915_timeline_unpin(struct i915_timeline *tl) +{ + GEM_BUG_ON(!tl->pin_count); + if (--tl->pin_count) + return; + + timeline_remove_from_active(tl); + + /* + * Since this timeline is idle, all bariers upon which we were waiting + * must also be complete and so we can discard the last used barriers + * without loss of information. + */ + i915_syncmap_free(&tl->sync); + + __i915_vma_unpin(tl->hwsp_ggtt); +} + void __i915_timeline_free(struct kref *kref) { struct i915_timeline *timeline = @@ -99,6 +318,16 @@ void __i915_timeline_free(struct kref *kref) kfree(timeline); } +void i915_timelines_fini(struct drm_i915_private *i915) +{ + struct i915_gt_timelines *gt = &i915->gt.timelines; + + GEM_BUG_ON(!list_empty(>->active_list)); + GEM_BUG_ON(!list_empty(>->hwsp_free_list)); + + mutex_destroy(>->mutex); +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_timeline.c" #include "selftests/i915_timeline.c" diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h index 38c1e15e927a..8caeb66d1cd5 100644 --- a/drivers/gpu/drm/i915/i915_timeline.h +++ b/drivers/gpu/drm/i915/i915_timeline.h @@ -32,6 +32,9 @@ #include "i915_syncmap.h" #include "i915_utils.h" +struct i915_vma; +struct i915_timeline_hwsp; + struct i915_timeline { u64 fence_context; u32 seqno; @@ -40,6 +43,13 @@ struct i915_timeline { #define TIMELINE_CLIENT 0 /* default subclass */ #define TIMELINE_ENGINE 1 + unsigned int pin_count; + const u32 *hwsp_seqno; + struct i915_vma *hwsp_ggtt; + u32 hwsp_offset; + + bool has_initial_breadcrumb; + /** * List of breadcrumbs associated with GPU requests currently * outstanding. @@ -66,13 +76,15 @@ struct i915_timeline { struct list_head link; const char *name; + struct drm_i915_private *i915; struct kref kref; }; -void i915_timeline_init(struct drm_i915_private *i915, - struct i915_timeline *tl, - const char *name); +int i915_timeline_init(struct drm_i915_private *i915, + struct i915_timeline *tl, + const char *name, + struct i915_vma *hwsp); void i915_timeline_fini(struct i915_timeline *tl); static inline void @@ -95,7 +107,9 @@ i915_timeline_set_subclass(struct i915_timeline *timeline, } struct i915_timeline * -i915_timeline_create(struct drm_i915_private *i915, const char *name); +i915_timeline_create(struct drm_i915_private *i915, + const char *name, + struct i915_vma *global_hwsp); static inline struct i915_timeline * i915_timeline_get(struct i915_timeline *timeline) @@ -134,6 +148,11 @@ static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl, return __i915_timeline_sync_is_later(tl, fence->context, fence->seqno); } +int i915_timeline_pin(struct i915_timeline *tl); +void i915_timeline_unpin(struct i915_timeline *tl); + +void i915_timelines_init(struct drm_i915_private *i915); void i915_timelines_park(struct drm_i915_private *i915); +void i915_timelines_fini(struct drm_i915_private *i915); #endif diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 43da14f08dc0..eab313c3163c 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -752,31 +752,6 @@ trace_i915_request_out(struct i915_request *rq) #endif #endif -TRACE_EVENT(intel_engine_notify, - TP_PROTO(struct intel_engine_cs *engine, bool waiters), - TP_ARGS(engine, waiters), - - TP_STRUCT__entry( - __field(u32, dev) - __field(u16, class) - __field(u16, instance) - __field(u32, seqno) - __field(bool, waiters) - ), - - TP_fast_assign( - __entry->dev = engine->i915->drm.primary->index; - __entry->class = engine->uabi_class; - __entry->instance = engine->instance; - __entry->seqno = intel_engine_get_seqno(engine); - __entry->waiters = waiters; - ), - - TP_printk("dev=%u, engine=%u:%u, seqno=%u, waiters=%u", - __entry->dev, __entry->class, __entry->instance, - __entry->seqno, __entry->waiters) -); - DEFINE_EVENT(i915_request, i915_request_retire, TP_PROTO(struct i915_request *rq), TP_ARGS(rq) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 5b4d78cdb4ca..d83b8ad5f859 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -79,9 +79,6 @@ __i915_vma_retire(struct i915_vma *vma, struct i915_request *rq) if (--vma->active_count) return; - GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); - list_move_tail(&vma->vm_link, &vma->vm->inactive_list); - GEM_BUG_ON(!i915_gem_object_is_active(obj)); if (--obj->active_count) return; @@ -190,33 +187,56 @@ vma_create(struct drm_i915_gem_object *obj, i915_gem_object_get_stride(obj)); GEM_BUG_ON(!is_power_of_2(vma->fence_alignment)); - /* - * We put the GGTT vma at the start of the vma-list, followed - * by the ppGGTT vma. This allows us to break early when - * iterating over only the GGTT vma for an object, see - * for_each_ggtt_vma() - */ vma->flags |= I915_VMA_GGTT; - list_add(&vma->obj_link, &obj->vma_list); - } else { - list_add_tail(&vma->obj_link, &obj->vma_list); } + spin_lock(&obj->vma.lock); + rb = NULL; - p = &obj->vma_tree.rb_node; + p = &obj->vma.tree.rb_node; while (*p) { struct i915_vma *pos; + long cmp; rb = *p; pos = rb_entry(rb, struct i915_vma, obj_node); - if (i915_vma_compare(pos, vm, view) < 0) + + /* + * If the view already exists in the tree, another thread + * already created a matching vma, so return the older instance + * and dispose of ours. + */ + cmp = i915_vma_compare(pos, vm, view); + if (cmp == 0) { + spin_unlock(&obj->vma.lock); + kmem_cache_free(vm->i915->vmas, vma); + return pos; + } + + if (cmp < 0) p = &rb->rb_right; else p = &rb->rb_left; } rb_link_node(&vma->obj_node, rb, p); - rb_insert_color(&vma->obj_node, &obj->vma_tree); + rb_insert_color(&vma->obj_node, &obj->vma.tree); + + if (i915_vma_is_ggtt(vma)) + /* + * We put the GGTT vma at the start of the vma-list, followed + * by the ppGGTT vma. This allows us to break early when + * iterating over only the GGTT vma for an object, see + * for_each_ggtt_vma() + */ + list_add(&vma->obj_link, &obj->vma.list); + else + list_add_tail(&vma->obj_link, &obj->vma.list); + + spin_unlock(&obj->vma.lock); + + mutex_lock(&vm->mutex); list_add(&vma->vm_link, &vm->unbound_list); + mutex_unlock(&vm->mutex); return vma; @@ -232,7 +252,7 @@ vma_lookup(struct drm_i915_gem_object *obj, { struct rb_node *rb; - rb = obj->vma_tree.rb_node; + rb = obj->vma.tree.rb_node; while (rb) { struct i915_vma *vma = rb_entry(rb, struct i915_vma, obj_node); long cmp; @@ -272,16 +292,18 @@ i915_vma_instance(struct drm_i915_gem_object *obj, { struct i915_vma *vma; - lockdep_assert_held(&obj->base.dev->struct_mutex); GEM_BUG_ON(view && !i915_is_ggtt(vm)); GEM_BUG_ON(vm->closed); + spin_lock(&obj->vma.lock); vma = vma_lookup(obj, vm, view); - if (!vma) + spin_unlock(&obj->vma.lock); + + /* vma_create() will resolve the race if another creates the vma */ + if (unlikely(!vma)) vma = vma_create(obj, vm, view); GEM_BUG_ON(!IS_ERR(vma) && i915_vma_compare(vma, vm, view)); - GEM_BUG_ON(!IS_ERR(vma) && vma_lookup(obj, vm, view) != vma); return vma; } @@ -659,7 +681,9 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, cache_level)); - list_move_tail(&vma->vm_link, &vma->vm->inactive_list); + mutex_lock(&vma->vm->mutex); + list_move_tail(&vma->vm_link, &vma->vm->bound_list); + mutex_unlock(&vma->vm->mutex); if (vma->obj) { struct drm_i915_gem_object *obj = vma->obj; @@ -692,8 +716,10 @@ i915_vma_remove(struct i915_vma *vma) vma->ops->clear_pages(vma); + mutex_lock(&vma->vm->mutex); drm_mm_remove_node(&vma->node); list_move_tail(&vma->vm_link, &vma->vm->unbound_list); + mutex_unlock(&vma->vm->mutex); /* * Since the unbound list is global, only move to that list if @@ -804,10 +830,18 @@ static void __i915_vma_destroy(struct i915_vma *vma) GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence)); - list_del(&vma->obj_link); + mutex_lock(&vma->vm->mutex); list_del(&vma->vm_link); - if (vma->obj) - rb_erase(&vma->obj_node, &vma->obj->vma_tree); + mutex_unlock(&vma->vm->mutex); + + if (vma->obj) { + struct drm_i915_gem_object *obj = vma->obj; + + spin_lock(&obj->vma.lock); + list_del(&vma->obj_link); + rb_erase(&vma->obj_node, &vma->obj->vma.tree); + spin_unlock(&obj->vma.lock); + } rbtree_postorder_for_each_entry_safe(iter, n, &vma->active, node) { GEM_BUG_ON(i915_gem_active_isset(&iter->base)); @@ -1003,10 +1037,8 @@ int i915_vma_move_to_active(struct i915_vma *vma, * add the active reference first and queue for it to be dropped * *last*. */ - if (!i915_gem_active_isset(active) && !vma->active_count++) { - list_move_tail(&vma->vm_link, &vma->vm->active_list); + if (!i915_gem_active_isset(active) && !vma->active_count++) obj->active_count++; - } i915_gem_active_set(active, rq); GEM_BUG_ON(!i915_vma_is_active(vma)); GEM_BUG_ON(!obj->active_count); diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 4f7c1c7599f4..5793abe509a2 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -71,29 +71,42 @@ struct i915_vma { unsigned int open_count; unsigned long flags; /** - * How many users have pinned this object in GTT space. The following - * users can each hold at most one reference: pwrite/pread, execbuffer - * (objects are not allowed multiple times for the same batchbuffer), - * and the framebuffer code. When switching/pageflipping, the - * framebuffer code has at most two buffers pinned per crtc. + * How many users have pinned this object in GTT space. * - * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3 - * bits with absolutely no headroom. So use 4 bits. + * This is a tightly bound, fairly small number of users, so we + * stuff inside the flags field so that we can both check for overflow + * and detect a no-op i915_vma_pin() in a single check, while also + * pinning the vma. + * + * The worst case display setup would have the same vma pinned for + * use on each plane on each crtc, while also building the next atomic + * state and holding a pin for the length of the cleanup queue. In the + * future, the flip queue may be increased from 1. + * Estimated worst case: 3 [qlen] * 4 [max crtcs] * 7 [max planes] = 84 + * + * For GEM, the number of concurrent users for pwrite/pread is + * unbounded. For execbuffer, it is currently one but will in future + * be extended to allow multiple clients to pin vma concurrently. + * + * We also use suballocated pages, with each suballocation claiming + * its own pin on the shared vma. At present, this is limited to + * exclusive cachelines of a single page, so a maximum of 64 possible + * users. */ -#define I915_VMA_PIN_MASK 0xf -#define I915_VMA_PIN_OVERFLOW BIT(5) +#define I915_VMA_PIN_MASK 0xff +#define I915_VMA_PIN_OVERFLOW BIT(8) /** Flags and address space this VMA is bound to */ -#define I915_VMA_GLOBAL_BIND BIT(6) -#define I915_VMA_LOCAL_BIND BIT(7) +#define I915_VMA_GLOBAL_BIND BIT(9) +#define I915_VMA_LOCAL_BIND BIT(10) #define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW) -#define I915_VMA_GGTT BIT(8) -#define I915_VMA_CAN_FENCE BIT(9) -#define I915_VMA_CLOSED BIT(10) -#define I915_VMA_USERFAULT_BIT 11 +#define I915_VMA_GGTT BIT(11) +#define I915_VMA_CAN_FENCE BIT(12) +#define I915_VMA_CLOSED BIT(13) +#define I915_VMA_USERFAULT_BIT 14 #define I915_VMA_USERFAULT BIT(I915_VMA_USERFAULT_BIT) -#define I915_VMA_GGTT_WRITE BIT(12) +#define I915_VMA_GGTT_WRITE BIT(15) unsigned int active_count; struct rb_root active; @@ -425,7 +438,7 @@ void i915_vma_parked(struct drm_i915_private *i915); * or the list is empty ofc. */ #define for_each_ggtt_vma(V, OBJ) \ - list_for_each_entry(V, &(OBJ)->vma_list, obj_link) \ + list_for_each_entry(V, &(OBJ)->vma.list, obj_link) \ for_each_until(!i915_vma_is_ggtt(V)) #endif diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c index 9a2fdc77ebcb..a1a263026574 100644 --- a/drivers/gpu/drm/i915/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c @@ -110,41 +110,39 @@ intel_plane_destroy_state(struct drm_plane *plane, } int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_state, - struct intel_crtc_state *crtc_state, + struct intel_crtc_state *new_crtc_state, const struct intel_plane_state *old_plane_state, - struct intel_plane_state *intel_state) + struct intel_plane_state *new_plane_state) { - struct drm_plane *plane = intel_state->base.plane; - struct drm_plane_state *state = &intel_state->base; - struct intel_plane *intel_plane = to_intel_plane(plane); + struct intel_plane *plane = to_intel_plane(new_plane_state->base.plane); int ret; - crtc_state->active_planes &= ~BIT(intel_plane->id); - crtc_state->nv12_planes &= ~BIT(intel_plane->id); - intel_state->base.visible = false; + new_crtc_state->active_planes &= ~BIT(plane->id); + new_crtc_state->nv12_planes &= ~BIT(plane->id); + new_plane_state->base.visible = false; - /* If this is a cursor plane, no further checks are needed. */ - if (!intel_state->base.crtc && !old_plane_state->base.crtc) + if (!new_plane_state->base.crtc && !old_plane_state->base.crtc) return 0; - ret = intel_plane->check_plane(crtc_state, intel_state); + ret = plane->check_plane(new_crtc_state, new_plane_state); if (ret) return ret; /* FIXME pre-g4x don't work like this */ - if (state->visible) - crtc_state->active_planes |= BIT(intel_plane->id); + if (new_plane_state->base.visible) + new_crtc_state->active_planes |= BIT(plane->id); - if (state->visible && state->fb->format->format == DRM_FORMAT_NV12) - crtc_state->nv12_planes |= BIT(intel_plane->id); + if (new_plane_state->base.visible && + new_plane_state->base.fb->format->format == DRM_FORMAT_NV12) + new_crtc_state->nv12_planes |= BIT(plane->id); - if (state->visible || old_plane_state->base.visible) - crtc_state->update_planes |= BIT(intel_plane->id); + if (new_plane_state->base.visible || old_plane_state->base.visible) + new_crtc_state->update_planes |= BIT(plane->id); return intel_plane_atomic_calc_changes(old_crtc_state, - &crtc_state->base, + &new_crtc_state->base, old_plane_state, - state); + &new_plane_state->base); } static int intel_plane_atomic_check(struct drm_plane *plane, diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index 561a4f9f044c..b508d8a735e0 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -1663,6 +1663,13 @@ init_vbt_missing_defaults(struct drm_i915_private *dev_priv) struct ddi_vbt_port_info *info = &dev_priv->vbt.ddi_port_info[port]; + /* + * VBT has the TypeC mode (native,TBT/USB) and we don't want + * to detect it. + */ + if (intel_port_is_tc(dev_priv, port)) + continue; + info->supports_dvi = (port != PORT_A && port != PORT_E); info->supports_hdmi = info->supports_dvi; info->supports_dp = (port != PORT_E); diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index b58915b8708b..cacaa1d04d17 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -29,168 +29,146 @@ #define task_asleep(tsk) ((tsk)->state & TASK_NORMAL && !(tsk)->on_rq) -static unsigned int __intel_breadcrumbs_wakeup(struct intel_breadcrumbs *b) +static void irq_enable(struct intel_engine_cs *engine) { - struct intel_wait *wait; - unsigned int result = 0; - - lockdep_assert_held(&b->irq_lock); - - wait = b->irq_wait; - if (wait) { - /* - * N.B. Since task_asleep() and ttwu are not atomic, the - * waiter may actually go to sleep after the check, causing - * us to suppress a valid wakeup. We prefer to reduce the - * number of false positive missed_breadcrumb() warnings - * at the expense of a few false negatives, as it it easy - * to trigger a false positive under heavy load. Enough - * signal should remain from genuine missed_breadcrumb() - * for us to detect in CI. - */ - bool was_asleep = task_asleep(wait->tsk); - - result = ENGINE_WAKEUP_WAITER; - if (wake_up_process(wait->tsk) && was_asleep) - result |= ENGINE_WAKEUP_ASLEEP; - } + if (!engine->irq_enable) + return; - return result; + /* Caller disables interrupts */ + spin_lock(&engine->i915->irq_lock); + engine->irq_enable(engine); + spin_unlock(&engine->i915->irq_lock); } -unsigned int intel_engine_wakeup(struct intel_engine_cs *engine) +static void irq_disable(struct intel_engine_cs *engine) { - struct intel_breadcrumbs *b = &engine->breadcrumbs; - unsigned long flags; - unsigned int result; - - spin_lock_irqsave(&b->irq_lock, flags); - result = __intel_breadcrumbs_wakeup(b); - spin_unlock_irqrestore(&b->irq_lock, flags); - - return result; -} + if (!engine->irq_disable) + return; -static unsigned long wait_timeout(void) -{ - return round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES); + /* Caller disables interrupts */ + spin_lock(&engine->i915->irq_lock); + engine->irq_disable(engine); + spin_unlock(&engine->i915->irq_lock); } -static noinline void missed_breadcrumb(struct intel_engine_cs *engine) +static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b) { - if (GEM_SHOW_DEBUG()) { - struct drm_printer p = drm_debug_printer(__func__); + lockdep_assert_held(&b->irq_lock); - intel_engine_dump(engine, &p, - "%s missed breadcrumb at %pS\n", - engine->name, __builtin_return_address(0)); - } + GEM_BUG_ON(!b->irq_enabled); + if (!--b->irq_enabled) + irq_disable(container_of(b, + struct intel_engine_cs, + breadcrumbs)); - set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); + b->irq_armed = false; } -static void intel_breadcrumbs_hangcheck(struct timer_list *t) +void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) { - struct intel_engine_cs *engine = - from_timer(engine, t, breadcrumbs.hangcheck); struct intel_breadcrumbs *b = &engine->breadcrumbs; - unsigned int irq_count; if (!b->irq_armed) return; - irq_count = READ_ONCE(b->irq_count); - if (b->hangcheck_interrupts != irq_count) { - b->hangcheck_interrupts = irq_count; - mod_timer(&b->hangcheck, wait_timeout()); - return; - } + spin_lock_irq(&b->irq_lock); + if (b->irq_armed) + __intel_breadcrumbs_disarm_irq(b); + spin_unlock_irq(&b->irq_lock); +} - /* We keep the hangcheck timer alive until we disarm the irq, even - * if there are no waiters at present. - * - * If the waiter was currently running, assume it hasn't had a chance - * to process the pending interrupt (e.g, low priority task on a loaded - * system) and wait until it sleeps before declaring a missed interrupt. - * - * If the waiter was asleep (and not even pending a wakeup), then we - * must have missed an interrupt as the GPU has stopped advancing - * but we still have a waiter. Assuming all batches complete within - * DRM_I915_HANGCHECK_JIFFIES [1.5s]! - */ - if (intel_engine_wakeup(engine) & ENGINE_WAKEUP_ASLEEP) { - missed_breadcrumb(engine); - mod_timer(&b->fake_irq, jiffies + 1); - } else { - mod_timer(&b->hangcheck, wait_timeout()); - } +static inline bool __request_completed(const struct i915_request *rq) +{ + return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno); } -static void intel_breadcrumbs_fake_irq(struct timer_list *t) +bool intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine) { - struct intel_engine_cs *engine = - from_timer(engine, t, breadcrumbs.fake_irq); struct intel_breadcrumbs *b = &engine->breadcrumbs; + struct intel_context *ce, *cn; + struct list_head *pos, *next; + LIST_HEAD(signal); - /* - * The timer persists in case we cannot enable interrupts, - * or if we have previously seen seqno/interrupt incoherency - * ("missed interrupt" syndrome, better known as a "missed breadcrumb"). - * Here the worker will wake up every jiffie in order to kick the - * oldest waiter to do the coherent seqno check. - */ + spin_lock(&b->irq_lock); - spin_lock_irq(&b->irq_lock); - if (b->irq_armed && !__intel_breadcrumbs_wakeup(b)) - __intel_engine_disarm_breadcrumbs(engine); - spin_unlock_irq(&b->irq_lock); - if (!b->irq_armed) - return; + if (b->irq_armed && list_empty(&b->signalers)) + __intel_breadcrumbs_disarm_irq(b); - /* If the user has disabled the fake-irq, restore the hangchecking */ - if (!test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings)) { - mod_timer(&b->hangcheck, wait_timeout()); - return; + list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) { + GEM_BUG_ON(list_empty(&ce->signals)); + + list_for_each_safe(pos, next, &ce->signals) { + struct i915_request *rq = + list_entry(pos, typeof(*rq), signal_link); + + if (!__request_completed(rq)) + break; + + GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL, + &rq->fence.flags)); + clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); + + /* + * We may race with direct invocation of + * dma_fence_signal(), e.g. i915_request_retire(), + * in which case we can skip processing it ourselves. + */ + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &rq->fence.flags)) + continue; + + /* + * Queue for execution after dropping the signaling + * spinlock as the callback chain may end up adding + * more signalers to the same context or engine. + */ + i915_request_get(rq); + list_add_tail(&rq->signal_link, &signal); + } + + /* + * We process the list deletion in bulk, only using a list_add + * (not list_move) above but keeping the status of + * rq->signal_link known with the I915_FENCE_FLAG_SIGNAL bit. + */ + if (!list_is_first(pos, &ce->signals)) { + /* Advance the list to the first incomplete request */ + __list_del_many(&ce->signals, pos); + if (&ce->signals == pos) /* now empty */ + list_del_init(&ce->signal_link); + } } - mod_timer(&b->fake_irq, jiffies + 1); -} + spin_unlock(&b->irq_lock); -static void irq_enable(struct intel_engine_cs *engine) -{ - if (!engine->irq_enable) - return; + list_for_each_safe(pos, next, &signal) { + struct i915_request *rq = + list_entry(pos, typeof(*rq), signal_link); - /* Caller disables interrupts */ - spin_lock(&engine->i915->irq_lock); - engine->irq_enable(engine); - spin_unlock(&engine->i915->irq_lock); + dma_fence_signal(&rq->fence); + i915_request_put(rq); + } + + return !list_empty(&signal); } -static void irq_disable(struct intel_engine_cs *engine) +bool intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine) { - if (!engine->irq_disable) - return; + bool result; - /* Caller disables interrupts */ - spin_lock(&engine->i915->irq_lock); - engine->irq_disable(engine); - spin_unlock(&engine->i915->irq_lock); + local_irq_disable(); + result = intel_engine_breadcrumbs_irq(engine); + local_irq_enable(); + + return result; } -void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) +static void signal_irq_work(struct irq_work *work) { - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - lockdep_assert_held(&b->irq_lock); - GEM_BUG_ON(b->irq_wait); - GEM_BUG_ON(!b->irq_armed); - - GEM_BUG_ON(!b->irq_enabled); - if (!--b->irq_enabled) - irq_disable(engine); + struct intel_engine_cs *engine = + container_of(work, typeof(*engine), breadcrumbs.irq_work); - b->irq_armed = false; + intel_engine_breadcrumbs_irq(engine); } void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine) @@ -215,77 +193,14 @@ void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine) spin_unlock_irq(&b->irq_lock); } -void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct intel_wait *wait, *n; - - if (!b->irq_armed) - return; - - /* - * We only disarm the irq when we are idle (all requests completed), - * so if the bottom-half remains asleep, it missed the request - * completion. - */ - if (intel_engine_wakeup(engine) & ENGINE_WAKEUP_ASLEEP) - missed_breadcrumb(engine); - - spin_lock_irq(&b->rb_lock); - - spin_lock(&b->irq_lock); - b->irq_wait = NULL; - if (b->irq_armed) - __intel_engine_disarm_breadcrumbs(engine); - spin_unlock(&b->irq_lock); - - rbtree_postorder_for_each_entry_safe(wait, n, &b->waiters, node) { - GEM_BUG_ON(!intel_engine_signaled(engine, wait->seqno)); - RB_CLEAR_NODE(&wait->node); - wake_up_process(wait->tsk); - } - b->waiters = RB_ROOT; - - spin_unlock_irq(&b->rb_lock); -} - -static bool use_fake_irq(const struct intel_breadcrumbs *b) -{ - const struct intel_engine_cs *engine = - container_of(b, struct intel_engine_cs, breadcrumbs); - - if (!test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings)) - return false; - - /* - * Only start with the heavy weight fake irq timer if we have not - * seen any interrupts since enabling it the first time. If the - * interrupts are still arriving, it means we made a mistake in our - * engine->seqno_barrier(), a timing error that should be transient - * and unlikely to reoccur. - */ - return READ_ONCE(b->irq_count) == b->hangcheck_interrupts; -} - -static void enable_fake_irq(struct intel_breadcrumbs *b) -{ - /* Ensure we never sleep indefinitely */ - if (!b->irq_enabled || use_fake_irq(b)) - mod_timer(&b->fake_irq, jiffies + 1); - else - mod_timer(&b->hangcheck, wait_timeout()); -} - -static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) +static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) { struct intel_engine_cs *engine = container_of(b, struct intel_engine_cs, breadcrumbs); - struct drm_i915_private *i915 = engine->i915; - bool enabled; lockdep_assert_held(&b->irq_lock); if (b->irq_armed) - return false; + return; /* * The breadcrumb irq will be disarmed on the interrupt after the @@ -303,548 +218,130 @@ static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) * the driver is idle) we disarm the breadcrumbs. */ - /* No interrupts? Kick the waiter every jiffie! */ - enabled = false; - if (!b->irq_enabled++ && - !test_bit(engine->id, &i915->gpu_error.test_irq_rings)) { + if (!b->irq_enabled++) irq_enable(engine); - enabled = true; - } - - enable_fake_irq(b); - return enabled; -} - -static inline struct intel_wait *to_wait(struct rb_node *node) -{ - return rb_entry(node, struct intel_wait, node); } -static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b, - struct intel_wait *wait) -{ - lockdep_assert_held(&b->rb_lock); - GEM_BUG_ON(b->irq_wait == wait); - - /* - * This request is completed, so remove it from the tree, mark it as - * complete, and *then* wake up the associated task. N.B. when the - * task wakes up, it will find the empty rb_node, discern that it - * has already been removed from the tree and skip the serialisation - * of the b->rb_lock and b->irq_lock. This means that the destruction - * of the intel_wait is not serialised with the interrupt handler - * by the waiter - it must instead be serialised by the caller. - */ - rb_erase(&wait->node, &b->waiters); - RB_CLEAR_NODE(&wait->node); - - if (wait->tsk->state != TASK_RUNNING) - wake_up_process(wait->tsk); /* implicit smp_wmb() */ -} - -static inline void __intel_breadcrumbs_next(struct intel_engine_cs *engine, - struct rb_node *next) +void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; - spin_lock(&b->irq_lock); - GEM_BUG_ON(!b->irq_armed); - GEM_BUG_ON(!b->irq_wait); - b->irq_wait = to_wait(next); - spin_unlock(&b->irq_lock); - - /* We always wake up the next waiter that takes over as the bottom-half - * as we may delegate not only the irq-seqno barrier to the next waiter - * but also the task of waking up concurrent waiters. - */ - if (next) - wake_up_process(to_wait(next)->tsk); -} - -static bool __intel_engine_add_wait(struct intel_engine_cs *engine, - struct intel_wait *wait) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct rb_node **p, *parent, *completed; - bool first, armed; - u32 seqno; - - GEM_BUG_ON(!wait->seqno); - - /* Insert the request into the retirement ordered list - * of waiters by walking the rbtree. If we are the oldest - * seqno in the tree (the first to be retired), then - * set ourselves as the bottom-half. - * - * As we descend the tree, prune completed branches since we hold the - * spinlock we know that the first_waiter must be delayed and can - * reduce some of the sequential wake up latency if we take action - * ourselves and wake up the completed tasks in parallel. Also, by - * removing stale elements in the tree, we may be able to reduce the - * ping-pong between the old bottom-half and ourselves as first-waiter. - */ - armed = false; - first = true; - parent = NULL; - completed = NULL; - seqno = intel_engine_get_seqno(engine); - - /* If the request completed before we managed to grab the spinlock, - * return now before adding ourselves to the rbtree. We let the - * current bottom-half handle any pending wakeups and instead - * try and get out of the way quickly. - */ - if (i915_seqno_passed(seqno, wait->seqno)) { - RB_CLEAR_NODE(&wait->node); - return first; - } - - p = &b->waiters.rb_node; - while (*p) { - parent = *p; - if (wait->seqno == to_wait(parent)->seqno) { - /* We have multiple waiters on the same seqno, select - * the highest priority task (that with the smallest - * task->prio) to serve as the bottom-half for this - * group. - */ - if (wait->tsk->prio > to_wait(parent)->tsk->prio) { - p = &parent->rb_right; - first = false; - } else { - p = &parent->rb_left; - } - } else if (i915_seqno_passed(wait->seqno, - to_wait(parent)->seqno)) { - p = &parent->rb_right; - if (i915_seqno_passed(seqno, to_wait(parent)->seqno)) - completed = parent; - else - first = false; - } else { - p = &parent->rb_left; - } - } - rb_link_node(&wait->node, parent, p); - rb_insert_color(&wait->node, &b->waiters); - - if (first) { - spin_lock(&b->irq_lock); - b->irq_wait = wait; - /* After assigning ourselves as the new bottom-half, we must - * perform a cursory check to prevent a missed interrupt. - * Either we miss the interrupt whilst programming the hardware, - * or if there was a previous waiter (for a later seqno) they - * may be woken instead of us (due to the inherent race - * in the unlocked read of b->irq_seqno_bh in the irq handler) - * and so we miss the wake up. - */ - armed = __intel_breadcrumbs_enable_irq(b); - spin_unlock(&b->irq_lock); - } - - if (completed) { - /* Advance the bottom-half (b->irq_wait) before we wake up - * the waiters who may scribble over their intel_wait - * just as the interrupt handler is dereferencing it via - * b->irq_wait. - */ - if (!first) { - struct rb_node *next = rb_next(completed); - GEM_BUG_ON(next == &wait->node); - __intel_breadcrumbs_next(engine, next); - } - - do { - struct intel_wait *crumb = to_wait(completed); - completed = rb_prev(completed); - __intel_breadcrumbs_finish(b, crumb); - } while (completed); - } - - GEM_BUG_ON(!b->irq_wait); - GEM_BUG_ON(!b->irq_armed); - GEM_BUG_ON(rb_first(&b->waiters) != &b->irq_wait->node); + spin_lock_init(&b->irq_lock); + INIT_LIST_HEAD(&b->signalers); - return armed; + init_irq_work(&b->irq_work, signal_irq_work); } -bool intel_engine_add_wait(struct intel_engine_cs *engine, - struct intel_wait *wait) +void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; - bool armed; - - spin_lock_irq(&b->rb_lock); - armed = __intel_engine_add_wait(engine, wait); - spin_unlock_irq(&b->rb_lock); - if (armed) - return armed; - - /* Make the caller recheck if its request has already started. */ - return intel_engine_has_started(engine, wait->seqno); -} + unsigned long flags; -static inline bool chain_wakeup(struct rb_node *rb, int priority) -{ - return rb && to_wait(rb)->tsk->prio <= priority; -} + spin_lock_irqsave(&b->irq_lock, flags); -static inline int wakeup_priority(struct intel_breadcrumbs *b, - struct task_struct *tsk) -{ - if (tsk == b->signaler) - return INT_MIN; + if (b->irq_enabled) + irq_enable(engine); else - return tsk->prio; -} - -static void __intel_engine_remove_wait(struct intel_engine_cs *engine, - struct intel_wait *wait) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - lockdep_assert_held(&b->rb_lock); - - if (RB_EMPTY_NODE(&wait->node)) - goto out; - - if (b->irq_wait == wait) { - const int priority = wakeup_priority(b, wait->tsk); - struct rb_node *next; - - /* We are the current bottom-half. Find the next candidate, - * the first waiter in the queue on the remaining oldest - * request. As multiple seqnos may complete in the time it - * takes us to wake up and find the next waiter, we have to - * wake up that waiter for it to perform its own coherent - * completion check. - */ - next = rb_next(&wait->node); - if (chain_wakeup(next, priority)) { - /* If the next waiter is already complete, - * wake it up and continue onto the next waiter. So - * if have a small herd, they will wake up in parallel - * rather than sequentially, which should reduce - * the overall latency in waking all the completed - * clients. - * - * However, waking up a chain adds extra latency to - * the first_waiter. This is undesirable if that - * waiter is a high priority task. - */ - u32 seqno = intel_engine_get_seqno(engine); - - while (i915_seqno_passed(seqno, to_wait(next)->seqno)) { - struct rb_node *n = rb_next(next); - - __intel_breadcrumbs_finish(b, to_wait(next)); - next = n; - if (!chain_wakeup(next, priority)) - break; - } - } - - __intel_breadcrumbs_next(engine, next); - } else { - GEM_BUG_ON(rb_first(&b->waiters) == &wait->node); - } - - GEM_BUG_ON(RB_EMPTY_NODE(&wait->node)); - rb_erase(&wait->node, &b->waiters); - RB_CLEAR_NODE(&wait->node); + irq_disable(engine); -out: - GEM_BUG_ON(b->irq_wait == wait); - GEM_BUG_ON(rb_first(&b->waiters) != - (b->irq_wait ? &b->irq_wait->node : NULL)); + spin_unlock_irqrestore(&b->irq_lock, flags); } -void intel_engine_remove_wait(struct intel_engine_cs *engine, - struct intel_wait *wait) +void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) { - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - /* Quick check to see if this waiter was already decoupled from - * the tree by the bottom-half to avoid contention on the spinlock - * by the herd. - */ - if (RB_EMPTY_NODE(&wait->node)) { - GEM_BUG_ON(READ_ONCE(b->irq_wait) == wait); - return; - } - - spin_lock_irq(&b->rb_lock); - __intel_engine_remove_wait(engine, wait); - spin_unlock_irq(&b->rb_lock); } -static void signaler_set_rtpriority(void) +bool i915_request_enable_breadcrumb(struct i915_request *rq) { - struct sched_param param = { .sched_priority = 1 }; - - sched_setscheduler_nocheck(current, SCHED_FIFO, ¶m); -} + struct intel_breadcrumbs *b = &rq->engine->breadcrumbs; -static int intel_breadcrumbs_signaler(void *arg) -{ - struct intel_engine_cs *engine = arg; - struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct i915_request *rq, *n; + GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)); - /* Install ourselves with high priority to reduce signalling latency */ - signaler_set_rtpriority(); + if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) + return true; - do { - bool do_schedule = true; - LIST_HEAD(list); - u32 seqno; + spin_lock(&b->irq_lock); + if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags) && + !__request_completed(rq)) { + struct intel_context *ce = rq->hw_context; + struct list_head *pos; - set_current_state(TASK_INTERRUPTIBLE); - if (list_empty(&b->signals)) - goto sleep; + __intel_breadcrumbs_arm_irq(b); /* - * We are either woken up by the interrupt bottom-half, - * or by a client adding a new signaller. In both cases, - * the GPU seqno may have advanced beyond our oldest signal. - * If it has, propagate the signal, remove the waiter and - * check again with the next oldest signal. Otherwise we - * need to wait for a new interrupt from the GPU or for - * a new client. + * We keep the seqno in retirement order, so we can break + * inside intel_engine_breadcrumbs_irq as soon as we've passed + * the last completed request (or seen a request that hasn't + * event started). We could iterate the timeline->requests list, + * but keeping a separate signalers_list has the advantage of + * hopefully being much smaller than the full list and so + * provides faster iteration and detection when there are no + * more interrupts required for this context. + * + * We typically expect to add new signalers in order, so we + * start looking for our insertion point from the tail of + * the list. */ - seqno = intel_engine_get_seqno(engine); - - spin_lock_irq(&b->rb_lock); - list_for_each_entry_safe(rq, n, &b->signals, signaling.link) { - u32 this = rq->signaling.wait.seqno; - - GEM_BUG_ON(!rq->signaling.wait.seqno); + list_for_each_prev(pos, &ce->signals) { + struct i915_request *it = + list_entry(pos, typeof(*it), signal_link); - if (!i915_seqno_passed(seqno, this)) + if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno)) break; - - if (likely(this == i915_request_global_seqno(rq))) { - __intel_engine_remove_wait(engine, - &rq->signaling.wait); - - rq->signaling.wait.seqno = 0; - __list_del_entry(&rq->signaling.link); - - if (!i915_request_signaled(rq)) { - list_add_tail(&rq->signaling.link, - &list); - i915_request_get(rq); - } - } } - spin_unlock_irq(&b->rb_lock); - - if (!list_empty(&list)) { - local_bh_disable(); - list_for_each_entry_safe(rq, n, &list, signaling.link) { - dma_fence_signal(&rq->fence); - GEM_BUG_ON(!i915_request_completed(rq)); - i915_request_put(rq); - } - local_bh_enable(); /* kick start the tasklets */ - - /* - * If the engine is saturated we may be continually - * processing completed requests. This angers the - * NMI watchdog if we never let anything else - * have access to the CPU. Let's pretend to be nice - * and relinquish the CPU if we burn through the - * entire RT timeslice! - */ - do_schedule = need_resched(); - } - - if (unlikely(do_schedule)) { -sleep: - if (kthread_should_park()) - kthread_parkme(); + list_add(&rq->signal_link, pos); + if (pos == &ce->signals) /* catch transitions from empty list */ + list_move_tail(&ce->signal_link, &b->signalers); - if (unlikely(kthread_should_stop())) - break; - - schedule(); - } - } while (1); - __set_current_state(TASK_RUNNING); - - return 0; -} - -static void insert_signal(struct intel_breadcrumbs *b, - struct i915_request *request, - const u32 seqno) -{ - struct i915_request *iter; - - lockdep_assert_held(&b->rb_lock); - - /* - * A reasonable assumption is that we are called to add signals - * in sequence, as the requests are submitted for execution and - * assigned a global_seqno. This will be the case for the majority - * of internally generated signals (inter-engine signaling). - * - * Out of order waiters triggering random signaling enabling will - * be more problematic, but hopefully rare enough and the list - * small enough that the O(N) insertion sort is not an issue. - */ - - list_for_each_entry_reverse(iter, &b->signals, signaling.link) - if (i915_seqno_passed(seqno, iter->signaling.wait.seqno)) - break; - - list_add(&request->signaling.link, &iter->signaling.link); -} - -bool intel_engine_enable_signaling(struct i915_request *request, bool wakeup) -{ - struct intel_engine_cs *engine = request->engine; - struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct intel_wait *wait = &request->signaling.wait; - u32 seqno; - - /* - * Note that we may be called from an interrupt handler on another - * device (e.g. nouveau signaling a fence completion causing us - * to submit a request, and so enable signaling). As such, - * we need to make sure that all other users of b->rb_lock protect - * against interrupts, i.e. use spin_lock_irqsave. - */ - - /* locked by dma_fence_enable_sw_signaling() (irqsafe fence->lock) */ - GEM_BUG_ON(!irqs_disabled()); - lockdep_assert_held(&request->lock); - - seqno = i915_request_global_seqno(request); - if (!seqno) /* will be enabled later upon execution */ - return true; - - GEM_BUG_ON(wait->seqno); - wait->tsk = b->signaler; - wait->request = request; - wait->seqno = seqno; - - /* - * Add ourselves into the list of waiters, but registering our - * bottom-half as the signaller thread. As per usual, only the oldest - * waiter (not just signaller) is tasked as the bottom-half waking - * up all completed waiters after the user interrupt. - * - * If we are the oldest waiter, enable the irq (after which we - * must double check that the seqno did not complete). - */ - spin_lock(&b->rb_lock); - insert_signal(b, request, seqno); - wakeup &= __intel_engine_add_wait(engine, wait); - spin_unlock(&b->rb_lock); - - if (wakeup) { - wake_up_process(b->signaler); - return !intel_wait_complete(wait); + set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); } + spin_unlock(&b->irq_lock); - return true; + return !__request_completed(rq); } -void intel_engine_cancel_signaling(struct i915_request *request) +void i915_request_cancel_breadcrumb(struct i915_request *rq) { - struct intel_engine_cs *engine = request->engine; - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - GEM_BUG_ON(!irqs_disabled()); - lockdep_assert_held(&request->lock); + struct intel_breadcrumbs *b = &rq->engine->breadcrumbs; - if (!READ_ONCE(request->signaling.wait.seqno)) + if (!test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) return; - spin_lock(&b->rb_lock); - __intel_engine_remove_wait(engine, &request->signaling.wait); - if (fetch_and_zero(&request->signaling.wait.seqno)) - __list_del_entry(&request->signaling.link); - spin_unlock(&b->rb_lock); -} - -int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct task_struct *tsk; - - spin_lock_init(&b->rb_lock); - spin_lock_init(&b->irq_lock); - - timer_setup(&b->fake_irq, intel_breadcrumbs_fake_irq, 0); - timer_setup(&b->hangcheck, intel_breadcrumbs_hangcheck, 0); - - INIT_LIST_HEAD(&b->signals); - - /* Spawn a thread to provide a common bottom-half for all signals. - * As this is an asynchronous interface we cannot steal the current - * task for handling the bottom-half to the user interrupt, therefore - * we create a thread to do the coherent seqno dance after the - * interrupt and then signal the waitqueue (via the dma-buf/fence). - */ - tsk = kthread_run(intel_breadcrumbs_signaler, engine, - "i915/signal:%d", engine->id); - if (IS_ERR(tsk)) - return PTR_ERR(tsk); - - b->signaler = tsk; - - return 0; -} - -static void cancel_fake_irq(struct intel_engine_cs *engine) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - del_timer_sync(&b->fake_irq); /* may queue b->hangcheck */ - del_timer_sync(&b->hangcheck); - clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); -} - -void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - unsigned long flags; - - spin_lock_irqsave(&b->irq_lock, flags); + spin_lock(&b->irq_lock); + if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) { + struct intel_context *ce = rq->hw_context; - /* - * Leave the fake_irq timer enabled (if it is running), but clear the - * bit so that it turns itself off on its next wake up and goes back - * to the long hangcheck interval if still required. - */ - clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); + list_del(&rq->signal_link); + if (list_empty(&ce->signals)) + list_del_init(&ce->signal_link); - if (b->irq_enabled) - irq_enable(engine); - else - irq_disable(engine); - - spin_unlock_irqrestore(&b->irq_lock, flags); + clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); + } + spin_unlock(&b->irq_lock); } -void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) +void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine, + struct drm_printer *p) { struct intel_breadcrumbs *b = &engine->breadcrumbs; + struct intel_context *ce; + struct i915_request *rq; - /* The engines should be idle and all requests accounted for! */ - WARN_ON(READ_ONCE(b->irq_wait)); - WARN_ON(!RB_EMPTY_ROOT(&b->waiters)); - WARN_ON(!list_empty(&b->signals)); + if (list_empty(&b->signalers)) + return; - if (!IS_ERR_OR_NULL(b->signaler)) - kthread_stop(b->signaler); + drm_printf(p, "Signals:\n"); - cancel_fake_irq(engine); + spin_lock_irq(&b->irq_lock); + list_for_each_entry(ce, &b->signalers, signal_link) { + list_for_each_entry(rq, &ce->signals, signal_link) { + drm_printf(p, "\t[%llx:%llx%s] @ %dms\n", + rq->fence.context, rq->fence.seqno, + i915_request_completed(rq) ? "!" : + i915_request_started(rq) ? "*" : + "", + jiffies_to_msecs(jiffies - rq->emitted_jiffies)); + } + } + spin_unlock_irq(&b->irq_lock); } - -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/intel_breadcrumbs.c" -#endif diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index bc7589656a8f..4b0044cdcf1a 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -605,48 +605,48 @@ void intel_color_load_luts(struct intel_crtc_state *crtc_state) dev_priv->display.load_luts(crtc_state); } +static int check_lut_size(const struct drm_property_blob *lut, int expected) +{ + int len; + + if (!lut) + return 0; + + len = drm_color_lut_size(lut); + if (len != expected) { + DRM_DEBUG_KMS("Invalid LUT size; got %d, expected %d\n", + len, expected); + return -EINVAL; + } + + return 0; +} + int intel_color_check(struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); - size_t gamma_length, degamma_length; - uint32_t tests = DRM_COLOR_LUT_NON_DECREASING; + int gamma_length, degamma_length; + u32 gamma_tests, degamma_tests; degamma_length = INTEL_INFO(dev_priv)->color.degamma_lut_size; gamma_length = INTEL_INFO(dev_priv)->color.gamma_lut_size; + degamma_tests = INTEL_INFO(dev_priv)->color.degamma_lut_tests; + gamma_tests = INTEL_INFO(dev_priv)->color.gamma_lut_tests; - /* - * All of our platforms mandate that the degamma curve be - * non-decreasing. Additionally, GLK and gen11 only accept a single - * value for red, green, and blue in the degamma table. Make sure - * userspace didn't try to pass us something we can't handle. - * - * We don't have any extra hardware constraints on the gamma table, - * so no need to explicitly check it. - */ - if (IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 10) - tests |= DRM_COLOR_LUT_EQUAL_CHANNELS; + /* Always allow legacy gamma LUT with no further checking. */ + if (crtc_state_is_legacy_gamma(crtc_state)) + return 0; - if (drm_color_lut_check(crtc_state->base.degamma_lut, tests) != 0) + if (check_lut_size(crtc_state->base.degamma_lut, degamma_length) || + check_lut_size(crtc_state->base.gamma_lut, gamma_length)) return -EINVAL; - /* - * We allow both degamma & gamma luts at the right size or - * NULL. - */ - if ((!crtc_state->base.degamma_lut || - drm_color_lut_size(crtc_state->base.degamma_lut) == degamma_length) && - (!crtc_state->base.gamma_lut || - drm_color_lut_size(crtc_state->base.gamma_lut) == gamma_length)) - return 0; + if (drm_color_lut_check(crtc_state->base.degamma_lut, degamma_tests) || + drm_color_lut_check(crtc_state->base.gamma_lut, gamma_tests)) + return -EINVAL; - /* - * We also allow no degamma lut/ctm and a gamma lut at the legacy - * size (256 entries). - */ - if (crtc_state_is_legacy_gamma(crtc_state)) - return 0; - return -EINVAL; + return 0; } void intel_color_init(struct intel_crtc *crtc) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index acd94354afc8..ca705546a0ab 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -995,7 +995,7 @@ static u32 hsw_pll_to_ddi_pll_sel(const struct intel_shared_dpll *pll) } } -static u32 icl_pll_to_ddi_pll_sel(struct intel_encoder *encoder, +static u32 icl_pll_to_ddi_clk_sel(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { const struct intel_shared_dpll *pll = crtc_state->shared_dpll; @@ -1004,10 +1004,11 @@ static u32 icl_pll_to_ddi_pll_sel(struct intel_encoder *encoder, switch (id) { default: + /* + * DPLL_ID_ICL_DPLL0 and DPLL_ID_ICL_DPLL1 should not be used + * here, so do warn if this get passed in + */ MISSING_CASE(id); - /* fall through */ - case DPLL_ID_ICL_DPLL0: - case DPLL_ID_ICL_DPLL1: return DDI_CLK_SEL_NONE; case DPLL_ID_ICL_TBTPLL: switch (clock) { @@ -1021,7 +1022,7 @@ static u32 icl_pll_to_ddi_pll_sel(struct intel_encoder *encoder, return DDI_CLK_SEL_TBT_810; default: MISSING_CASE(clock); - break; + return DDI_CLK_SEL_NONE; } case DPLL_ID_ICL_MGPLL1: case DPLL_ID_ICL_MGPLL2: @@ -1391,16 +1392,17 @@ static int icl_calc_tbt_pll_link(struct drm_i915_private *dev_priv, static int icl_calc_mg_pll_link(struct drm_i915_private *dev_priv, enum port port) { + enum tc_port tc_port = intel_port_to_tc(dev_priv, port); u32 mg_pll_div0, mg_clktop_hsclkctl; u32 m1, m2_int, m2_frac, div1, div2, refclk; u64 tmp; refclk = dev_priv->cdclk.hw.ref; - mg_pll_div0 = I915_READ(MG_PLL_DIV0(port)); - mg_clktop_hsclkctl = I915_READ(MG_CLKTOP2_HSCLKCTL(port)); + mg_pll_div0 = I915_READ(MG_PLL_DIV0(tc_port)); + mg_clktop_hsclkctl = I915_READ(MG_CLKTOP2_HSCLKCTL(tc_port)); - m1 = I915_READ(MG_PLL_DIV1(port)) & MG_PLL_DIV1_FBPREDIV_MASK; + m1 = I915_READ(MG_PLL_DIV1(tc_port)) & MG_PLL_DIV1_FBPREDIV_MASK; m2_int = mg_pll_div0 & MG_PLL_DIV0_FBDIV_INT_MASK; m2_frac = (mg_pll_div0 & MG_PLL_DIV0_FRACNEN_H) ? (mg_pll_div0 & MG_PLL_DIV0_FBDIV_FRAC_MASK) >> @@ -2868,7 +2870,7 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder, if (IS_ICELAKE(dev_priv)) { if (!intel_port_is_combophy(dev_priv, port)) I915_WRITE(DDI_CLK_SEL(port), - icl_pll_to_ddi_pll_sel(encoder, crtc_state)); + icl_pll_to_ddi_clk_sel(encoder, crtc_state)); } else if (IS_CANNONLAKE(dev_priv)) { /* Configure DPCLKA_CFGCR0 to map the DPLL to the DDI. */ val = I915_READ(DPCLKA_CFGCR0); diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 957c6527f76b..7bf09cef591a 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -189,6 +189,8 @@ struct intel_device_info { struct color_luts { u16 degamma_lut_size; u16 gamma_lut_size; + u32 degamma_lut_tests; + u32 gamma_lut_tests; } color; }; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index f6f0d78436e3..df7a7a310f2f 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1758,6 +1758,35 @@ enum pipe intel_crtc_pch_transcoder(struct intel_crtc *crtc) return crtc->pipe; } +static u32 intel_crtc_max_vblank_count(const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + + /* + * On i965gm the hardware frame counter reads + * zero when the TV encoder is enabled :( + */ + if (IS_I965GM(dev_priv) && + (crtc_state->output_types & BIT(INTEL_OUTPUT_TVOUT))) + return 0; + + if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) + return 0xffffffff; /* full 32 bit counter */ + else if (INTEL_GEN(dev_priv) >= 3) + return 0xffffff; /* only 24 bits of frame count */ + else + return 0; /* Gen2 doesn't have a hardware frame counter */ +} + +static void intel_crtc_vblank_on(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + + drm_crtc_set_max_vblank_count(&crtc->base, + intel_crtc_max_vblank_count(crtc_state)); + drm_crtc_vblank_on(&crtc->base); +} + static void intel_enable_pipe(const struct intel_crtc_state *new_crtc_state) { struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); @@ -1810,7 +1839,7 @@ static void intel_enable_pipe(const struct intel_crtc_state *new_crtc_state) * when it's derived from the timestamps. So let's wait for the * pipe to start properly before we call drm_crtc_vblank_on() */ - if (dev_priv->drm.max_vblank_count == 0) + if (intel_crtc_max_vblank_count(new_crtc_state) == 0) intel_wait_for_pipe_scanline_moving(crtc); } @@ -3901,6 +3930,16 @@ static void intel_update_pipe_config(const struct intel_crtc_state *old_crtc_sta else if (old_crtc_state->pch_pfit.enabled) ironlake_pfit_disable(old_crtc_state); } + + /* + * We don't (yet) allow userspace to control the pipe background color, + * so force it to black, but apply pipe gamma and CSC so that its + * handling will match how we program our planes. + */ + if (INTEL_GEN(dev_priv) >= 9) + I915_WRITE(SKL_BOTTOM_COLOR(crtc->pipe), + SKL_BOTTOM_COLOR_GAMMA_ENABLE | + SKL_BOTTOM_COLOR_CSC_ENABLE); } static void intel_fdi_normal_train(struct intel_crtc *crtc) @@ -5678,7 +5717,7 @@ static void ironlake_crtc_enable(struct intel_crtc_state *pipe_config, ironlake_pch_enable(old_intel_state, pipe_config); assert_vblank_disabled(crtc); - drm_crtc_vblank_on(crtc); + intel_crtc_vblank_on(pipe_config); intel_encoders_enable(crtc, pipe_config, old_state); @@ -5832,7 +5871,7 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, intel_ddi_set_vc_payload_alloc(pipe_config, true); assert_vblank_disabled(crtc); - drm_crtc_vblank_on(crtc); + intel_crtc_vblank_on(pipe_config); intel_encoders_enable(crtc, pipe_config, old_state); @@ -6171,7 +6210,7 @@ static void valleyview_crtc_enable(struct intel_crtc_state *pipe_config, intel_enable_pipe(pipe_config); assert_vblank_disabled(crtc); - drm_crtc_vblank_on(crtc); + intel_crtc_vblank_on(pipe_config); intel_encoders_enable(crtc, pipe_config, old_state); } @@ -6230,7 +6269,7 @@ static void i9xx_crtc_enable(struct intel_crtc_state *pipe_config, intel_enable_pipe(pipe_config); assert_vblank_disabled(crtc); - drm_crtc_vblank_on(crtc); + intel_crtc_vblank_on(pipe_config); intel_encoders_enable(crtc, pipe_config, old_state); } @@ -9416,7 +9455,7 @@ static void icelake_get_ddi_pll(struct drm_i915_private *dev_priv, if (WARN_ON(!intel_dpll_is_combophy(id))) return; } else if (intel_port_is_tc(dev_priv, port)) { - id = icl_port_to_mg_pll_id(port); + id = icl_tc_port_to_pll_id(intel_port_to_tc(dev_priv, port)); } else { WARN(1, "Invalid port %x\n", port); return; @@ -11690,6 +11729,23 @@ pipe_config_err(bool adjust, const char *name, const char *format, ...) va_end(args); } +static bool fastboot_enabled(struct drm_i915_private *dev_priv) +{ + if (i915_modparams.fastboot != -1) + return i915_modparams.fastboot; + + /* Enable fastboot by default on Skylake and newer */ + if (INTEL_GEN(dev_priv) >= 9) + return true; + + /* Enable fastboot by default on VLV and CHV */ + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + return true; + + /* Disabled by default on all others */ + return false; +} + static bool intel_pipe_config_compare(struct drm_i915_private *dev_priv, struct intel_crtc_state *current_config, @@ -11701,7 +11757,7 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, (current_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED) && !(pipe_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED); - if (fixup_inherited && !i915_modparams.fastboot) { + if (fixup_inherited && !fastboot_enabled(dev_priv)) { DRM_DEBUG_KMS("initial modeset and fastboot not set\n"); ret = false; } @@ -12778,8 +12834,9 @@ static int intel_atomic_prepare_commit(struct drm_device *dev, u32 intel_crtc_get_vblank_counter(struct intel_crtc *crtc) { struct drm_device *dev = crtc->base.dev; + struct drm_vblank_crtc *vblank = &dev->vblank[drm_crtc_index(&crtc->base)]; - if (!dev->max_vblank_count) + if (!vblank->max_vblank_count) return (u32)drm_crtc_accurate_vblank_count(&crtc->base); return dev->driver->get_vblank_counter(dev, crtc->pipe); @@ -14327,8 +14384,10 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) /* * On some ICL SKUs port F is not present. No strap bits for * this, so rely on VBT. + * Work around broken VBTs on SKUs known to have no port F. */ - if (intel_bios_is_port_present(dev_priv, PORT_F)) + if (IS_ICL_WITH_PORT_F(dev_priv) && + intel_bios_is_port_present(dev_priv, PORT_F)) intel_ddi_init(dev_priv, PORT_F); icl_dsi_init(dev_priv); @@ -14680,14 +14739,6 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, drm_helper_mode_fill_fb_struct(&dev_priv->drm, fb, mode_cmd); - if (fb->format->format == DRM_FORMAT_NV12 && - (fb->width < SKL_MIN_YUV_420_SRC_W || - fb->height < SKL_MIN_YUV_420_SRC_H || - (fb->width % 4) != 0 || (fb->height % 4) != 0)) { - DRM_DEBUG_KMS("src dimensions not correct for NV12\n"); - goto err; - } - for (i = 0; i < fb->format->num_planes; i++) { u32 stride_alignment; @@ -15457,6 +15508,15 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc, plane->base.type != DRM_PLANE_TYPE_PRIMARY) intel_plane_disable_noatomic(crtc, plane); } + + /* + * Disable any background color set by the BIOS, but enable the + * gamma and CSC to match how we program our planes. + */ + if (INTEL_GEN(dev_priv) >= 9) + I915_WRITE(SKL_BOTTOM_COLOR(crtc->pipe), + SKL_BOTTOM_COLOR_GAMMA_ENABLE | + SKL_BOTTOM_COLOR_CSC_ENABLE); } /* Adjust the state of the output pipe according to whether we @@ -15493,16 +15553,45 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc, } } +static bool has_bogus_dpll_config(const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + + /* + * Some SNB BIOSen (eg. ASUS K53SV) are known to misprogram + * the hardware when a high res displays plugged in. DPLL P + * divider is zero, and the pipe timings are bonkers. We'll + * try to disable everything in that case. + * + * FIXME would be nice to be able to sanitize this state + * without several WARNs, but for now let's take the easy + * road. + */ + return IS_GEN(dev_priv, 6) && + crtc_state->base.active && + crtc_state->shared_dpll && + crtc_state->port_clock == 0; +} + static void intel_sanitize_encoder(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_connector *connector; + struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); + struct intel_crtc_state *crtc_state = crtc ? + to_intel_crtc_state(crtc->base.state) : NULL; /* We need to check both for a crtc link (meaning that the * encoder is active and trying to read from a pipe) and the * pipe itself being active. */ - bool has_active_crtc = encoder->base.crtc && - to_intel_crtc(encoder->base.crtc)->active; + bool has_active_crtc = crtc_state && + crtc_state->base.active; + + if (crtc_state && has_bogus_dpll_config(crtc_state)) { + DRM_DEBUG_KMS("BIOS has misprogrammed the hardware. Disabling pipe %c\n", + pipe_name(crtc->pipe)); + has_active_crtc = false; + } connector = intel_encoder_find_connector(encoder); if (connector && !has_active_crtc) { @@ -15513,16 +15602,25 @@ static void intel_sanitize_encoder(struct intel_encoder *encoder) /* Connector is active, but has no active pipe. This is * fallout from our resume register restoring. Disable * the encoder manually again. */ - if (encoder->base.crtc) { - struct drm_crtc_state *crtc_state = encoder->base.crtc->state; + if (crtc_state) { + struct drm_encoder *best_encoder; DRM_DEBUG_KMS("[ENCODER:%d:%s] manually disabled\n", encoder->base.base.id, encoder->base.name); + + /* avoid oopsing in case the hooks consult best_encoder */ + best_encoder = connector->base.state->best_encoder; + connector->base.state->best_encoder = &encoder->base; + if (encoder->disable) - encoder->disable(encoder, to_intel_crtc_state(crtc_state), connector->base.state); + encoder->disable(encoder, crtc_state, + connector->base.state); if (encoder->post_disable) - encoder->post_disable(encoder, to_intel_crtc_state(crtc_state), connector->base.state); + encoder->post_disable(encoder, crtc_state, + connector->base.state); + + connector->base.state->best_encoder = best_encoder; } encoder->base.crtc = NULL; @@ -15894,10 +15992,12 @@ intel_modeset_setup_hw_state(struct drm_device *dev, * waits, so we need vblank interrupts restored beforehand. */ for_each_intel_crtc(&dev_priv->drm, crtc) { + crtc_state = to_intel_crtc_state(crtc->base.state); + drm_crtc_vblank_reset(&crtc->base); - if (crtc->base.state->active) - drm_crtc_vblank_on(&crtc->base); + if (crtc_state->base.active) + intel_crtc_vblank_on(crtc_state); } intel_sanitize_plane_mapping(dev_priv); diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index 606f54dde086..0a42d11c4c33 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -247,7 +247,7 @@ intel_find_shared_dpll(struct intel_crtc *crtc, enum intel_dpll_id range_max) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - struct intel_shared_dpll *pll; + struct intel_shared_dpll *pll, *unused_pll = NULL; struct intel_shared_dpll_state *shared_dpll; enum intel_dpll_id i; @@ -257,8 +257,11 @@ intel_find_shared_dpll(struct intel_crtc *crtc, pll = &dev_priv->shared_dplls[i]; /* Only want to check enabled timings first */ - if (shared_dpll[i].crtc_mask == 0) + if (shared_dpll[i].crtc_mask == 0) { + if (!unused_pll) + unused_pll = pll; continue; + } if (memcmp(&crtc_state->dpll_hw_state, &shared_dpll[i].hw_state, @@ -273,14 +276,11 @@ intel_find_shared_dpll(struct intel_crtc *crtc, } /* Ok no matching timings, maybe there's a free one? */ - for (i = range_min; i <= range_max; i++) { - pll = &dev_priv->shared_dplls[i]; - if (shared_dpll[i].crtc_mask == 0) { - DRM_DEBUG_KMS("[CRTC:%d:%s] allocated %s\n", - crtc->base.base.id, crtc->base.name, - pll->info->name); - return pll; - } + if (unused_pll) { + DRM_DEBUG_KMS("[CRTC:%d:%s] allocated %s\n", + crtc->base.base.id, crtc->base.name, + unused_pll->info->name); + return unused_pll; } return NULL; @@ -2639,14 +2639,14 @@ int icl_calc_dp_combo_pll_link(struct drm_i915_private *dev_priv, return link_clock; } -static enum port icl_mg_pll_id_to_port(enum intel_dpll_id id) +static enum tc_port icl_pll_id_to_tc_port(enum intel_dpll_id id) { - return id - DPLL_ID_ICL_MGPLL1 + PORT_C; + return id - DPLL_ID_ICL_MGPLL1; } -enum intel_dpll_id icl_port_to_mg_pll_id(enum port port) +enum intel_dpll_id icl_tc_port_to_pll_id(enum tc_port tc_port) { - return port - PORT_C + DPLL_ID_ICL_MGPLL1; + return tc_port + DPLL_ID_ICL_MGPLL1; } bool intel_dpll_is_combophy(enum intel_dpll_id id) @@ -2925,7 +2925,10 @@ icl_get_dpll(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, ret = icl_calc_dpll_state(crtc_state, encoder, clock, &pll_state); } else { - min = icl_port_to_mg_pll_id(port); + enum tc_port tc_port; + + tc_port = intel_port_to_tc(dev_priv, port); + min = icl_tc_port_to_pll_id(tc_port); max = min; ret = icl_calc_mg_pll_state(crtc_state, encoder, clock, &pll_state); @@ -2959,12 +2962,8 @@ static i915_reg_t icl_pll_id_to_enable_reg(enum intel_dpll_id id) return CNL_DPLL_ENABLE(id); else if (id == DPLL_ID_ICL_TBTPLL) return TBT_PLL_ENABLE; - else - /* - * TODO: Make MG_PLL macros use - * tc port id instead of port id - */ - return MG_PLL_ENABLE(icl_mg_pll_id_to_port(id)); + + return MG_PLL_ENABLE(icl_pll_id_to_tc_port(id)); } static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv, @@ -2974,7 +2973,6 @@ static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv, const enum intel_dpll_id id = pll->info->id; intel_wakeref_t wakeref; bool ret = false; - enum port port; u32 val; wakeref = intel_display_power_get_if_enabled(dev_priv, @@ -2991,32 +2989,33 @@ static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv, hw_state->cfgcr0 = I915_READ(ICL_DPLL_CFGCR0(id)); hw_state->cfgcr1 = I915_READ(ICL_DPLL_CFGCR1(id)); } else { - port = icl_mg_pll_id_to_port(id); - hw_state->mg_refclkin_ctl = I915_READ(MG_REFCLKIN_CTL(port)); + enum tc_port tc_port = icl_pll_id_to_tc_port(id); + + hw_state->mg_refclkin_ctl = I915_READ(MG_REFCLKIN_CTL(tc_port)); hw_state->mg_refclkin_ctl &= MG_REFCLKIN_CTL_OD_2_MUX_MASK; hw_state->mg_clktop2_coreclkctl1 = - I915_READ(MG_CLKTOP2_CORECLKCTL1(port)); + I915_READ(MG_CLKTOP2_CORECLKCTL1(tc_port)); hw_state->mg_clktop2_coreclkctl1 &= MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO_MASK; hw_state->mg_clktop2_hsclkctl = - I915_READ(MG_CLKTOP2_HSCLKCTL(port)); + I915_READ(MG_CLKTOP2_HSCLKCTL(tc_port)); hw_state->mg_clktop2_hsclkctl &= MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL_MASK | MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL_MASK | MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO_MASK | MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_MASK; - hw_state->mg_pll_div0 = I915_READ(MG_PLL_DIV0(port)); - hw_state->mg_pll_div1 = I915_READ(MG_PLL_DIV1(port)); - hw_state->mg_pll_lf = I915_READ(MG_PLL_LF(port)); - hw_state->mg_pll_frac_lock = I915_READ(MG_PLL_FRAC_LOCK(port)); - hw_state->mg_pll_ssc = I915_READ(MG_PLL_SSC(port)); + hw_state->mg_pll_div0 = I915_READ(MG_PLL_DIV0(tc_port)); + hw_state->mg_pll_div1 = I915_READ(MG_PLL_DIV1(tc_port)); + hw_state->mg_pll_lf = I915_READ(MG_PLL_LF(tc_port)); + hw_state->mg_pll_frac_lock = I915_READ(MG_PLL_FRAC_LOCK(tc_port)); + hw_state->mg_pll_ssc = I915_READ(MG_PLL_SSC(tc_port)); - hw_state->mg_pll_bias = I915_READ(MG_PLL_BIAS(port)); + hw_state->mg_pll_bias = I915_READ(MG_PLL_BIAS(tc_port)); hw_state->mg_pll_tdc_coldst_bias = - I915_READ(MG_PLL_TDC_COLDST_BIAS(port)); + I915_READ(MG_PLL_TDC_COLDST_BIAS(tc_port)); if (dev_priv->cdclk.hw.ref == 38400) { hw_state->mg_pll_tdc_coldst_bias_mask = MG_PLL_TDC_COLDST_COLDSTART; @@ -3051,7 +3050,7 @@ static void icl_mg_pll_write(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { struct intel_dpll_hw_state *hw_state = &pll->state.hw_state; - enum port port = icl_mg_pll_id_to_port(pll->info->id); + enum tc_port tc_port = icl_pll_id_to_tc_port(pll->info->id); u32 val; /* @@ -3060,41 +3059,41 @@ static void icl_mg_pll_write(struct drm_i915_private *dev_priv, * during the calc/readout phase if the mask depends on some other HW * state like refclk, see icl_calc_mg_pll_state(). */ - val = I915_READ(MG_REFCLKIN_CTL(port)); + val = I915_READ(MG_REFCLKIN_CTL(tc_port)); val &= ~MG_REFCLKIN_CTL_OD_2_MUX_MASK; val |= hw_state->mg_refclkin_ctl; - I915_WRITE(MG_REFCLKIN_CTL(port), val); + I915_WRITE(MG_REFCLKIN_CTL(tc_port), val); - val = I915_READ(MG_CLKTOP2_CORECLKCTL1(port)); + val = I915_READ(MG_CLKTOP2_CORECLKCTL1(tc_port)); val &= ~MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO_MASK; val |= hw_state->mg_clktop2_coreclkctl1; - I915_WRITE(MG_CLKTOP2_CORECLKCTL1(port), val); + I915_WRITE(MG_CLKTOP2_CORECLKCTL1(tc_port), val); - val = I915_READ(MG_CLKTOP2_HSCLKCTL(port)); + val = I915_READ(MG_CLKTOP2_HSCLKCTL(tc_port)); val &= ~(MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL_MASK | MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL_MASK | MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO_MASK | MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_MASK); val |= hw_state->mg_clktop2_hsclkctl; - I915_WRITE(MG_CLKTOP2_HSCLKCTL(port), val); + I915_WRITE(MG_CLKTOP2_HSCLKCTL(tc_port), val); - I915_WRITE(MG_PLL_DIV0(port), hw_state->mg_pll_div0); - I915_WRITE(MG_PLL_DIV1(port), hw_state->mg_pll_div1); - I915_WRITE(MG_PLL_LF(port), hw_state->mg_pll_lf); - I915_WRITE(MG_PLL_FRAC_LOCK(port), hw_state->mg_pll_frac_lock); - I915_WRITE(MG_PLL_SSC(port), hw_state->mg_pll_ssc); + I915_WRITE(MG_PLL_DIV0(tc_port), hw_state->mg_pll_div0); + I915_WRITE(MG_PLL_DIV1(tc_port), hw_state->mg_pll_div1); + I915_WRITE(MG_PLL_LF(tc_port), hw_state->mg_pll_lf); + I915_WRITE(MG_PLL_FRAC_LOCK(tc_port), hw_state->mg_pll_frac_lock); + I915_WRITE(MG_PLL_SSC(tc_port), hw_state->mg_pll_ssc); - val = I915_READ(MG_PLL_BIAS(port)); + val = I915_READ(MG_PLL_BIAS(tc_port)); val &= ~hw_state->mg_pll_bias_mask; val |= hw_state->mg_pll_bias; - I915_WRITE(MG_PLL_BIAS(port), val); + I915_WRITE(MG_PLL_BIAS(tc_port), val); - val = I915_READ(MG_PLL_TDC_COLDST_BIAS(port)); + val = I915_READ(MG_PLL_TDC_COLDST_BIAS(tc_port)); val &= ~hw_state->mg_pll_tdc_coldst_bias_mask; val |= hw_state->mg_pll_tdc_coldst_bias; - I915_WRITE(MG_PLL_TDC_COLDST_BIAS(port), val); + I915_WRITE(MG_PLL_TDC_COLDST_BIAS(tc_port), val); - POSTING_READ(MG_PLL_TDC_COLDST_BIAS(port)); + POSTING_READ(MG_PLL_TDC_COLDST_BIAS(tc_port)); } static void icl_pll_enable(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.h b/drivers/gpu/drm/i915/intel_dpll_mgr.h index e96e79413b54..40e8391a92f2 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.h +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.h @@ -344,7 +344,7 @@ void intel_dpll_dump_hw_state(struct drm_i915_private *dev_priv, int icl_calc_dp_combo_pll_link(struct drm_i915_private *dev_priv, u32 pll_id); int cnl_hdmi_pll_ref_clock(struct drm_i915_private *dev_priv); -enum intel_dpll_id icl_port_to_mg_pll_id(enum port port); +enum intel_dpll_id icl_tc_port_to_pll_id(enum tc_port tc_port); bool intel_dpll_is_combophy(enum intel_dpll_id id); #endif /* _INTEL_DPLL_MGR_H_ */ diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 85b913ea6e80..90ba5436370e 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -630,9 +630,11 @@ struct intel_crtc_scaler_state { }; /* drm_mode->private_flags */ -#define I915_MODE_FLAG_INHERITED 1 +#define I915_MODE_FLAG_INHERITED (1<<0) /* Flag to get scanline using frame time stamps */ #define I915_MODE_FLAG_GET_SCANLINE_FROM_TIMESTAMP (1<<1) +/* Flag to use the scanline counter instead of the pixel counter */ +#define I915_MODE_FLAG_USE_SCANLINE_COUNTER (1<<2) struct intel_pipe_wm { struct intel_wm_level wm[5]; diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 2f3c71f6d313..71c01eb13af1 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -458,12 +458,6 @@ cleanup: void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno) { intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); - - /* After manually advancing the seqno, fake the interrupt in case - * there are any waiters for that seqno. - */ - intel_engine_wakeup(engine); - GEM_BUG_ON(intel_engine_get_seqno(engine) != seqno); } @@ -480,53 +474,67 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine) GEM_BUG_ON(!is_power_of_2(execlists_num_ports(execlists))); GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS); - execlists->queue_priority = INT_MIN; + execlists->queue_priority_hint = INT_MIN; execlists->queue = RB_ROOT_CACHED; } -/** - * intel_engines_setup_common - setup engine state not requiring hw access - * @engine: Engine to setup. - * - * Initializes @engine@ structure members shared between legacy and execlists - * submission modes which do not require hardware access. - * - * Typically done early in the submission mode specific engine setup stage. - */ -void intel_engine_setup_common(struct intel_engine_cs *engine) -{ - i915_timeline_init(engine->i915, &engine->timeline, engine->name); - i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE); - - intel_engine_init_execlist(engine); - intel_engine_init_hangcheck(engine); - intel_engine_init_batch_pool(engine); - intel_engine_init_cmd_parser(engine); -} - static void cleanup_status_page(struct intel_engine_cs *engine) { + struct i915_vma *vma; + /* Prevent writes into HWSP after returning the page to the system */ intel_engine_set_hwsp_writemask(engine, ~0u); - if (HWS_NEEDS_PHYSICAL(engine->i915)) { - void *addr = fetch_and_zero(&engine->status_page.page_addr); + vma = fetch_and_zero(&engine->status_page.vma); + if (!vma) + return; - __free_page(virt_to_page(addr)); - } + if (!HWS_NEEDS_PHYSICAL(engine->i915)) + i915_vma_unpin(vma); + + i915_gem_object_unpin_map(vma->obj); + __i915_gem_object_release_unless_active(vma->obj); +} + +static int pin_ggtt_status_page(struct intel_engine_cs *engine, + struct i915_vma *vma) +{ + unsigned int flags; + + flags = PIN_GLOBAL; + if (!HAS_LLC(engine->i915)) + /* + * On g33, we cannot place HWS above 256MiB, so + * restrict its pinning to the low mappable arena. + * Though this restriction is not documented for + * gen4, gen5, or byt, they also behave similarly + * and hang if the HWS is placed at the top of the + * GTT. To generalise, it appears that all !llc + * platforms have issues with us placing the HWS + * above the mappable region (even though we never + * actually map it). + */ + flags |= PIN_MAPPABLE; + else + flags |= PIN_HIGH; - i915_vma_unpin_and_release(&engine->status_page.vma, - I915_VMA_RELEASE_MAP); + return i915_vma_pin(vma, 0, 0, flags); } static int init_status_page(struct intel_engine_cs *engine) { struct drm_i915_gem_object *obj; struct i915_vma *vma; - unsigned int flags; void *vaddr; int ret; + /* + * Though the HWS register does support 36bit addresses, historically + * we have had hangs and corruption reported due to wild writes if + * the HWS is placed above 4G. We only allow objects to be allocated + * in GFP_DMA32 for i965, and no earlier physical address users had + * access to more than 4G. + */ obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); if (IS_ERR(obj)) { DRM_ERROR("Failed to allocate status page\n"); @@ -543,59 +551,67 @@ static int init_status_page(struct intel_engine_cs *engine) goto err; } - flags = PIN_GLOBAL; - if (!HAS_LLC(engine->i915)) - /* On g33, we cannot place HWS above 256MiB, so - * restrict its pinning to the low mappable arena. - * Though this restriction is not documented for - * gen4, gen5, or byt, they also behave similarly - * and hang if the HWS is placed at the top of the - * GTT. To generalise, it appears that all !llc - * platforms have issues with us placing the HWS - * above the mappable region (even though we never - * actually map it). - */ - flags |= PIN_MAPPABLE; - else - flags |= PIN_HIGH; - ret = i915_vma_pin(vma, 0, 0, flags); - if (ret) - goto err; - vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); if (IS_ERR(vaddr)) { ret = PTR_ERR(vaddr); - goto err_unpin; + goto err; } + engine->status_page.addr = memset(vaddr, 0, PAGE_SIZE); engine->status_page.vma = vma; - engine->status_page.ggtt_offset = i915_ggtt_offset(vma); - engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE); + + if (!HWS_NEEDS_PHYSICAL(engine->i915)) { + ret = pin_ggtt_status_page(engine, vma); + if (ret) + goto err_unpin; + } + return 0; err_unpin: - i915_vma_unpin(vma); + i915_gem_object_unpin_map(obj); err: i915_gem_object_put(obj); return ret; } -static int init_phys_status_page(struct intel_engine_cs *engine) +/** + * intel_engines_setup_common - setup engine state not requiring hw access + * @engine: Engine to setup. + * + * Initializes @engine@ structure members shared between legacy and execlists + * submission modes which do not require hardware access. + * + * Typically done early in the submission mode specific engine setup stage. + */ +int intel_engine_setup_common(struct intel_engine_cs *engine) { - struct page *page; + int err; - /* - * Though the HWS register does support 36bit addresses, historically - * we have had hangs and corruption reported due to wild writes if - * the HWS is placed above 4G. - */ - page = alloc_page(GFP_KERNEL | __GFP_DMA32 | __GFP_ZERO); - if (!page) - return -ENOMEM; + err = init_status_page(engine); + if (err) + return err; + + err = i915_timeline_init(engine->i915, + &engine->timeline, + engine->name, + engine->status_page.vma); + if (err) + goto err_hwsp; + + i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE); - engine->status_page.page_addr = page_address(page); + intel_engine_init_breadcrumbs(engine); + intel_engine_init_execlist(engine); + intel_engine_init_hangcheck(engine); + intel_engine_init_batch_pool(engine); + intel_engine_init_cmd_parser(engine); return 0; + +err_hwsp: + cleanup_status_page(engine); + return err; } static void __intel_context_unpin(struct i915_gem_context *ctx, @@ -604,6 +620,56 @@ static void __intel_context_unpin(struct i915_gem_context *ctx, intel_context_unpin(to_intel_context(ctx, engine)); } +struct measure_breadcrumb { + struct i915_request rq; + struct i915_timeline timeline; + struct intel_ring ring; + u32 cs[1024]; +}; + +static int measure_breadcrumb_dw(struct intel_engine_cs *engine) +{ + struct measure_breadcrumb *frame; + int dw = -ENOMEM; + + GEM_BUG_ON(!engine->i915->gt.scratch); + + frame = kzalloc(sizeof(*frame), GFP_KERNEL); + if (!frame) + return -ENOMEM; + + if (i915_timeline_init(engine->i915, + &frame->timeline, "measure", + engine->status_page.vma)) + goto out_frame; + + INIT_LIST_HEAD(&frame->ring.request_list); + frame->ring.timeline = &frame->timeline; + frame->ring.vaddr = frame->cs; + frame->ring.size = sizeof(frame->cs); + frame->ring.effective_size = frame->ring.size; + intel_ring_update_space(&frame->ring); + + frame->rq.i915 = engine->i915; + frame->rq.engine = engine; + frame->rq.ring = &frame->ring; + frame->rq.timeline = &frame->timeline; + + dw = i915_timeline_pin(&frame->timeline); + if (dw < 0) + goto out_timeline; + + dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs; + + i915_timeline_unpin(&frame->timeline); + +out_timeline: + i915_timeline_fini(&frame->timeline); +out_frame: + kfree(frame); + return dw; +} + /** * intel_engines_init_common - initialize cengine state which might require hw access * @engine: Engine to initialize. @@ -646,21 +712,14 @@ int intel_engine_init_common(struct intel_engine_cs *engine) } } - ret = intel_engine_init_breadcrumbs(engine); - if (ret) + ret = measure_breadcrumb_dw(engine); + if (ret < 0) goto err_unpin_preempt; - if (HWS_NEEDS_PHYSICAL(i915)) - ret = init_phys_status_page(engine); - else - ret = init_status_page(engine); - if (ret) - goto err_breadcrumbs; + engine->emit_fini_breadcrumb_dw = ret; return 0; -err_breadcrumbs: - intel_engine_fini_breadcrumbs(engine); err_unpin_preempt: if (i915->preempt_context) __intel_context_unpin(i915->preempt_context, engine); @@ -1071,10 +1130,8 @@ void intel_engines_sanitize(struct drm_i915_private *i915, bool force) if (!reset_engines(i915) && !force) return; - for_each_engine(engine, i915, id) { - if (engine->reset.reset) - engine->reset.reset(engine, NULL); - } + for_each_engine(engine, i915, id) + intel_engine_reset(engine, false); } /** @@ -1110,7 +1167,7 @@ void intel_engines_park(struct drm_i915_private *i915) } /* Must be reset upon idling, or we may miss the busy wakeup. */ - GEM_BUG_ON(engine->execlists.queue_priority != INT_MIN); + GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN); if (engine->park) engine->park(engine); @@ -1226,10 +1283,14 @@ static void print_request(struct drm_printer *m, x = print_sched_attr(rq->i915, &rq->sched.attr, buf, x, sizeof(buf)); - drm_printf(m, "%s%x%s [%llx:%llx]%s @ %dms: %s\n", + drm_printf(m, "%s%x%s%s [%llx:%llx]%s @ %dms: %s\n", prefix, rq->global_seqno, - i915_request_completed(rq) ? "!" : "", + i915_request_completed(rq) ? "!" : + i915_request_started(rq) ? "*" : + "", + test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, + &rq->fence.flags) ? "+" : "", rq->fence.context, rq->fence.seqno, buf, jiffies_to_msecs(jiffies - rq->emitted_jiffies), @@ -1320,7 +1381,8 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine, } if (HAS_EXECLISTS(dev_priv)) { - const u32 *hws = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; + const u32 *hws = + &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX]; unsigned int idx; u8 read, write; @@ -1363,9 +1425,10 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine, char hdr[80]; snprintf(hdr, sizeof(hdr), - "\t\tELSP[%d] count=%d, ring->start=%08x, rq: ", + "\t\tELSP[%d] count=%d, ring:{start:%08x, hwsp:%08x}, rq: ", idx, count, - i915_ggtt_offset(rq->ring->vma)); + i915_ggtt_offset(rq->ring->vma), + rq->timeline->hwsp_offset); print_request(m, rq, hdr); } else { drm_printf(m, "\t\tELSP[%d] idle\n", idx); @@ -1420,12 +1483,9 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m, const char *header, ...) { - struct intel_breadcrumbs * const b = &engine->breadcrumbs; struct i915_gpu_error * const error = &engine->i915->gpu_error; struct i915_request *rq; intel_wakeref_t wakeref; - unsigned long flags; - struct rb_node *rb; if (header) { va_list ap; @@ -1475,6 +1535,8 @@ void intel_engine_dump(struct intel_engine_cs *engine, rq->ring->emit); drm_printf(m, "\t\tring->space: 0x%08x\n", rq->ring->space); + drm_printf(m, "\t\tring->hwsp: 0x%08x\n", + rq->timeline->hwsp_offset); print_request_ring(m, rq); } @@ -1491,21 +1553,12 @@ void intel_engine_dump(struct intel_engine_cs *engine, intel_execlists_show_requests(engine, m, print_request, 8); - spin_lock_irqsave(&b->rb_lock, flags); - for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { - struct intel_wait *w = rb_entry(rb, typeof(*w), node); - - drm_printf(m, "\t%s [%d:%c] waiting for %x\n", - w->tsk->comm, w->tsk->pid, - task_state_to_char(w->tsk), - w->seqno); - } - spin_unlock_irqrestore(&b->rb_lock, flags); - drm_printf(m, "HWSP:\n"); - hexdump(m, engine->status_page.page_addr, PAGE_SIZE); + hexdump(m, engine->status_page.addr, PAGE_SIZE); drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine))); + + intel_engine_print_breadcrumbs(engine, m); } static u8 user_class_map[] = { diff --git a/drivers/gpu/drm/i915/intel_gpu_commands.h b/drivers/gpu/drm/i915/intel_gpu_commands.h index 105e2a9e874a..b96a31bc1080 100644 --- a/drivers/gpu/drm/i915/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/intel_gpu_commands.h @@ -112,7 +112,6 @@ #define MI_MEM_VIRTUAL (1 << 22) /* 945,g33,965 */ #define MI_USE_GGTT (1 << 22) /* g4x+ */ #define MI_STORE_DWORD_INDEX MI_INSTR(0x21, 1) -#define MI_STORE_DWORD_INDEX_SHIFT 2 /* * Official intel docs are somewhat sloppy concerning MI_LOAD_REGISTER_IMM: * - Always issue a MI_NOOP _before_ the MI_LOAD_REGISTER_IMM - otherwise hw diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index ab1c49b106f2..8bc8aa54aa35 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -81,6 +81,12 @@ * */ +static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine) +{ + return (i915_ggtt_offset(engine->status_page.vma) + + I915_GEM_HWS_PREEMPT_ADDR); +} + static inline struct i915_priolist *to_priolist(struct rb_node *rb) { return rb_entry(rb, struct i915_priolist, node); @@ -623,6 +629,8 @@ static void inject_preempt_context(struct work_struct *work) EXECLISTS_ACTIVE_PREEMPT); tasklet_schedule(&engine->execlists.tasklet); } + + (void)I915_SELFTEST_ONLY(engine->execlists.preempt_hang.count++); } /* @@ -666,7 +674,7 @@ static void complete_preempt_context(struct intel_engine_cs *engine) execlists_unwind_incomplete_requests(execlists); wait_for_guc_preempt_report(engine); - intel_write_status_page(engine, I915_GEM_HWS_PREEMPT_INDEX, 0); + intel_write_status_page(engine, I915_GEM_HWS_PREEMPT, 0); } /** @@ -731,7 +739,7 @@ static bool __guc_dequeue(struct intel_engine_cs *engine) if (intel_engine_has_preemption(engine)) { struct guc_preempt_work *preempt_work = &engine->i915->guc.preempt_work[engine->id]; - int prio = execlists->queue_priority; + int prio = execlists->queue_priority_hint; if (__execlists_need_preempt(prio, port_prio(port))) { execlists_set_active(execlists, @@ -777,7 +785,8 @@ static bool __guc_dequeue(struct intel_engine_cs *engine) kmem_cache_free(engine->i915->priorities, p); } done: - execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN; + execlists->queue_priority_hint = + rb ? to_priolist(rb)->priority : INT_MIN; if (submit) port_assign(port, last); if (last) @@ -824,7 +833,7 @@ static void guc_submission_tasklet(unsigned long data) } if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT) && - intel_read_status_page(engine, I915_GEM_HWS_PREEMPT_INDEX) == + intel_read_status_page(engine, I915_GEM_HWS_PREEMPT) == GUC_PREEMPT_FINISHED) complete_preempt_context(engine); @@ -834,8 +843,7 @@ static void guc_submission_tasklet(unsigned long data) spin_unlock_irqrestore(&engine->timeline.lock, flags); } -static struct i915_request * -guc_reset_prepare(struct intel_engine_cs *engine) +static void guc_reset_prepare(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; @@ -861,8 +869,6 @@ guc_reset_prepare(struct intel_engine_cs *engine) */ if (engine->i915->guc.preempt_wq) flush_workqueue(engine->i915->guc.preempt_wq); - - return i915_gem_find_active_request(engine); } /* diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c index 741441daae32..a219c796e56d 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -25,6 +25,17 @@ #include "i915_drv.h" #include "i915_reset.h" +struct hangcheck { + u64 acthd; + u32 seqno; + enum intel_engine_hangcheck_action action; + unsigned long action_timestamp; + int deadlock; + struct intel_instdone instdone; + bool wedged:1; + bool stalled:1; +}; + static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone) { u32 tmp = current_instdone | *old_instdone; @@ -119,25 +130,22 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd) } static void hangcheck_load_sample(struct intel_engine_cs *engine, - struct intel_engine_hangcheck *hc) + struct hangcheck *hc) { hc->acthd = intel_engine_get_active_head(engine); hc->seqno = intel_engine_get_seqno(engine); } static void hangcheck_store_sample(struct intel_engine_cs *engine, - const struct intel_engine_hangcheck *hc) + const struct hangcheck *hc) { engine->hangcheck.acthd = hc->acthd; engine->hangcheck.seqno = hc->seqno; - engine->hangcheck.action = hc->action; - engine->hangcheck.stalled = hc->stalled; - engine->hangcheck.wedged = hc->wedged; } static enum intel_engine_hangcheck_action hangcheck_get_action(struct intel_engine_cs *engine, - const struct intel_engine_hangcheck *hc) + const struct hangcheck *hc) { if (engine->hangcheck.seqno != hc->seqno) return ENGINE_ACTIVE_SEQNO; @@ -149,7 +157,7 @@ hangcheck_get_action(struct intel_engine_cs *engine, } static void hangcheck_accumulate_sample(struct intel_engine_cs *engine, - struct intel_engine_hangcheck *hc) + struct hangcheck *hc) { unsigned long timeout = I915_ENGINE_DEAD_TIMEOUT; @@ -265,19 +273,21 @@ static void i915_hangcheck_elapsed(struct work_struct *work) intel_uncore_arm_unclaimed_mmio_detection(dev_priv); for_each_engine(engine, dev_priv, id) { - struct intel_engine_hangcheck hc; + struct hangcheck hc; + + intel_engine_signal_breadcrumbs(engine); hangcheck_load_sample(engine, &hc); hangcheck_accumulate_sample(engine, &hc); hangcheck_store_sample(engine, &hc); - if (engine->hangcheck.stalled) { + if (hc.stalled) { hung |= intel_engine_flag(engine); if (hc.action != ENGINE_DEAD) stuck |= intel_engine_flag(engine); } - if (engine->hangcheck.wedged) + if (hc.wedged) wedged |= intel_engine_flag(engine); } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 8aa8a4862543..a9eb0211ce77 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -136,6 +136,7 @@ #include <drm/i915_drm.h> #include "i915_drv.h" #include "i915_gem_render_state.h" +#include "i915_reset.h" #include "i915_vgpu.h" #include "intel_lrc_reg.h" #include "intel_mocs.h" @@ -171,6 +172,12 @@ static void execlists_init_reg_state(u32 *reg_state, struct intel_engine_cs *engine, struct intel_ring *ring); +static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) +{ + return (i915_ggtt_offset(engine->status_page.vma) + + I915_GEM_HWS_INDEX_ADDR); +} + static inline struct i915_priolist *to_priolist(struct rb_node *rb) { return rb_entry(rb, struct i915_priolist, node); @@ -181,13 +188,90 @@ static inline int rq_prio(const struct i915_request *rq) return rq->sched.attr.priority; } +static int queue_prio(const struct intel_engine_execlists *execlists) +{ + struct i915_priolist *p; + struct rb_node *rb; + + rb = rb_first_cached(&execlists->queue); + if (!rb) + return INT_MIN; + + /* + * As the priolist[] are inverted, with the highest priority in [0], + * we have to flip the index value to become priority. + */ + p = to_priolist(rb); + return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used); +} + static inline bool need_preempt(const struct intel_engine_cs *engine, - const struct i915_request *last, - int prio) + const struct i915_request *rq) { - return (intel_engine_has_preemption(engine) && - __execlists_need_preempt(prio, rq_prio(last)) && - !i915_request_completed(last)); + const int last_prio = rq_prio(rq); + + if (!intel_engine_has_preemption(engine)) + return false; + + if (i915_request_completed(rq)) + return false; + + /* + * Check if the current priority hint merits a preemption attempt. + * + * We record the highest value priority we saw during rescheduling + * prior to this dequeue, therefore we know that if it is strictly + * less than the current tail of ESLP[0], we do not need to force + * a preempt-to-idle cycle. + * + * However, the priority hint is a mere hint that we may need to + * preempt. If that hint is stale or we may be trying to preempt + * ourselves, ignore the request. + */ + if (!__execlists_need_preempt(engine->execlists.queue_priority_hint, + last_prio)) + return false; + + /* + * Check against the first request in ELSP[1], it will, thanks to the + * power of PI, be the highest priority of that context. + */ + if (!list_is_last(&rq->link, &engine->timeline.requests) && + rq_prio(list_next_entry(rq, link)) > last_prio) + return true; + + /* + * If the inflight context did not trigger the preemption, then maybe + * it was the set of queued requests? Pick the highest priority in + * the queue (the first active priolist) and see if it deserves to be + * running instead of ELSP[0]. + * + * The highest priority request in the queue can not be either + * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same + * context, it's priority would not exceed ELSP[0] aka last_prio. + */ + return queue_prio(&engine->execlists) > last_prio; +} + +__maybe_unused static inline bool +assert_priority_queue(const struct intel_engine_execlists *execlists, + const struct i915_request *prev, + const struct i915_request *next) +{ + if (!prev) + return true; + + /* + * Without preemption, the prev may refer to the still active element + * which we refuse to let go. + * + * Even with preemption, there are times when we think it is better not + * to preempt and leave an ostensibly lower priority request in flight. + */ + if (port_request(execlists->port) == prev) + return true; + + return rq_prio(prev) >= rq_prio(next); } /* @@ -264,7 +348,8 @@ static void unwind_wa_tail(struct i915_request *rq) assert_ring_tail_valid(rq->ring, rq->tail); } -static void __unwind_incomplete_requests(struct intel_engine_cs *engine) +static struct i915_request * +__unwind_incomplete_requests(struct intel_engine_cs *engine) { struct i915_request *rq, *rn, *active = NULL; struct list_head *uninitialized_var(pl); @@ -306,6 +391,8 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine) list_move_tail(&active->sched.link, i915_sched_lookup_priolist(engine, prio)); } + + return active; } void @@ -436,11 +523,12 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) desc = execlists_update_context(rq); GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc)); - GEM_TRACE("%s in[%d]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n", + GEM_TRACE("%s in[%d]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d:%d), prio=%d\n", engine->name, n, port[n].context_id, count, rq->global_seqno, rq->fence.context, rq->fence.seqno, + hwsp_seqno(rq), intel_engine_get_seqno(engine), rq_prio(rq)); } else { @@ -512,6 +600,8 @@ static void inject_preempt_context(struct intel_engine_cs *engine) execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK); execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT); + + (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++); } static void complete_preempt_context(struct intel_engine_execlists *execlists) @@ -580,7 +670,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK)) return; - if (need_preempt(engine, last, execlists->queue_priority)) { + if (need_preempt(engine, last)) { inject_preempt_context(engine); return; } @@ -613,7 +703,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * WaIdleLiteRestore:bdw,skl * Apply the wa NOOPs to prevent * ring:HEAD == rq:TAIL as we resubmit the - * request. See gen8_emit_breadcrumb() for + * request. See gen8_emit_fini_breadcrumb() for * where we prepare the padding after the * end of the request. */ @@ -626,8 +716,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) int i; priolist_for_each_request_consume(rq, rn, p, i) { - GEM_BUG_ON(last && - need_preempt(engine, last, rq_prio(rq))); + GEM_BUG_ON(!assert_priority_queue(execlists, last, rq)); /* * Can we combine this request with the current port? @@ -688,20 +777,20 @@ done: /* * Here be a bit of magic! Or sleight-of-hand, whichever you prefer. * - * We choose queue_priority such that if we add a request of greater + * We choose the priority hint such that if we add a request of greater * priority than this, we kick the submission tasklet to decide on * the right order of submitting the requests to hardware. We must * also be prepared to reorder requests as they are in-flight on the - * HW. We derive the queue_priority then as the first "hole" in + * HW. We derive the priority hint then as the first "hole" in * the HW submission ports and if there are no available slots, * the priority of the lowest executing request, i.e. last. * * When we do receive a higher priority request ready to run from the - * user, see queue_request(), the queue_priority is bumped to that + * user, see queue_request(), the priority hint is bumped to that * request triggering preemption on the next dequeue (or subsequent * interrupt for secondary ports). */ - execlists->queue_priority = + execlists->queue_priority_hint = port != execlists->port ? rq_prio(last) : INT_MIN; if (submit) { @@ -732,11 +821,12 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) while (num_ports-- && port_isset(port)) { struct i915_request *rq = port_request(port); - GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d)\n", + GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d:%d)\n", rq->engine->name, (unsigned int)(port - execlists->port), rq->global_seqno, rq->fence.context, rq->fence.seqno, + hwsp_seqno(rq), intel_engine_get_seqno(rq->engine)); GEM_BUG_ON(!execlists->active); @@ -820,10 +910,10 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) list_for_each_entry(rq, &engine->timeline.requests, link) { GEM_BUG_ON(!rq->global_seqno); - if (i915_request_signaled(rq)) - continue; + if (!i915_request_signaled(rq)) + dma_fence_set_error(&rq->fence, -EIO); - dma_fence_set_error(&rq->fence, -EIO); + i915_request_mark_complete(rq); } /* Flush the queued requests to the timeline list (for retiring). */ @@ -833,9 +923,9 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) priolist_for_each_request_consume(rq, rn, p, i) { list_del_init(&rq->sched.link); - - dma_fence_set_error(&rq->fence, -EIO); __i915_request_submit(rq); + dma_fence_set_error(&rq->fence, -EIO); + i915_request_mark_complete(rq); } rb_erase_cached(&p->node, &execlists->queue); @@ -849,7 +939,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) /* Remaining _unready_ requests will be nop'ed when submitted */ - execlists->queue_priority = INT_MIN; + execlists->queue_priority_hint = INT_MIN; execlists->queue = RB_ROOT_CACHED; GEM_BUG_ON(port_isset(execlists->port)); @@ -872,6 +962,8 @@ static void process_csb(struct intel_engine_cs *engine) const u32 * const buf = execlists->csb_status; u8 head, tail; + lockdep_assert_held(&engine->timeline.lock); + /* * Note that csb_write, csb_status may be either in HWSP or mmio. * When reading from the csb_write mmio register, we have to be @@ -960,12 +1052,13 @@ static void process_csb(struct intel_engine_cs *engine) EXECLISTS_ACTIVE_USER)); rq = port_unpack(port, &count); - GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n", + GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d:%d), prio=%d\n", engine->name, port->context_id, count, rq ? rq->global_seqno : 0, rq ? rq->fence.context : 0, rq ? rq->fence.seqno : 0, + rq ? hwsp_seqno(rq) : 0, intel_engine_get_seqno(engine), rq ? rq_prio(rq) : 0); @@ -1079,8 +1172,8 @@ static void __submit_queue_imm(struct intel_engine_cs *engine) static void submit_queue(struct intel_engine_cs *engine, int prio) { - if (prio > engine->execlists.queue_priority) { - engine->execlists.queue_priority = prio; + if (prio > engine->execlists.queue_priority_hint) { + engine->execlists.queue_priority_hint = prio; __submit_queue_imm(engine); } } @@ -1173,6 +1266,24 @@ static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma) return i915_vma_pin(vma, 0, 0, flags); } +static u32 make_rpcs(struct drm_i915_private *dev_priv); + +static void +__execlists_update_reg_state(struct intel_engine_cs *engine, + struct intel_context *ce) +{ + u32 *regs = ce->lrc_reg_state; + struct intel_ring *ring = ce->ring; + + regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(ring->vma); + regs[CTX_RING_HEAD + 1] = ring->head; + regs[CTX_RING_TAIL + 1] = ring->tail; + + /* RPCS */ + if (engine->class == RENDER_CLASS) + regs[CTX_R_PWR_CLK_STATE + 1] = make_rpcs(engine->i915); +} + static struct intel_context * __execlists_context_pin(struct intel_engine_cs *engine, struct i915_gem_context *ctx, @@ -1211,10 +1322,8 @@ __execlists_context_pin(struct intel_engine_cs *engine, GEM_BUG_ON(!intel_ring_offset_valid(ce->ring, ce->ring->head)); ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; - ce->lrc_reg_state[CTX_RING_BUFFER_START+1] = - i915_ggtt_offset(ce->ring->vma); - ce->lrc_reg_state[CTX_RING_HEAD + 1] = ce->ring->head; - ce->lrc_reg_state[CTX_RING_TAIL + 1] = ce->ring->tail; + + __execlists_update_reg_state(engine, ce); ce->state->obj->pin_global++; i915_gem_context_get(ctx); @@ -1254,6 +1363,34 @@ execlists_context_pin(struct intel_engine_cs *engine, return __execlists_context_pin(engine, ctx, ce); } +static int gen8_emit_init_breadcrumb(struct i915_request *rq) +{ + u32 *cs; + + GEM_BUG_ON(!rq->timeline->has_initial_breadcrumb); + + cs = intel_ring_begin(rq, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* + * Check if we have been preempted before we even get started. + * + * After this point i915_request_started() reports true, even if + * we get preempted and so are no longer running. + */ + *cs++ = MI_ARB_CHECK; + *cs++ = MI_NOOP; + + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = rq->timeline->hwsp_offset; + *cs++ = 0; + *cs++ = rq->fence.seqno - 1; + + intel_ring_advance(rq, cs); + return 0; +} + static int emit_pdps(struct i915_request *rq) { const struct intel_engine_cs * const engine = rq->engine; @@ -1679,7 +1816,7 @@ static void enable_execlists(struct intel_engine_cs *engine) _MASKED_BIT_DISABLE(STOP_RING)); I915_WRITE(RING_HWS_PGA(engine->mmio_base), - engine->status_page.ggtt_offset); + i915_ggtt_offset(engine->status_page.vma)); POSTING_READ(RING_HWS_PGA(engine->mmio_base)); } @@ -1716,11 +1853,9 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) return 0; } -static struct i915_request * -execlists_reset_prepare(struct intel_engine_cs *engine) +static void execlists_reset_prepare(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; - struct i915_request *request, *active; unsigned long flags; GEM_TRACE("%s: depth<-%d\n", engine->name, @@ -1736,59 +1871,21 @@ execlists_reset_prepare(struct intel_engine_cs *engine) * prevents the race. */ __tasklet_disable_sync_once(&execlists->tasklet); + GEM_BUG_ON(!reset_in_progress(execlists)); + /* And flush any current direct submission. */ spin_lock_irqsave(&engine->timeline.lock, flags); - - /* - * We want to flush the pending context switches, having disabled - * the tasklet above, we can assume exclusive access to the execlists. - * For this allows us to catch up with an inflight preemption event, - * and avoid blaming an innocent request if the stall was due to the - * preemption itself. - */ - process_csb(engine); - - /* - * The last active request can then be no later than the last request - * now in ELSP[0]. So search backwards from there, so that if the GPU - * has advanced beyond the last CSB update, it will be pardoned. - */ - active = NULL; - request = port_request(execlists->port); - if (request) { - /* - * Prevent the breadcrumb from advancing before we decide - * which request is currently active. - */ - intel_engine_stop_cs(engine); - - list_for_each_entry_from_reverse(request, - &engine->timeline.requests, - link) { - if (__i915_request_completed(request, - request->global_seqno)) - break; - - active = request; - } - } - + process_csb(engine); /* drain preemption events */ spin_unlock_irqrestore(&engine->timeline.lock, flags); - - return active; } -static void execlists_reset(struct intel_engine_cs *engine, - struct i915_request *request) +static void execlists_reset(struct intel_engine_cs *engine, bool stalled) { struct intel_engine_execlists * const execlists = &engine->execlists; + struct i915_request *rq; unsigned long flags; u32 *regs; - GEM_TRACE("%s request global=%d, current=%d\n", - engine->name, request ? request->global_seqno : 0, - intel_engine_get_seqno(engine)); - spin_lock_irqsave(&engine->timeline.lock, flags); /* @@ -1803,12 +1900,18 @@ static void execlists_reset(struct intel_engine_cs *engine, execlists_cancel_port_requests(execlists); /* Push back any incomplete requests for replay after the reset. */ - __unwind_incomplete_requests(engine); + rq = __unwind_incomplete_requests(engine); /* Following the reset, we need to reload the CSB read/write pointers */ reset_csb_pointers(&engine->execlists); - spin_unlock_irqrestore(&engine->timeline.lock, flags); + GEM_TRACE("%s seqno=%d, current=%d, stalled? %s\n", + engine->name, + rq ? rq->global_seqno : 0, + intel_engine_get_seqno(engine), + yesno(stalled)); + if (!rq) + goto out_unlock; /* * If the request was innocent, we leave the request in the ELSP @@ -1821,8 +1924,9 @@ static void execlists_reset(struct intel_engine_cs *engine, * and have to at least restore the RING register in the context * image back to the expected values to skip over the guilty request. */ - if (!request || request->fence.error != -EIO) - return; + i915_reset_request(rq, stalled); + if (!stalled) + goto out_unlock; /* * We want a simple context + ring to execute the breadcrumb update. @@ -1832,25 +1936,22 @@ static void execlists_reset(struct intel_engine_cs *engine, * future request will be after userspace has had the opportunity * to recreate its own state. */ - regs = request->hw_context->lrc_reg_state; + regs = rq->hw_context->lrc_reg_state; if (engine->pinned_default_state) { memcpy(regs, /* skip restoring the vanilla PPHWSP */ engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, engine->context_size - PAGE_SIZE); } - execlists_init_reg_state(regs, - request->gem_context, engine, request->ring); /* Move the RING_HEAD onto the breadcrumb, past the hanging batch */ - regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(request->ring->vma); - - request->ring->head = intel_ring_wrap(request->ring, request->postfix); - regs[CTX_RING_HEAD + 1] = request->ring->head; + rq->ring->head = intel_ring_wrap(rq->ring, rq->postfix); + intel_ring_update_space(rq->ring); - intel_ring_update_space(request->ring); + execlists_init_reg_state(regs, rq->gem_context, engine, rq->ring); + __execlists_update_reg_state(engine, rq->hw_context); - /* Reset WaIdleLiteRestore:bdw,skl as well */ - unwind_wa_tail(request); +out_unlock: + spin_unlock_irqrestore(&engine->timeline.lock, flags); } static void execlists_reset_finish(struct intel_engine_cs *engine) @@ -1863,6 +1964,7 @@ static void execlists_reset_finish(struct intel_engine_cs *engine) * to sleep before we restart and reload a context. * */ + GEM_BUG_ON(!reset_in_progress(execlists)); if (!RB_EMPTY_ROOT(&execlists->queue.rb_root)) execlists->tasklet.func(execlists->tasklet.data); @@ -2035,53 +2137,62 @@ static int gen8_emit_flush_render(struct i915_request *request, * used as a workaround for not being allowed to do lite * restore with HEAD==TAIL (WaIdleLiteRestore). */ -static void gen8_emit_wa_tail(struct i915_request *request, u32 *cs) +static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs) { /* Ensure there's always at least one preemption point per-request. */ *cs++ = MI_ARB_CHECK; *cs++ = MI_NOOP; request->wa_tail = intel_ring_offset(request, cs); + + return cs; } -static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs) +static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) { /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5)); - cs = gen8_emit_ggtt_write(cs, request->global_seqno, + cs = gen8_emit_ggtt_write(cs, + request->fence.seqno, + request->timeline->hwsp_offset); + + cs = gen8_emit_ggtt_write(cs, + request->global_seqno, intel_hws_seqno_address(request->engine)); + *cs++ = MI_USER_INTERRUPT; *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + request->tail = intel_ring_offset(request, cs); assert_ring_tail_valid(request->ring, request->tail); - gen8_emit_wa_tail(request, cs); + return gen8_emit_wa_tail(request, cs); } -static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS; -static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs) +static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) { - /* We're using qword write, seqno should be aligned to 8 bytes. */ - BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1); - cs = gen8_emit_ggtt_write_rcs(cs, - request->global_seqno, - intel_hws_seqno_address(request->engine), + request->fence.seqno, + request->timeline->hwsp_offset, PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_DC_FLUSH_ENABLE | PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_CS_STALL); + cs = gen8_emit_ggtt_write_rcs(cs, + request->global_seqno, + intel_hws_seqno_address(request->engine), + PIPE_CONTROL_CS_STALL); + *cs++ = MI_USER_INTERRUPT; *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; request->tail = intel_ring_offset(request, cs); assert_ring_tail_valid(request->ring, request->tail); - gen8_emit_wa_tail(request, cs); + return gen8_emit_wa_tail(request, cs); } -static const int gen8_emit_breadcrumb_rcs_sz = 8 + WA_TAIL_DWORDS; static int gen8_init_rcs_context(struct i915_request *rq) { @@ -2173,8 +2284,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) engine->request_alloc = execlists_request_alloc; engine->emit_flush = gen8_emit_flush; - engine->emit_breadcrumb = gen8_emit_breadcrumb; - engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_sz; + engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; + engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb; engine->set_default_submission = intel_execlists_set_default_submission; @@ -2213,10 +2324,14 @@ logical_ring_default_irqs(struct intel_engine_cs *engine) engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift; } -static void +static int logical_ring_setup(struct intel_engine_cs *engine) { - intel_engine_setup_common(engine); + int err; + + err = intel_engine_setup_common(engine); + if (err) + return err; /* Intentionally left blank. */ engine->buffer = NULL; @@ -2226,6 +2341,8 @@ logical_ring_setup(struct intel_engine_cs *engine) logical_ring_default_vfuncs(engine); logical_ring_default_irqs(engine); + + return 0; } static int logical_ring_init(struct intel_engine_cs *engine) @@ -2260,10 +2377,10 @@ static int logical_ring_init(struct intel_engine_cs *engine) } execlists->csb_status = - &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; + &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX]; execlists->csb_write = - &engine->status_page.page_addr[intel_hws_csb_write_index(i915)]; + &engine->status_page.addr[intel_hws_csb_write_index(i915)]; reset_csb_pointers(execlists); @@ -2274,13 +2391,14 @@ int logical_render_ring_init(struct intel_engine_cs *engine) { int ret; - logical_ring_setup(engine); + ret = logical_ring_setup(engine); + if (ret) + return ret; /* Override some for render ring. */ engine->init_context = gen8_init_rcs_context; engine->emit_flush = gen8_emit_flush_render; - engine->emit_breadcrumb = gen8_emit_breadcrumb_rcs; - engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_rcs_sz; + engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; ret = logical_ring_init(engine); if (ret) @@ -2304,7 +2422,11 @@ int logical_render_ring_init(struct intel_engine_cs *engine) int logical_xcs_ring_init(struct intel_engine_cs *engine) { - logical_ring_setup(engine); + int err; + + err = logical_ring_setup(engine); + if (err) + return err; return logical_ring_init(engine); } @@ -2534,8 +2656,7 @@ static void execlists_init_reg_state(u32 *regs, if (rcs) { regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); - CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, - make_rpcs(dev_priv)); + CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0); i915_oa_init_reg_state(engine, ctx, regs); } @@ -2638,7 +2759,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, goto error_deref_obj; } - timeline = i915_timeline_create(ctx->i915, ctx->name); + timeline = i915_timeline_create(ctx->i915, ctx->name, NULL); if (IS_ERR(timeline)) { ret = PTR_ERR(timeline); goto error_deref_obj; @@ -2696,12 +2817,8 @@ void intel_lr_context_resume(struct drm_i915_private *i915) intel_ring_reset(ce->ring, 0); - if (ce->pin_count) { /* otherwise done in context_pin */ - u32 *regs = ce->lrc_reg_state; - - regs[CTX_RING_HEAD + 1] = ce->ring->head; - regs[CTX_RING_TAIL + 1] = ce->ring->tail; - } + if (ce->pin_count) /* otherwise done in context_pin */ + __execlists_update_reg_state(engine, ce); } } } @@ -2740,7 +2857,9 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, last = NULL; count = 0; - drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority); + if (execlists->queue_priority_hint != INT_MIN) + drm_printf(m, "\t\tQueue priority hint: %d\n", + execlists->queue_priority_hint); for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { struct i915_priolist *p = rb_entry(rb, typeof(*p), node); int i; diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index e976c5ce5479..331e7a678fb7 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -28,48 +28,60 @@ struct drm_i915_mocs_entry { u32 control_value; u16 l3cc_value; + u16 used; }; struct drm_i915_mocs_table { - u32 size; + unsigned int size; + unsigned int n_entries; const struct drm_i915_mocs_entry *table; }; /* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */ -#define LE_CACHEABILITY(value) ((value) << 0) -#define LE_TGT_CACHE(value) ((value) << 2) +#define _LE_CACHEABILITY(value) ((value) << 0) +#define _LE_TGT_CACHE(value) ((value) << 2) #define LE_LRUM(value) ((value) << 4) #define LE_AOM(value) ((value) << 6) #define LE_RSC(value) ((value) << 7) #define LE_SCC(value) ((value) << 8) #define LE_PFM(value) ((value) << 11) #define LE_SCF(value) ((value) << 14) +#define LE_COS(value) ((value) << 15) +#define LE_SSE(value) ((value) << 17) /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */ #define L3_ESC(value) ((value) << 0) #define L3_SCC(value) ((value) << 1) -#define L3_CACHEABILITY(value) ((value) << 4) +#define _L3_CACHEABILITY(value) ((value) << 4) /* Helper defines */ #define GEN9_NUM_MOCS_ENTRIES 62 /* 62 out of 64 - 63 & 64 are reserved. */ +#define GEN11_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. */ /* (e)LLC caching options */ -#define LE_PAGETABLE 0 -#define LE_UC 1 -#define LE_WT 2 -#define LE_WB 3 - -/* L3 caching options */ -#define L3_DIRECT 0 -#define L3_UC 1 -#define L3_RESERVED 2 -#define L3_WB 3 +#define LE_0_PAGETABLE _LE_CACHEABILITY(0) +#define LE_1_UC _LE_CACHEABILITY(1) +#define LE_2_WT _LE_CACHEABILITY(2) +#define LE_3_WB _LE_CACHEABILITY(3) /* Target cache */ -#define LE_TC_PAGETABLE 0 -#define LE_TC_LLC 1 -#define LE_TC_LLC_ELLC 2 -#define LE_TC_LLC_ELLC_ALT 3 +#define LE_TC_0_PAGETABLE _LE_TGT_CACHE(0) +#define LE_TC_1_LLC _LE_TGT_CACHE(1) +#define LE_TC_2_LLC_ELLC _LE_TGT_CACHE(2) +#define LE_TC_3_LLC_ELLC_ALT _LE_TGT_CACHE(3) + +/* L3 caching options */ +#define L3_0_DIRECT _L3_CACHEABILITY(0) +#define L3_1_UC _L3_CACHEABILITY(1) +#define L3_2_RESERVED _L3_CACHEABILITY(2) +#define L3_3_WB _L3_CACHEABILITY(3) + +#define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \ + [__idx] = { \ + .control_value = __control_value, \ + .l3cc_value = __l3cc_value, \ + .used = 1, \ + } /* * MOCS tables @@ -80,85 +92,147 @@ struct drm_i915_mocs_table { * LNCFCMOCS0 - LNCFCMOCS32 registers. * * These tables are intended to be kept reasonably consistent across - * platforms. However some of the fields are not applicable to all of - * them. + * HW platforms, and for ICL+, be identical across OSes. To achieve + * that, for Icelake and above, list of entries is published as part + * of bspec. * * Entries not part of the following tables are undefined as far as * userspace is concerned and shouldn't be relied upon. For the time - * being they will be implicitly initialized to the strictest caching - * configuration (uncached) to guarantee forwards compatibility with - * userspace programs written against more recent kernels providing - * additional MOCS entries. + * being they will be initialized to PTE. * - * NOTE: These tables MUST start with being uncached and the length - * MUST be less than 63 as the last two registers are reserved - * by the hardware. These tables are part of the kernel ABI and - * may only be updated incrementally by adding entries at the - * end. + * The last two entries are reserved by the hardware. For ICL+ they + * should be initialized according to bspec and never used, for older + * platforms they should never be written to. + * + * NOTE: These tables are part of bspec and defined as part of hardware + * interface for ICL+. For older platforms, they are part of kernel + * ABI. It is expected that, for specific hardware platform, existing + * entries will remain constant and the table will only be updated by + * adding new entries, filling unused positions. */ +#define GEN9_MOCS_ENTRIES \ + MOCS_ENTRY(I915_MOCS_UNCACHED, \ + LE_1_UC | LE_TC_2_LLC_ELLC, \ + L3_1_UC), \ + MOCS_ENTRY(I915_MOCS_PTE, \ + LE_0_PAGETABLE | LE_TC_2_LLC_ELLC | LE_LRUM(3), \ + L3_3_WB) + static const struct drm_i915_mocs_entry skylake_mocs_table[] = { - [I915_MOCS_UNCACHED] = { - /* 0x00000009 */ - .control_value = LE_CACHEABILITY(LE_UC) | - LE_TGT_CACHE(LE_TC_LLC_ELLC) | - LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | - LE_PFM(0) | LE_SCF(0), - - /* 0x0010 */ - .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), - }, - [I915_MOCS_PTE] = { - /* 0x00000038 */ - .control_value = LE_CACHEABILITY(LE_PAGETABLE) | - LE_TGT_CACHE(LE_TC_LLC_ELLC) | - LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | - LE_PFM(0) | LE_SCF(0), - /* 0x0030 */ - .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), - }, - [I915_MOCS_CACHED] = { - /* 0x0000003b */ - .control_value = LE_CACHEABILITY(LE_WB) | - LE_TGT_CACHE(LE_TC_LLC_ELLC) | - LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | - LE_PFM(0) | LE_SCF(0), - /* 0x0030 */ - .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), - }, + GEN9_MOCS_ENTRIES, + MOCS_ENTRY(I915_MOCS_CACHED, + LE_3_WB | LE_TC_2_LLC_ELLC | LE_LRUM(3), + L3_3_WB) }; /* NOTE: the LE_TGT_CACHE is not used on Broxton */ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { - [I915_MOCS_UNCACHED] = { - /* 0x00000009 */ - .control_value = LE_CACHEABILITY(LE_UC) | - LE_TGT_CACHE(LE_TC_LLC_ELLC) | - LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | - LE_PFM(0) | LE_SCF(0), - - /* 0x0010 */ - .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), - }, - [I915_MOCS_PTE] = { - /* 0x00000038 */ - .control_value = LE_CACHEABILITY(LE_PAGETABLE) | - LE_TGT_CACHE(LE_TC_LLC_ELLC) | - LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | - LE_PFM(0) | LE_SCF(0), - - /* 0x0030 */ - .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), - }, - [I915_MOCS_CACHED] = { - /* 0x00000039 */ - .control_value = LE_CACHEABILITY(LE_UC) | - LE_TGT_CACHE(LE_TC_LLC_ELLC) | - LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | - LE_PFM(0) | LE_SCF(0), - - /* 0x0030 */ - .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), - }, + GEN9_MOCS_ENTRIES, + MOCS_ENTRY(I915_MOCS_CACHED, + LE_1_UC | LE_TC_2_LLC_ELLC | LE_LRUM(3), + L3_3_WB) +}; + +#define GEN11_MOCS_ENTRIES \ + /* Base - Uncached (Deprecated) */ \ + MOCS_ENTRY(I915_MOCS_UNCACHED, \ + LE_1_UC | LE_TC_1_LLC, \ + L3_1_UC), \ + /* Base - L3 + LeCC:PAT (Deprecated) */ \ + MOCS_ENTRY(I915_MOCS_PTE, \ + LE_0_PAGETABLE | LE_TC_1_LLC, \ + L3_3_WB), \ + /* Base - L3 + LLC */ \ + MOCS_ENTRY(2, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ + L3_3_WB), \ + /* Base - Uncached */ \ + MOCS_ENTRY(3, \ + LE_1_UC | LE_TC_1_LLC, \ + L3_1_UC), \ + /* Base - L3 */ \ + MOCS_ENTRY(4, \ + LE_1_UC | LE_TC_1_LLC, \ + L3_3_WB), \ + /* Base - LLC */ \ + MOCS_ENTRY(5, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ + L3_1_UC), \ + /* Age 0 - LLC */ \ + MOCS_ENTRY(6, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \ + L3_1_UC), \ + /* Age 0 - L3 + LLC */ \ + MOCS_ENTRY(7, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \ + L3_3_WB), \ + /* Age: Don't Chg. - LLC */ \ + MOCS_ENTRY(8, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \ + L3_1_UC), \ + /* Age: Don't Chg. - L3 + LLC */ \ + MOCS_ENTRY(9, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \ + L3_3_WB), \ + /* No AOM - LLC */ \ + MOCS_ENTRY(10, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \ + L3_1_UC), \ + /* No AOM - L3 + LLC */ \ + MOCS_ENTRY(11, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \ + L3_3_WB), \ + /* No AOM; Age 0 - LLC */ \ + MOCS_ENTRY(12, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \ + L3_1_UC), \ + /* No AOM; Age 0 - L3 + LLC */ \ + MOCS_ENTRY(13, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \ + L3_3_WB), \ + /* No AOM; Age:DC - LLC */ \ + MOCS_ENTRY(14, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \ + L3_1_UC), \ + /* No AOM; Age:DC - L3 + LLC */ \ + MOCS_ENTRY(15, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \ + L3_3_WB), \ + /* Self-Snoop - L3 + LLC */ \ + MOCS_ENTRY(18, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \ + L3_3_WB), \ + /* Skip Caching - L3 + LLC(12.5%) */ \ + MOCS_ENTRY(19, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(7), \ + L3_3_WB), \ + /* Skip Caching - L3 + LLC(25%) */ \ + MOCS_ENTRY(20, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(3), \ + L3_3_WB), \ + /* Skip Caching - L3 + LLC(50%) */ \ + MOCS_ENTRY(21, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(1), \ + L3_3_WB), \ + /* Skip Caching - L3 + LLC(75%) */ \ + MOCS_ENTRY(22, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(3), \ + L3_3_WB), \ + /* Skip Caching - L3 + LLC(87.5%) */ \ + MOCS_ENTRY(23, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(7), \ + L3_3_WB), \ + /* HW Reserved - SW program but never use */ \ + MOCS_ENTRY(62, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ + L3_1_UC), \ + /* HW Reserved - SW program but never use */ \ + MOCS_ENTRY(63, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ + L3_1_UC) + +static const struct drm_i915_mocs_entry icelake_mocs_table[] = { + GEN11_MOCS_ENTRIES }; /** @@ -178,13 +252,19 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv, { bool result = false; - if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv) || - IS_ICELAKE(dev_priv)) { + if (IS_ICELAKE(dev_priv)) { + table->size = ARRAY_SIZE(icelake_mocs_table); + table->table = icelake_mocs_table; + table->n_entries = GEN11_NUM_MOCS_ENTRIES; + result = true; + } else if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { table->size = ARRAY_SIZE(skylake_mocs_table); + table->n_entries = GEN9_NUM_MOCS_ENTRIES; table->table = skylake_mocs_table; result = true; } else if (IS_GEN9_LP(dev_priv)) { table->size = ARRAY_SIZE(broxton_mocs_table); + table->n_entries = GEN9_NUM_MOCS_ENTRIES; table->table = broxton_mocs_table; result = true; } else { @@ -226,6 +306,19 @@ static i915_reg_t mocs_register(enum intel_engine_id engine_id, int index) } } +/* + * Get control_value from MOCS entry taking into account when it's not used: + * I915_MOCS_PTE's value is returned in this case. + */ +static u32 get_entry_control(const struct drm_i915_mocs_table *table, + unsigned int index) +{ + if (table->table[index].used) + return table->table[index].control_value; + + return table->table[I915_MOCS_PTE].control_value; +} + /** * intel_mocs_init_engine() - emit the mocs control table * @engine: The engine for whom to emit the registers. @@ -238,27 +331,23 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine) struct drm_i915_private *dev_priv = engine->i915; struct drm_i915_mocs_table table; unsigned int index; + u32 unused_value; if (!get_mocs_settings(dev_priv, &table)) return; - GEM_BUG_ON(table.size > GEN9_NUM_MOCS_ENTRIES); - - for (index = 0; index < table.size; index++) - I915_WRITE(mocs_register(engine->id, index), - table.table[index].control_value); - - /* - * Ok, now set the unused entries to uncached. These entries - * are officially undefined and no contract for the contents - * and settings is given for these entries. - * - * Entry 0 in the table is uncached - so we are just writing - * that value to all the used entries. - */ - for (; index < GEN9_NUM_MOCS_ENTRIES; index++) - I915_WRITE(mocs_register(engine->id, index), - table.table[0].control_value); + /* Set unused values to PTE */ + unused_value = table.table[I915_MOCS_PTE].control_value; + + for (index = 0; index < table.size; index++) { + u32 value = get_entry_control(&table, index); + + I915_WRITE(mocs_register(engine->id, index), value); + } + + /* All remaining entries are also unused */ + for (; index < table.n_entries; index++) + I915_WRITE(mocs_register(engine->id, index), unused_value); } /** @@ -276,33 +365,32 @@ static int emit_mocs_control_table(struct i915_request *rq, { enum intel_engine_id engine = rq->engine->id; unsigned int index; + u32 unused_value; u32 *cs; - if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES)) + if (GEM_WARN_ON(table->size > table->n_entries)) return -ENODEV; - cs = intel_ring_begin(rq, 2 + 2 * GEN9_NUM_MOCS_ENTRIES); + /* Set unused values to PTE */ + unused_value = table->table[I915_MOCS_PTE].control_value; + + cs = intel_ring_begin(rq, 2 + 2 * table->n_entries); if (IS_ERR(cs)) return PTR_ERR(cs); - *cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES); + *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries); for (index = 0; index < table->size; index++) { + u32 value = get_entry_control(table, index); + *cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); - *cs++ = table->table[index].control_value; + *cs++ = value; } - /* - * Ok, now set the unused entries to uncached. These entries - * are officially undefined and no contract for the contents - * and settings is given for these entries. - * - * Entry 0 in the table is uncached - so we are just writing - * that value to all the used entries. - */ - for (; index < GEN9_NUM_MOCS_ENTRIES; index++) { + /* All remaining entries are also unused */ + for (; index < table->n_entries; index++) { *cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); - *cs++ = table->table[0].control_value; + *cs++ = unused_value; } *cs++ = MI_NOOP; @@ -311,12 +399,24 @@ static int emit_mocs_control_table(struct i915_request *rq, return 0; } +/* + * Get l3cc_value from MOCS entry taking into account when it's not used: + * I915_MOCS_PTE's value is returned in this case. + */ +static u16 get_entry_l3cc(const struct drm_i915_mocs_table *table, + unsigned int index) +{ + if (table->table[index].used) + return table->table[index].l3cc_value; + + return table->table[I915_MOCS_PTE].l3cc_value; +} + static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table, u16 low, u16 high) { - return table->table[low].l3cc_value | - table->table[high].l3cc_value << 16; + return low | high << 16; } /** @@ -333,38 +433,43 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table, static int emit_mocs_l3cc_table(struct i915_request *rq, const struct drm_i915_mocs_table *table) { + u16 unused_value; unsigned int i; u32 *cs; - if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES)) + if (GEM_WARN_ON(table->size > table->n_entries)) return -ENODEV; - cs = intel_ring_begin(rq, 2 + GEN9_NUM_MOCS_ENTRIES); + /* Set unused values to PTE */ + unused_value = table->table[I915_MOCS_PTE].l3cc_value; + + cs = intel_ring_begin(rq, 2 + table->n_entries); if (IS_ERR(cs)) return PTR_ERR(cs); - *cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2); + *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries / 2); + + for (i = 0; i < table->size / 2; i++) { + u16 low = get_entry_l3cc(table, 2 * i); + u16 high = get_entry_l3cc(table, 2 * i + 1); - for (i = 0; i < table->size/2; i++) { *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); - *cs++ = l3cc_combine(table, 2 * i, 2 * i + 1); + *cs++ = l3cc_combine(table, low, high); } + /* Odd table size - 1 left over */ if (table->size & 0x01) { - /* Odd table size - 1 left over */ + u16 low = get_entry_l3cc(table, 2 * i); + *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); - *cs++ = l3cc_combine(table, 2 * i, 0); + *cs++ = l3cc_combine(table, low, unused_value); i++; } - /* - * Now set the rest of the table to uncached - use entry 0 as - * this will be uncached. Leave the last pair uninitialised as - * they are reserved by the hardware. - */ - for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) { + /* All remaining entries are also unused */ + for (; i < table->n_entries / 2; i++) { *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); - *cs++ = l3cc_combine(table, 0, 0); + *cs++ = l3cc_combine(table, unused_value, unused_value); } *cs++ = MI_NOOP; @@ -391,26 +496,35 @@ void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv) { struct drm_i915_mocs_table table; unsigned int i; + u16 unused_value; if (!get_mocs_settings(dev_priv, &table)) return; - for (i = 0; i < table.size/2; i++) - I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, 2*i, 2*i+1)); + /* Set unused values to PTE */ + unused_value = table.table[I915_MOCS_PTE].l3cc_value; + + for (i = 0; i < table.size / 2; i++) { + u16 low = get_entry_l3cc(&table, 2 * i); + u16 high = get_entry_l3cc(&table, 2 * i + 1); + + I915_WRITE(GEN9_LNCFCMOCS(i), + l3cc_combine(&table, low, high)); + } /* Odd table size - 1 left over */ if (table.size & 0x01) { - I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, 2*i, 0)); + u16 low = get_entry_l3cc(&table, 2 * i); + + I915_WRITE(GEN9_LNCFCMOCS(i), + l3cc_combine(&table, low, unused_value)); i++; } - /* - * Now set the rest of the table to uncached - use entry 0 as - * this will be uncached. Leave the last pair as initialised as - * they are reserved by the hardware. - */ - for (; i < (GEN9_NUM_MOCS_ENTRIES / 2); i++) - I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, 0, 0)); + /* All remaining entries are also unused */ + for (; i < table.n_entries / 2; i++) + I915_WRITE(GEN9_LNCFCMOCS(i), + l3cc_combine(&table, unused_value, unused_value)); } /** diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index c300e5787b3c..a9238fd07e30 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -480,8 +480,6 @@ void intel_overlay_reset(struct drm_i915_private *dev_priv) if (!overlay) return; - intel_overlay_release_old_vid(overlay); - overlay->old_xscale = 0; overlay->old_yscale = 0; overlay->crtc = NULL; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index fdc28a3d2936..ed9786241307 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3631,14 +3631,9 @@ static u8 intel_enabled_dbuf_slices_num(struct drm_i915_private *dev_priv) * FIXME: We still don't have the proper code detect if we need to apply the WA, * so assume we'll always need it in order to avoid underruns. */ -static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state) +static bool skl_needs_memory_bw_wa(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(state->base.dev); - - if (IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv)) - return true; - - return false; + return IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv); } static bool @@ -3670,25 +3665,25 @@ intel_enable_sagv(struct drm_i915_private *dev_priv) if (dev_priv->sagv_status == I915_SAGV_ENABLED) return 0; - DRM_DEBUG_KMS("Enabling the SAGV\n"); + DRM_DEBUG_KMS("Enabling SAGV\n"); mutex_lock(&dev_priv->pcu_lock); ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL, GEN9_SAGV_ENABLE); - /* We don't need to wait for the SAGV when enabling */ + /* We don't need to wait for SAGV when enabling */ mutex_unlock(&dev_priv->pcu_lock); /* * Some skl systems, pre-release machines in particular, - * don't actually have an SAGV. + * don't actually have SAGV. */ if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) { DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n"); dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED; return 0; } else if (ret < 0) { - DRM_ERROR("Failed to enable the SAGV\n"); + DRM_ERROR("Failed to enable SAGV\n"); return ret; } @@ -3707,7 +3702,7 @@ intel_disable_sagv(struct drm_i915_private *dev_priv) if (dev_priv->sagv_status == I915_SAGV_DISABLED) return 0; - DRM_DEBUG_KMS("Disabling the SAGV\n"); + DRM_DEBUG_KMS("Disabling SAGV\n"); mutex_lock(&dev_priv->pcu_lock); /* bspec says to keep retrying for at least 1 ms */ @@ -3719,14 +3714,14 @@ intel_disable_sagv(struct drm_i915_private *dev_priv) /* * Some skl systems, pre-release machines in particular, - * don't actually have an SAGV. + * don't actually have SAGV. */ if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) { DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n"); dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED; return 0; } else if (ret < 0) { - DRM_ERROR("Failed to disable the SAGV (%d)\n", ret); + DRM_ERROR("Failed to disable SAGV (%d)\n", ret); return ret; } @@ -3757,7 +3752,7 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state) sagv_block_time_us = 10; /* - * SKL+ workaround: bspec recommends we disable the SAGV when we have + * SKL+ workaround: bspec recommends we disable SAGV when we have * more then one pipe enabled * * If there are no active CRTCs, no additional checks need be performed @@ -3790,7 +3785,7 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state) latency = dev_priv->wm.skl_latency[level]; - if (skl_needs_memory_bw_wa(intel_state) && + if (skl_needs_memory_bw_wa(dev_priv) && plane->base.state->fb->modifier == I915_FORMAT_MOD_X_TILED) latency += 15; @@ -3798,7 +3793,7 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state) /* * If any of the planes on this pipe don't enable wm levels that * incur memory latencies higher than sagv_block_time_us we - * can't enable the SAGV. + * can't enable SAGV. */ if (latency < sagv_block_time_us) return false; @@ -3827,8 +3822,13 @@ static u16 intel_get_ddb_size(struct drm_i915_private *dev_priv, /* * 12GB/s is maximum BW supported by single DBuf slice. + * + * FIXME dbuf slice code is broken: + * - must wait for planes to stop using the slice before powering it off + * - plane straddling both slices is illegal in multi-pipe scenarios + * - should validate we stay within the hw bandwidth limits */ - if (num_active > 1 || total_data_bw >= GBps(12)) { + if (0 && (num_active > 1 || total_data_bw >= GBps(12))) { ddb->enabled_slices = 2; } else { ddb->enabled_slices = 1; @@ -4371,8 +4371,8 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, continue; wm = &cstate->wm.skl.optimal.planes[plane_id]; - blocks += wm->wm[level].plane_res_b; - blocks += wm->uv_wm[level].plane_res_b; + blocks += wm->wm[level].min_ddb_alloc; + blocks += wm->uv_wm[level].min_ddb_alloc; } if (blocks < alloc_size) { @@ -4413,7 +4413,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, extra = min_t(u16, alloc_size, DIV64_U64_ROUND_UP(alloc_size * rate, total_data_rate)); - total[plane_id] = wm->wm[level].plane_res_b + extra; + total[plane_id] = wm->wm[level].min_ddb_alloc + extra; alloc_size -= extra; total_data_rate -= rate; @@ -4424,7 +4424,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, extra = min_t(u16, alloc_size, DIV64_U64_ROUND_UP(alloc_size * rate, total_data_rate)); - uv_total[plane_id] = wm->uv_wm[level].plane_res_b + extra; + uv_total[plane_id] = wm->uv_wm[level].min_ddb_alloc + extra; alloc_size -= extra; total_data_rate -= rate; } @@ -4477,7 +4477,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, */ for_each_plane_id_on_crtc(intel_crtc, plane_id) { wm = &cstate->wm.skl.optimal.planes[plane_id]; - if (wm->trans_wm.plane_res_b > total[plane_id]) + if (wm->trans_wm.plane_res_b >= total[plane_id]) memset(&wm->trans_wm, 0, sizeof(wm->trans_wm)); } @@ -4579,9 +4579,6 @@ skl_compute_plane_wm_params(const struct intel_crtc_state *cstate, const struct drm_plane_state *pstate = &intel_pstate->base; const struct drm_framebuffer *fb = pstate->fb; u32 interm_pbpl; - struct intel_atomic_state *state = - to_intel_atomic_state(cstate->base.state); - bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); /* only NV12 format has two planes */ if (color_plane == 1 && fb->format->format != DRM_FORMAT_NV12) { @@ -4617,7 +4614,7 @@ skl_compute_plane_wm_params(const struct intel_crtc_state *cstate, intel_pstate); if (INTEL_GEN(dev_priv) >= 11 && - fb->modifier == I915_FORMAT_MOD_Yf_TILED && wp->cpp == 8) + fb->modifier == I915_FORMAT_MOD_Yf_TILED && wp->cpp == 1) wp->dbuf_block_size = 256; else wp->dbuf_block_size = 512; @@ -4642,7 +4639,7 @@ skl_compute_plane_wm_params(const struct intel_crtc_state *cstate, wp->y_min_scanlines = 4; } - if (apply_memory_bw_wa) + if (skl_needs_memory_bw_wa(dev_priv)) wp->y_min_scanlines *= 2; wp->plane_bytes_per_line = wp->width * wp->cpp; @@ -4674,6 +4671,15 @@ skl_compute_plane_wm_params(const struct intel_crtc_state *cstate, return 0; } +static bool skl_wm_has_lines(struct drm_i915_private *dev_priv, int level) +{ + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) + return true; + + /* The number of lines are ignored for the level 0 watermark. */ + return level > 0; +} + static void skl_compute_plane_wm(const struct intel_crtc_state *cstate, const struct intel_plane_state *intel_pstate, int level, @@ -4686,10 +4692,10 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *cstate, u32 latency = dev_priv->wm.skl_latency[level]; uint_fixed_16_16_t method1, method2; uint_fixed_16_16_t selected_result; - u32 res_blocks, res_lines; - struct intel_atomic_state *state = - to_intel_atomic_state(cstate->base.state); - bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); + u32 res_blocks, res_lines, min_ddb_alloc = 0; + + if (latency == 0) + return; /* Display WA #1141: kbl,cfl */ if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv) || @@ -4697,7 +4703,7 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *cstate, dev_priv->ipc_enabled) latency += 4; - if (apply_memory_bw_wa && wp->x_tiled) + if (skl_needs_memory_bw_wa(dev_priv) && wp->x_tiled) latency += 15; method1 = skl_wm_method1(dev_priv, wp->plane_pixel_rate, @@ -4756,8 +4762,28 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *cstate, } } - /* The number of lines are ignored for the level 0 watermark. */ - if (level > 0 && res_lines > 31) + if (INTEL_GEN(dev_priv) >= 11) { + if (wp->y_tiled) { + int extra_lines; + + if (res_lines % wp->y_min_scanlines == 0) + extra_lines = wp->y_min_scanlines; + else + extra_lines = wp->y_min_scanlines * 2 - + res_lines % wp->y_min_scanlines; + + min_ddb_alloc = mul_round_up_u32_fixed16(res_lines + extra_lines, + wp->plane_blocks_per_line); + } else { + min_ddb_alloc = res_blocks + + DIV_ROUND_UP(res_blocks, 10); + } + } + + if (!skl_wm_has_lines(dev_priv, level)) + res_lines = 0; + + if (res_lines > 31) return; /* @@ -4768,6 +4794,8 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *cstate, */ result->plane_res_b = res_blocks; result->plane_res_l = res_lines; + /* Bspec says: value >= plane ddb allocation -> invalid, hence the +1 here */ + result->min_ddb_alloc = max(min_ddb_alloc, res_blocks) + 1; result->plane_en = true; } @@ -4801,15 +4829,10 @@ skl_compute_linetime_wm(const struct intel_crtc_state *cstate) u32 linetime_wm; linetime_us = intel_get_linetime_us(cstate); - - if (is_fixed16_zero(linetime_us)) - return 0; - linetime_wm = fixed16_to_u32_round_up(mul_u32_fixed16(8, linetime_us)); - /* Display WA #1135: bxt:ALL GLK:ALL */ - if ((IS_BROXTON(dev_priv) || IS_GEMINILAKE(dev_priv)) && - dev_priv->ipc_enabled) + /* Display WA #1135: BXT:ALL GLK:ALL */ + if (IS_GEN9_LP(dev_priv) && dev_priv->ipc_enabled) linetime_wm /= 2; return linetime_wm; @@ -5118,6 +5141,23 @@ static bool skl_plane_wm_equals(struct drm_i915_private *dev_priv, return skl_wm_level_equals(&wm1->trans_wm, &wm2->trans_wm); } +static bool skl_pipe_wm_equals(struct intel_crtc *crtc, + const struct skl_pipe_wm *wm1, + const struct skl_pipe_wm *wm2) +{ + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum plane_id plane_id; + + for_each_plane_id_on_crtc(crtc, plane_id) { + if (!skl_plane_wm_equals(dev_priv, + &wm1->planes[plane_id], + &wm2->planes[plane_id])) + return false; + } + + return wm1->linetime == wm2->linetime; +} + static inline bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a, const struct skl_ddb_entry *b) { @@ -5144,16 +5184,14 @@ static int skl_update_pipe_wm(struct intel_crtc_state *cstate, struct skl_pipe_wm *pipe_wm, /* out */ bool *changed /* out */) { + struct intel_crtc *crtc = to_intel_crtc(cstate->base.crtc); int ret; ret = skl_build_pipe_wm(cstate, pipe_wm); if (ret) return ret; - if (!memcmp(old_pipe_wm, pipe_wm, sizeof(*pipe_wm))) - *changed = false; - else - *changed = true; + *changed = !skl_pipe_wm_equals(crtc, old_pipe_wm, pipe_wm); return 0; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index e39e483d8d16..b889b27f8aeb 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -33,6 +33,7 @@ #include "i915_drv.h" #include "i915_gem_render_state.h" +#include "i915_reset.h" #include "i915_trace.h" #include "intel_drv.h" #include "intel_workarounds.h" @@ -42,6 +43,12 @@ */ #define LEGACY_REQUEST_SIZE 200 +static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) +{ + return (i915_ggtt_offset(engine->status_page.vma) + + I915_GEM_HWS_INDEX_ADDR); +} + static unsigned int __intel_ring_space(unsigned int head, unsigned int tail, unsigned int size) @@ -299,7 +306,7 @@ gen6_render_ring_flush(struct i915_request *rq, u32 mode) return 0; } -static void gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) +static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) { /* First we do the gen6_emit_post_sync_nonzero_flush w/a */ *cs++ = GFX_OP_PIPE_CONTROL(4); @@ -319,6 +326,11 @@ static void gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) PIPE_CONTROL_DC_FLUSH_ENABLE | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL); + *cs++ = rq->timeline->hwsp_offset | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = rq->fence.seqno; + + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL; *cs++ = intel_hws_seqno_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT; *cs++ = rq->global_seqno; @@ -327,8 +339,9 @@ static void gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); + + return cs; } -static const int gen6_rcs_emit_breadcrumb_sz = 14; static int gen7_render_ring_cs_stall_wa(struct i915_request *rq) @@ -409,7 +422,7 @@ gen7_render_ring_flush(struct i915_request *rq, u32 mode) return 0; } -static void gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) +static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) { *cs++ = GFX_OP_PIPE_CONTROL(4); *cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | @@ -419,6 +432,13 @@ static void gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL); + *cs++ = rq->timeline->hwsp_offset; + *cs++ = rq->fence.seqno; + + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = (PIPE_CONTROL_QW_WRITE | + PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_CS_STALL); *cs++ = intel_hws_seqno_address(rq->engine); *cs++ = rq->global_seqno; @@ -427,34 +447,52 @@ static void gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); + + return cs; } -static const int gen7_rcs_emit_breadcrumb_sz = 6; -static void gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) +static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) { - *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW; - *cs++ = intel_hws_seqno_address(rq->engine) | MI_FLUSH_DW_USE_GTT; + GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + + *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; + *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; + *cs++ = rq->fence.seqno; + + *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; + *cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT; *cs++ = rq->global_seqno; + *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); + + return cs; } -static const int gen6_xcs_emit_breadcrumb_sz = 4; #define GEN7_XCS_WA 32 -static void gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) +static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) { int i; - *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW; - *cs++ = intel_hws_seqno_address(rq->engine) | MI_FLUSH_DW_USE_GTT; + GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + + *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; + *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; + *cs++ = rq->fence.seqno; + + *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; + *cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT; *cs++ = rq->global_seqno; for (i = 0; i < GEN7_XCS_WA; i++) { *cs++ = MI_STORE_DWORD_INDEX; - *cs++ = I915_GEM_HWS_INDEX_ADDR; - *cs++ = rq->global_seqno; + *cs++ = I915_GEM_HWS_SEQNO_ADDR; + *cs++ = rq->fence.seqno; } *cs++ = MI_FLUSH_DW; @@ -462,12 +500,12 @@ static void gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = 0; *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); + + return cs; } -static const int gen7_xcs_emit_breadcrumb_sz = 8 + GEN7_XCS_WA * 3; #undef GEN7_XCS_WA static void set_hwstam(struct intel_engine_cs *engine, u32 mask) @@ -498,12 +536,17 @@ static void set_hws_pga(struct intel_engine_cs *engine, phys_addr_t phys) I915_WRITE(HWS_PGA, addr); } -static void ring_setup_phys_status_page(struct intel_engine_cs *engine) +static struct page *status_page(struct intel_engine_cs *engine) { - struct page *page = virt_to_page(engine->status_page.page_addr); - phys_addr_t phys = PFN_PHYS(page_to_pfn(page)); + struct drm_i915_gem_object *obj = engine->status_page.vma->obj; - set_hws_pga(engine, phys); + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + return sg_page(obj->mm.pages->sgl); +} + +static void ring_setup_phys_status_page(struct intel_engine_cs *engine) +{ + set_hws_pga(engine, PFN_PHYS(page_to_pfn(status_page(engine)))); set_hwstam(engine, ~0u); } @@ -570,7 +613,7 @@ static void flush_cs_tlb(struct intel_engine_cs *engine) static void ring_setup_status_page(struct intel_engine_cs *engine) { - set_hwsp(engine, engine->status_page.ggtt_offset); + set_hwsp(engine, i915_ggtt_offset(engine->status_page.vma)); set_hwstam(engine, ~0u); flush_cs_tlb(engine); @@ -700,59 +743,87 @@ static int init_ring_common(struct intel_engine_cs *engine) } /* Papering over lost _interrupts_ immediately following the restart */ - intel_engine_wakeup(engine); + intel_engine_queue_breadcrumbs(engine); out: intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); return ret; } -static struct i915_request *reset_prepare(struct intel_engine_cs *engine) +static void reset_prepare(struct intel_engine_cs *engine) { intel_engine_stop_cs(engine); - return i915_gem_find_active_request(engine); } -static void skip_request(struct i915_request *rq) +static void reset_ring(struct intel_engine_cs *engine, bool stalled) { - void *vaddr = rq->ring->vaddr; + struct i915_timeline *tl = &engine->timeline; + struct i915_request *pos, *rq; + unsigned long flags; u32 head; - head = rq->infix; - if (rq->postfix < head) { - memset32(vaddr + head, MI_NOOP, - (rq->ring->size - head) / sizeof(u32)); - head = 0; + rq = NULL; + spin_lock_irqsave(&tl->lock, flags); + list_for_each_entry(pos, &tl->requests, link) { + if (!i915_request_completed(pos)) { + rq = pos; + break; + } } - memset32(vaddr + head, MI_NOOP, (rq->postfix - head) / sizeof(u32)); -} - -static void reset_ring(struct intel_engine_cs *engine, struct i915_request *rq) -{ - GEM_TRACE("%s request global=%d, current=%d\n", - engine->name, rq ? rq->global_seqno : 0, - intel_engine_get_seqno(engine)); + GEM_TRACE("%s seqno=%d, current=%d, stalled? %s\n", + engine->name, + rq ? rq->global_seqno : 0, + intel_engine_get_seqno(engine), + yesno(stalled)); /* - * Try to restore the logical GPU state to match the continuation - * of the request queue. If we skip the context/PD restore, then - * the next request may try to execute assuming that its context - * is valid and loaded on the GPU and so may try to access invalid - * memory, prompting repeated GPU hangs. + * The guilty request will get skipped on a hung engine. * - * If the request was guilty, we still restore the logical state - * in case the next request requires it (e.g. the aliasing ppgtt), - * but skip over the hung batch. + * Users of client default contexts do not rely on logical + * state preserved between batches so it is safe to execute + * queued requests following the hang. Non default contexts + * rely on preserved state, so skipping a batch loses the + * evolution of the state and it needs to be considered corrupted. + * Executing more queued batches on top of corrupted state is + * risky. But we take the risk by trying to advance through + * the queued requests in order to make the client behaviour + * more predictable around resets, by not throwing away random + * amount of batches it has prepared for execution. Sophisticated + * clients can use gem_reset_stats_ioctl and dma fence status + * (exported via sync_file info ioctl on explicit fences) to observe + * when it loses the context state and should rebuild accordingly. * - * If the request was innocent, we try to replay the request with - * the restored context. + * The context ban, and ultimately the client ban, mechanism are safety + * valves if client submission ends up resulting in nothing more than + * subsequent hangs. */ + if (rq) { - /* If the rq hung, jump to its breadcrumb and skip the batch */ - rq->ring->head = intel_ring_wrap(rq->ring, rq->head); - if (rq->fence.error == -EIO) - skip_request(rq); + /* + * Try to restore the logical GPU state to match the + * continuation of the request queue. If we skip the + * context/PD restore, then the next request may try to execute + * assuming that its context is valid and loaded on the GPU and + * so may try to access invalid memory, prompting repeated GPU + * hangs. + * + * If the request was guilty, we still restore the logical + * state in case the next request requires it (e.g. the + * aliasing ppgtt), but skip over the hung batch. + * + * If the request was innocent, we try to replay the request + * with the restored context. + */ + i915_reset_request(rq, stalled); + + GEM_BUG_ON(rq->ring != engine->buffer); + head = rq->head; + } else { + head = engine->buffer->tail; } + engine->buffer->head = intel_ring_wrap(engine->buffer, head); + + spin_unlock_irqrestore(&tl->lock, flags); } static void reset_finish(struct intel_engine_cs *engine) @@ -836,10 +907,10 @@ static void cancel_requests(struct intel_engine_cs *engine) list_for_each_entry(request, &engine->timeline.requests, link) { GEM_BUG_ON(!request->global_seqno); - if (i915_request_signaled(request)) - continue; + if (!i915_request_signaled(request)) + dma_fence_set_error(&request->fence, -EIO); - dma_fence_set_error(&request->fence, -EIO); + i915_request_mark_complete(request); } intel_write_status_page(engine, @@ -861,29 +932,43 @@ static void i9xx_submit_request(struct i915_request *request) intel_ring_set_tail(request->ring, request->tail)); } -static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) +static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) { + GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + *cs++ = MI_FLUSH; *cs++ = MI_STORE_DWORD_INDEX; + *cs++ = I915_GEM_HWS_SEQNO_ADDR; + *cs++ = rq->fence.seqno; + + *cs++ = MI_STORE_DWORD_INDEX; *cs++ = I915_GEM_HWS_INDEX_ADDR; *cs++ = rq->global_seqno; *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); + + return cs; } -static const int i9xx_emit_breadcrumb_sz = 6; #define GEN5_WA_STORES 8 /* must be at least 1! */ -static void gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) +static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) { int i; + GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + *cs++ = MI_FLUSH; + *cs++ = MI_STORE_DWORD_INDEX; + *cs++ = I915_GEM_HWS_SEQNO_ADDR; + *cs++ = rq->fence.seqno; + BUILD_BUG_ON(GEN5_WA_STORES < 1); for (i = 0; i < GEN5_WA_STORES; i++) { *cs++ = MI_STORE_DWORD_INDEX; @@ -892,11 +977,13 @@ static void gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) } *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); + + return cs; } -static const int gen5_emit_breadcrumb_sz = GEN5_WA_STORES * 3 + 2; #undef GEN5_WA_STORES static void @@ -1123,6 +1210,10 @@ int intel_ring_pin(struct intel_ring *ring) GEM_BUG_ON(ring->vaddr); + ret = i915_timeline_pin(ring->timeline); + if (ret) + return ret; + flags = PIN_GLOBAL; /* Ring wraparound at offset 0 sometimes hangs. No idea why. */ @@ -1139,28 +1230,32 @@ int intel_ring_pin(struct intel_ring *ring) else ret = i915_gem_object_set_to_cpu_domain(vma->obj, true); if (unlikely(ret)) - return ret; + goto unpin_timeline; } ret = i915_vma_pin(vma, 0, 0, flags); if (unlikely(ret)) - return ret; + goto unpin_timeline; if (i915_vma_is_map_and_fenceable(vma)) addr = (void __force *)i915_vma_pin_iomap(vma); else addr = i915_gem_object_pin_map(vma->obj, map); - if (IS_ERR(addr)) - goto err; + if (IS_ERR(addr)) { + ret = PTR_ERR(addr); + goto unpin_ring; + } vma->obj->pin_global++; ring->vaddr = addr; return 0; -err: +unpin_ring: i915_vma_unpin(vma); - return PTR_ERR(addr); +unpin_timeline: + i915_timeline_unpin(ring->timeline); + return ret; } void intel_ring_reset(struct intel_ring *ring, u32 tail) @@ -1189,6 +1284,8 @@ void intel_ring_unpin(struct intel_ring *ring) ring->vma->obj->pin_global--; i915_vma_unpin(ring->vma); + + i915_timeline_unpin(ring->timeline); } static struct i915_vma * @@ -1499,13 +1596,18 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) struct intel_ring *ring; int err; - intel_engine_setup_common(engine); + err = intel_engine_setup_common(engine); + if (err) + return err; - timeline = i915_timeline_create(engine->i915, engine->name); + timeline = i915_timeline_create(engine->i915, + engine->name, + engine->status_page.vma); if (IS_ERR(timeline)) { err = PTR_ERR(timeline); goto err; } + GEM_BUG_ON(timeline->has_initial_breadcrumb); ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE); i915_timeline_put(timeline); @@ -1525,6 +1627,8 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) if (err) goto err_unpin; + GEM_BUG_ON(ring->timeline->hwsp_ggtt != engine->status_page.vma); + return 0; err_unpin: @@ -1857,6 +1961,7 @@ static int ring_request_alloc(struct i915_request *request) int ret; GEM_BUG_ON(!request->hw_context->pin_count); + GEM_BUG_ON(request->timeline->has_initial_breadcrumb); /* * Flush enough space to reduce the likelihood of waiting after @@ -2193,12 +2298,14 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, engine->context_pin = intel_ring_context_pin; engine->request_alloc = ring_request_alloc; - engine->emit_breadcrumb = i9xx_emit_breadcrumb; - engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz; - if (IS_GEN(dev_priv, 5)) { - engine->emit_breadcrumb = gen5_emit_breadcrumb; - engine->emit_breadcrumb_sz = gen5_emit_breadcrumb_sz; - } + /* + * Using a global execution timeline; the previous final breadcrumb is + * equivalent to our next initial bread so we can elide + * engine->emit_init_breadcrumb(). + */ + engine->emit_fini_breadcrumb = i9xx_emit_breadcrumb; + if (IS_GEN(dev_priv, 5)) + engine->emit_fini_breadcrumb = gen5_emit_breadcrumb; engine->set_default_submission = i9xx_set_default_submission; @@ -2227,13 +2334,11 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine) if (INTEL_GEN(dev_priv) >= 7) { engine->init_context = intel_rcs_ctx_init; engine->emit_flush = gen7_render_ring_flush; - engine->emit_breadcrumb = gen7_rcs_emit_breadcrumb; - engine->emit_breadcrumb_sz = gen7_rcs_emit_breadcrumb_sz; + engine->emit_fini_breadcrumb = gen7_rcs_emit_breadcrumb; } else if (IS_GEN(dev_priv, 6)) { engine->init_context = intel_rcs_ctx_init; engine->emit_flush = gen6_render_ring_flush; - engine->emit_breadcrumb = gen6_rcs_emit_breadcrumb; - engine->emit_breadcrumb_sz = gen6_rcs_emit_breadcrumb_sz; + engine->emit_fini_breadcrumb = gen6_rcs_emit_breadcrumb; } else if (IS_GEN(dev_priv, 5)) { engine->emit_flush = gen4_render_ring_flush; } else { @@ -2269,13 +2374,10 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine) engine->emit_flush = gen6_bsd_ring_flush; engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; - if (IS_GEN(dev_priv, 6)) { - engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb; - engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz; - } else { - engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb; - engine->emit_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz; - } + if (IS_GEN(dev_priv, 6)) + engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb; + else + engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb; } else { engine->emit_flush = bsd_ring_flush; if (IS_GEN(dev_priv, 5)) @@ -2298,13 +2400,10 @@ int intel_init_blt_ring_buffer(struct intel_engine_cs *engine) engine->emit_flush = gen6_ring_flush; engine->irq_enable_mask = GT_BLT_USER_INTERRUPT; - if (IS_GEN(dev_priv, 6)) { - engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb; - engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz; - } else { - engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb; - engine->emit_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz; - } + if (IS_GEN(dev_priv, 6)) + engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb; + else + engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb; return intel_init_ring_buffer(engine); } @@ -2322,8 +2421,7 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine) engine->irq_enable = hsw_vebox_irq_enable; engine->irq_disable = hsw_vebox_irq_disable; - engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb; - engine->emit_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz; + engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb; return intel_init_ring_buffer(engine); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index c3ef0f9bf321..34d0a148e664 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -5,6 +5,7 @@ #include <drm/drm_util.h> #include <linux/hashtable.h> +#include <linux/irq_work.h> #include <linux/seqlock.h> #include "i915_gem_batch_pool.h" @@ -32,8 +33,7 @@ struct i915_sched_attr; struct intel_hw_status_page { struct i915_vma *vma; - u32 *page_addr; - u32 ggtt_offset; + u32 *addr; }; #define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base)) @@ -120,13 +120,8 @@ struct intel_instdone { struct intel_engine_hangcheck { u64 acthd; u32 seqno; - enum intel_engine_hangcheck_action action; unsigned long action_timestamp; - int deadlock; struct intel_instdone instdone; - struct i915_request *active_request; - bool stalled:1; - bool wedged:1; }; struct intel_ring { @@ -209,6 +204,7 @@ struct i915_priolist { struct st_preempt_hang { struct completion completion; + unsigned int count; bool inject_hang; }; @@ -299,14 +295,18 @@ struct intel_engine_execlists { unsigned int port_mask; /** - * @queue_priority: Highest pending priority. + * @queue_priority_hint: Highest pending priority. * * When we add requests into the queue, or adjust the priority of * executing requests, we compute the maximum priority of those * pending requests. We can then use this value to determine if * we need to preempt the executing requests to service the queue. + * However, since the we may have recorded the priority of an inflight + * request we wanted to preempt but since completed, at the time of + * dequeuing the priority hint may no longer may match the highest + * available request priority. */ - int queue_priority; + int queue_priority_hint; /** * @queue: queue of requests, in priority lists @@ -382,22 +382,14 @@ struct intel_engine_cs { * the overhead of waking that client is much preferred. */ struct intel_breadcrumbs { - spinlock_t irq_lock; /* protects irq_*; irqsafe */ - struct intel_wait *irq_wait; /* oldest waiter by retirement */ + spinlock_t irq_lock; + struct list_head signalers; - spinlock_t rb_lock; /* protects the rb and wraps irq_lock */ - struct rb_root waiters; /* sorted by retirement, priority */ - struct list_head signals; /* sorted by retirement */ - struct task_struct *signaler; /* used for fence signalling */ + struct irq_work irq_work; /* for use from inside irq_lock */ - struct timer_list fake_irq; /* used after a missed interrupt */ - struct timer_list hangcheck; /* detect missed interrupts */ - - unsigned int hangcheck_interrupts; unsigned int irq_enabled; - unsigned int irq_count; - bool irq_armed : 1; + bool irq_armed; } breadcrumbs; struct { @@ -444,9 +436,8 @@ struct intel_engine_cs { int (*init_hw)(struct intel_engine_cs *engine); struct { - struct i915_request *(*prepare)(struct intel_engine_cs *engine); - void (*reset)(struct intel_engine_cs *engine, - struct i915_request *rq); + void (*prepare)(struct intel_engine_cs *engine); + void (*reset)(struct intel_engine_cs *engine, bool stalled); void (*finish)(struct intel_engine_cs *engine); } reset; @@ -470,8 +461,10 @@ struct intel_engine_cs { unsigned int dispatch_flags); #define I915_DISPATCH_SECURE BIT(0) #define I915_DISPATCH_PINNED BIT(1) - void (*emit_breadcrumb)(struct i915_request *rq, u32 *cs); - int emit_breadcrumb_sz; + int (*emit_init_breadcrumb)(struct i915_request *rq); + u32 *(*emit_fini_breadcrumb)(struct i915_request *rq, + u32 *cs); + unsigned int emit_fini_breadcrumb_dw; /* Pass the request to the hardware queue (e.g. directly into * the legacy ringbuffer or to the end of an execlist). @@ -677,7 +670,7 @@ static inline u32 intel_read_status_page(const struct intel_engine_cs *engine, int reg) { /* Ensure that the compiler doesn't optimize away the load. */ - return READ_ONCE(engine->status_page.page_addr[reg]); + return READ_ONCE(engine->status_page.addr[reg]); } static inline void @@ -690,12 +683,12 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) */ if (static_cpu_has(X86_FEATURE_CLFLUSH)) { mb(); - clflush(&engine->status_page.page_addr[reg]); - engine->status_page.page_addr[reg] = value; - clflush(&engine->status_page.page_addr[reg]); + clflush(&engine->status_page.addr[reg]); + engine->status_page.addr[reg] = value; + clflush(&engine->status_page.addr[reg]); mb(); } else { - WRITE_ONCE(engine->status_page.page_addr[reg], value); + WRITE_ONCE(engine->status_page.addr[reg], value); } } @@ -716,11 +709,13 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) * The area from dword 0x30 to 0x3ff is available for driver usage. */ #define I915_GEM_HWS_INDEX 0x30 -#define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT) -#define I915_GEM_HWS_PREEMPT_INDEX 0x32 -#define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT_INDEX << MI_STORE_DWORD_INDEX_SHIFT) -#define I915_GEM_HWS_SCRATCH_INDEX 0x40 -#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT) +#define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX * sizeof(u32)) +#define I915_GEM_HWS_PREEMPT 0x32 +#define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT * sizeof(u32)) +#define I915_GEM_HWS_SEQNO 0x40 +#define I915_GEM_HWS_SEQNO_ADDR (I915_GEM_HWS_SEQNO * sizeof(u32)) +#define I915_GEM_HWS_SCRATCH 0x80 +#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH * sizeof(u32)) #define I915_HWS_CSB_BUF0_INDEX 0x10 #define I915_HWS_CSB_WRITE_INDEX 0x1f @@ -825,7 +820,7 @@ intel_ring_set_tail(struct intel_ring *ring, unsigned int tail) void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno); -void intel_engine_setup_common(struct intel_engine_cs *engine); +int intel_engine_setup_common(struct intel_engine_cs *engine); int intel_engine_init_common(struct intel_engine_cs *engine); void intel_engine_cleanup_common(struct intel_engine_cs *engine); @@ -883,93 +878,29 @@ static inline bool intel_engine_has_started(struct intel_engine_cs *engine, void intel_engine_get_instdone(struct intel_engine_cs *engine, struct intel_instdone *instdone); -static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) -{ - return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR; -} - -static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine) -{ - return engine->status_page.ggtt_offset + I915_GEM_HWS_PREEMPT_ADDR; -} - -/* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */ -int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); - -static inline void intel_wait_init(struct intel_wait *wait) -{ - wait->tsk = current; - wait->request = NULL; -} - -static inline void intel_wait_init_for_seqno(struct intel_wait *wait, u32 seqno) -{ - wait->tsk = current; - wait->seqno = seqno; -} - -static inline bool intel_wait_has_seqno(const struct intel_wait *wait) -{ - return wait->seqno; -} - -static inline bool -intel_wait_update_seqno(struct intel_wait *wait, u32 seqno) -{ - wait->seqno = seqno; - return intel_wait_has_seqno(wait); -} - -static inline bool -intel_wait_update_request(struct intel_wait *wait, - const struct i915_request *rq) -{ - return intel_wait_update_seqno(wait, i915_request_global_seqno(rq)); -} - -static inline bool -intel_wait_check_seqno(const struct intel_wait *wait, u32 seqno) -{ - return wait->seqno == seqno; -} - -static inline bool -intel_wait_check_request(const struct intel_wait *wait, - const struct i915_request *rq) -{ - return intel_wait_check_seqno(wait, i915_request_global_seqno(rq)); -} +void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); +void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); -static inline bool intel_wait_complete(const struct intel_wait *wait) -{ - return RB_EMPTY_NODE(&wait->node); -} +void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine); +void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine); -bool intel_engine_add_wait(struct intel_engine_cs *engine, - struct intel_wait *wait); -void intel_engine_remove_wait(struct intel_engine_cs *engine, - struct intel_wait *wait); -bool intel_engine_enable_signaling(struct i915_request *request, bool wakeup); -void intel_engine_cancel_signaling(struct i915_request *request); +bool intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine); +void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); -static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine) +static inline void +intel_engine_queue_breadcrumbs(struct intel_engine_cs *engine) { - return READ_ONCE(engine->breadcrumbs.irq_wait); + irq_work_queue(&engine->breadcrumbs.irq_work); } -unsigned int intel_engine_wakeup(struct intel_engine_cs *engine); -#define ENGINE_WAKEUP_WAITER BIT(0) -#define ENGINE_WAKEUP_ASLEEP BIT(1) - -void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine); -void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine); - -void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); -void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); +bool intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine); void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); +void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine, + struct drm_printer *p); + static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) { memset(batch, 0, 6 * sizeof(u32)); @@ -1018,6 +949,13 @@ gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset) return cs; } +static inline void intel_engine_reset(struct intel_engine_cs *engine, + bool stalled) +{ + if (engine->reset.reset) + engine->reset.reset(engine, stalled); +} + void intel_engines_sanitize(struct drm_i915_private *i915, bool force); bool intel_engine_is_idle(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index b02d3d9809e3..cd42e81f8a90 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -493,7 +493,7 @@ skl_program_plane(struct intel_plane *plane, keymax = (key->max_value & 0xffffff) | PLANE_KEYMAX_ALPHA(alpha); - keymsk = key->channel_mask & 0x3ffffff; + keymsk = key->channel_mask & 0x7ffffff; if (alpha < 0xff) keymsk |= PLANE_KEYMSK_ALPHA_ENABLE; diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index bd5536f0ec92..3924c4944e1f 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -306,7 +306,7 @@ struct tv_mode { u32 clock; u16 refresh; /* in millihertz (for precision) */ - u32 oversample; + u8 oversample; u8 hsync_end; u16 hblank_start, hblank_end, htotal; bool progressive : 1, trilevel_sync : 1, component_only : 1; @@ -339,7 +339,6 @@ struct tv_mode { const struct video_levels *composite_levels, *svideo_levels; const struct color_conversion *composite_color, *svideo_color; const u32 *filter_table; - u16 max_srcw; }; @@ -378,8 +377,8 @@ static const struct tv_mode tv_modes[] = { .name = "NTSC-M", .clock = 108000, .refresh = 59940, - .oversample = TV_OVERSAMPLE_8X, - .component_only = 0, + .oversample = 8, + .component_only = false, /* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 3.580MHz */ .hsync_end = 64, .hblank_end = 124, @@ -421,8 +420,8 @@ static const struct tv_mode tv_modes[] = { .name = "NTSC-443", .clock = 108000, .refresh = 59940, - .oversample = TV_OVERSAMPLE_8X, - .component_only = 0, + .oversample = 8, + .component_only = false, /* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 4.43MHz */ .hsync_end = 64, .hblank_end = 124, .hblank_start = 836, .htotal = 857, @@ -463,8 +462,8 @@ static const struct tv_mode tv_modes[] = { .name = "NTSC-J", .clock = 108000, .refresh = 59940, - .oversample = TV_OVERSAMPLE_8X, - .component_only = 0, + .oversample = 8, + .component_only = false, /* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 3.580MHz */ .hsync_end = 64, .hblank_end = 124, @@ -506,8 +505,8 @@ static const struct tv_mode tv_modes[] = { .name = "PAL-M", .clock = 108000, .refresh = 59940, - .oversample = TV_OVERSAMPLE_8X, - .component_only = 0, + .oversample = 8, + .component_only = false, /* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 3.580MHz */ .hsync_end = 64, .hblank_end = 124, @@ -550,8 +549,8 @@ static const struct tv_mode tv_modes[] = { .name = "PAL-N", .clock = 108000, .refresh = 50000, - .oversample = TV_OVERSAMPLE_8X, - .component_only = 0, + .oversample = 8, + .component_only = false, .hsync_end = 64, .hblank_end = 128, .hblank_start = 844, .htotal = 863, @@ -595,8 +594,8 @@ static const struct tv_mode tv_modes[] = { .name = "PAL", .clock = 108000, .refresh = 50000, - .oversample = TV_OVERSAMPLE_8X, - .component_only = 0, + .oversample = 8, + .component_only = false, .hsync_end = 64, .hblank_end = 142, .hblank_start = 844, .htotal = 863, @@ -635,10 +634,10 @@ static const struct tv_mode tv_modes[] = { }, { .name = "480p", - .clock = 107520, + .clock = 108000, .refresh = 59940, - .oversample = TV_OVERSAMPLE_4X, - .component_only = 1, + .oversample = 4, + .component_only = true, .hsync_end = 64, .hblank_end = 122, .hblank_start = 842, .htotal = 857, @@ -659,10 +658,10 @@ static const struct tv_mode tv_modes[] = { }, { .name = "576p", - .clock = 107520, + .clock = 108000, .refresh = 50000, - .oversample = TV_OVERSAMPLE_4X, - .component_only = 1, + .oversample = 4, + .component_only = true, .hsync_end = 64, .hblank_end = 139, .hblank_start = 859, .htotal = 863, @@ -683,10 +682,10 @@ static const struct tv_mode tv_modes[] = { }, { .name = "720p@60Hz", - .clock = 148800, + .clock = 148500, .refresh = 60000, - .oversample = TV_OVERSAMPLE_2X, - .component_only = 1, + .oversample = 2, + .component_only = true, .hsync_end = 80, .hblank_end = 300, .hblank_start = 1580, .htotal = 1649, @@ -707,10 +706,10 @@ static const struct tv_mode tv_modes[] = { }, { .name = "720p@50Hz", - .clock = 148800, + .clock = 148500, .refresh = 50000, - .oversample = TV_OVERSAMPLE_2X, - .component_only = 1, + .oversample = 2, + .component_only = true, .hsync_end = 80, .hblank_end = 300, .hblank_start = 1580, .htotal = 1979, @@ -728,14 +727,13 @@ static const struct tv_mode tv_modes[] = { .burst_ena = false, .filter_table = filter_table, - .max_srcw = 800 }, { .name = "1080i@50Hz", - .clock = 148800, + .clock = 148500, .refresh = 50000, - .oversample = TV_OVERSAMPLE_2X, - .component_only = 1, + .oversample = 2, + .component_only = true, .hsync_end = 88, .hblank_end = 235, .hblank_start = 2155, .htotal = 2639, @@ -758,10 +756,10 @@ static const struct tv_mode tv_modes[] = { }, { .name = "1080i@60Hz", - .clock = 148800, + .clock = 148500, .refresh = 60000, - .oversample = TV_OVERSAMPLE_2X, - .component_only = 1, + .oversample = 2, + .component_only = true, .hsync_end = 88, .hblank_end = 235, .hblank_start = 2155, .htotal = 2199, @@ -782,8 +780,115 @@ static const struct tv_mode tv_modes[] = { .filter_table = filter_table, }, + + { + .name = "1080p@30Hz", + .clock = 148500, + .refresh = 30000, + .oversample = 2, + .component_only = true, + + .hsync_end = 88, .hblank_end = 235, + .hblank_start = 2155, .htotal = 2199, + + .progressive = true, .trilevel_sync = true, + + .vsync_start_f1 = 8, .vsync_start_f2 = 8, + .vsync_len = 10, + + .veq_ena = false, .veq_start_f1 = 0, + .veq_start_f2 = 0, .veq_len = 0, + + .vi_end_f1 = 44, .vi_end_f2 = 44, + .nbr_end = 1079, + + .burst_ena = false, + + .filter_table = filter_table, + }, + + { + .name = "1080p@50Hz", + .clock = 148500, + .refresh = 50000, + .oversample = 1, + .component_only = true, + + .hsync_end = 88, .hblank_end = 235, + .hblank_start = 2155, .htotal = 2639, + + .progressive = true, .trilevel_sync = true, + + .vsync_start_f1 = 8, .vsync_start_f2 = 8, + .vsync_len = 10, + + .veq_ena = false, .veq_start_f1 = 0, + .veq_start_f2 = 0, .veq_len = 0, + + .vi_end_f1 = 44, .vi_end_f2 = 44, + .nbr_end = 1079, + + .burst_ena = false, + + .filter_table = filter_table, + }, + + { + .name = "1080p@60Hz", + .clock = 148500, + .refresh = 60000, + .oversample = 1, + .component_only = true, + + .hsync_end = 88, .hblank_end = 235, + .hblank_start = 2155, .htotal = 2199, + + .progressive = true, .trilevel_sync = true, + + .vsync_start_f1 = 8, .vsync_start_f2 = 8, + .vsync_len = 10, + + .veq_ena = false, .veq_start_f1 = 0, + .veq_start_f2 = 0, .veq_len = 0, + + .vi_end_f1 = 44, .vi_end_f2 = 44, + .nbr_end = 1079, + + .burst_ena = false, + + .filter_table = filter_table, + }, +}; + +struct intel_tv_connector_state { + struct drm_connector_state base; + + /* + * May need to override the user margins for + * gen3 >1024 wide source vertical centering. + */ + struct { + u16 top, bottom; + } margins; + + bool bypass_vfilter; }; +#define to_intel_tv_connector_state(x) container_of(x, struct intel_tv_connector_state, base) + +static struct drm_connector_state * +intel_tv_connector_duplicate_state(struct drm_connector *connector) +{ + struct intel_tv_connector_state *state; + + state = kmemdup(connector->state, sizeof(*state), GFP_KERNEL); + if (!state) + return NULL; + + __drm_atomic_helper_connector_duplicate_state(connector, &state->base); + return &state->base; +} + static struct intel_tv *enc_to_tv(struct intel_encoder *encoder) { return container_of(encoder, struct intel_tv, base); @@ -859,14 +964,215 @@ intel_tv_mode_valid(struct drm_connector *connector, return MODE_CLOCK_RANGE; } +static int +intel_tv_mode_vdisplay(const struct tv_mode *tv_mode) +{ + if (tv_mode->progressive) + return tv_mode->nbr_end + 1; + else + return 2 * (tv_mode->nbr_end + 1); +} + +static void +intel_tv_mode_to_mode(struct drm_display_mode *mode, + const struct tv_mode *tv_mode) +{ + mode->clock = tv_mode->clock / + (tv_mode->oversample >> !tv_mode->progressive); + + /* + * tv_mode horizontal timings: + * + * hsync_end + * | hblank_end + * | | hblank_start + * | | | htotal + * | _______ | + * ____/ \___ + * \__/ \ + */ + mode->hdisplay = + tv_mode->hblank_start - tv_mode->hblank_end; + mode->hsync_start = mode->hdisplay + + tv_mode->htotal - tv_mode->hblank_start; + mode->hsync_end = mode->hsync_start + + tv_mode->hsync_end; + mode->htotal = tv_mode->htotal + 1; + + /* + * tv_mode vertical timings: + * + * vsync_start + * | vsync_end + * | | vi_end nbr_end + * | | | | + * | | _______ + * \__ ____/ \ + * \__/ + */ + mode->vdisplay = intel_tv_mode_vdisplay(tv_mode); + if (tv_mode->progressive) { + mode->vsync_start = mode->vdisplay + + tv_mode->vsync_start_f1 + 1; + mode->vsync_end = mode->vsync_start + + tv_mode->vsync_len; + mode->vtotal = mode->vdisplay + + tv_mode->vi_end_f1 + 1; + } else { + mode->vsync_start = mode->vdisplay + + tv_mode->vsync_start_f1 + 1 + + tv_mode->vsync_start_f2 + 1; + mode->vsync_end = mode->vsync_start + + 2 * tv_mode->vsync_len; + mode->vtotal = mode->vdisplay + + tv_mode->vi_end_f1 + 1 + + tv_mode->vi_end_f2 + 1; + } + + /* TV has it's own notion of sync and other mode flags, so clear them. */ + mode->flags = 0; + + mode->vrefresh = 0; + mode->vrefresh = drm_mode_vrefresh(mode); + + snprintf(mode->name, sizeof(mode->name), + "%dx%d%c (%s)", + mode->hdisplay, mode->vdisplay, + tv_mode->progressive ? 'p' : 'i', + tv_mode->name); +} + +static void intel_tv_scale_mode_horiz(struct drm_display_mode *mode, + int hdisplay, int left_margin, + int right_margin) +{ + int hsync_start = mode->hsync_start - mode->hdisplay + right_margin; + int hsync_end = mode->hsync_end - mode->hdisplay + right_margin; + int new_htotal = mode->htotal * hdisplay / + (mode->hdisplay - left_margin - right_margin); + + mode->clock = mode->clock * new_htotal / mode->htotal; + + mode->hdisplay = hdisplay; + mode->hsync_start = hdisplay + hsync_start * new_htotal / mode->htotal; + mode->hsync_end = hdisplay + hsync_end * new_htotal / mode->htotal; + mode->htotal = new_htotal; +} + +static void intel_tv_scale_mode_vert(struct drm_display_mode *mode, + int vdisplay, int top_margin, + int bottom_margin) +{ + int vsync_start = mode->vsync_start - mode->vdisplay + bottom_margin; + int vsync_end = mode->vsync_end - mode->vdisplay + bottom_margin; + int new_vtotal = mode->vtotal * vdisplay / + (mode->vdisplay - top_margin - bottom_margin); + + mode->clock = mode->clock * new_vtotal / mode->vtotal; + + mode->vdisplay = vdisplay; + mode->vsync_start = vdisplay + vsync_start * new_vtotal / mode->vtotal; + mode->vsync_end = vdisplay + vsync_end * new_vtotal / mode->vtotal; + mode->vtotal = new_vtotal; +} static void intel_tv_get_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct drm_display_mode *adjusted_mode = + &pipe_config->base.adjusted_mode; + struct drm_display_mode mode = {}; + u32 tv_ctl, hctl1, hctl3, vctl1, vctl2, tmp; + struct tv_mode tv_mode = {}; + int hdisplay = adjusted_mode->crtc_hdisplay; + int vdisplay = adjusted_mode->crtc_vdisplay; + int xsize, ysize, xpos, ypos; + pipe_config->output_types |= BIT(INTEL_OUTPUT_TVOUT); - pipe_config->base.adjusted_mode.crtc_clock = pipe_config->port_clock; + tv_ctl = I915_READ(TV_CTL); + hctl1 = I915_READ(TV_H_CTL_1); + hctl3 = I915_READ(TV_H_CTL_3); + vctl1 = I915_READ(TV_V_CTL_1); + vctl2 = I915_READ(TV_V_CTL_2); + + tv_mode.htotal = (hctl1 & TV_HTOTAL_MASK) >> TV_HTOTAL_SHIFT; + tv_mode.hsync_end = (hctl1 & TV_HSYNC_END_MASK) >> TV_HSYNC_END_SHIFT; + + tv_mode.hblank_start = (hctl3 & TV_HBLANK_START_MASK) >> TV_HBLANK_START_SHIFT; + tv_mode.hblank_end = (hctl3 & TV_HSYNC_END_MASK) >> TV_HBLANK_END_SHIFT; + + tv_mode.nbr_end = (vctl1 & TV_NBR_END_MASK) >> TV_NBR_END_SHIFT; + tv_mode.vi_end_f1 = (vctl1 & TV_VI_END_F1_MASK) >> TV_VI_END_F1_SHIFT; + tv_mode.vi_end_f2 = (vctl1 & TV_VI_END_F2_MASK) >> TV_VI_END_F2_SHIFT; + + tv_mode.vsync_len = (vctl2 & TV_VSYNC_LEN_MASK) >> TV_VSYNC_LEN_SHIFT; + tv_mode.vsync_start_f1 = (vctl2 & TV_VSYNC_START_F1_MASK) >> TV_VSYNC_START_F1_SHIFT; + tv_mode.vsync_start_f2 = (vctl2 & TV_VSYNC_START_F2_MASK) >> TV_VSYNC_START_F2_SHIFT; + + tv_mode.clock = pipe_config->port_clock; + + tv_mode.progressive = tv_ctl & TV_PROGRESSIVE; + + switch (tv_ctl & TV_OVERSAMPLE_MASK) { + case TV_OVERSAMPLE_8X: + tv_mode.oversample = 8; + break; + case TV_OVERSAMPLE_4X: + tv_mode.oversample = 4; + break; + case TV_OVERSAMPLE_2X: + tv_mode.oversample = 2; + break; + default: + tv_mode.oversample = 1; + break; + } + + tmp = I915_READ(TV_WIN_POS); + xpos = tmp >> 16; + ypos = tmp & 0xffff; + + tmp = I915_READ(TV_WIN_SIZE); + xsize = tmp >> 16; + ysize = tmp & 0xffff; + + intel_tv_mode_to_mode(&mode, &tv_mode); + + DRM_DEBUG_KMS("TV mode:\n"); + drm_mode_debug_printmodeline(&mode); + + intel_tv_scale_mode_horiz(&mode, hdisplay, + xpos, mode.hdisplay - xsize - xpos); + intel_tv_scale_mode_vert(&mode, vdisplay, + ypos, mode.vdisplay - ysize - ypos); + + adjusted_mode->crtc_clock = mode.clock; + if (adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) + adjusted_mode->crtc_clock /= 2; + + /* pixel counter doesn't work on i965gm TV output */ + if (IS_I965GM(dev_priv)) + adjusted_mode->private_flags |= + I915_MODE_FLAG_USE_SCANLINE_COUNTER; +} + +static bool intel_tv_source_too_wide(struct drm_i915_private *dev_priv, + int hdisplay) +{ + return IS_GEN(dev_priv, 3) && hdisplay > 1024; +} + +static bool intel_tv_vert_scaling(const struct drm_display_mode *tv_mode, + const struct drm_connector_state *conn_state, + int vdisplay) +{ + return tv_mode->crtc_vdisplay - + conn_state->tv.margins.top - + conn_state->tv.margins.bottom != + vdisplay; } static int @@ -874,9 +1180,14 @@ intel_tv_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) { + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_tv_connector_state *tv_conn_state = + to_intel_tv_connector_state(conn_state); const struct tv_mode *tv_mode = intel_tv_mode_find(conn_state); struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; + int hdisplay = adjusted_mode->crtc_hdisplay; + int vdisplay = adjusted_mode->crtc_vdisplay; if (!tv_mode) return -EINVAL; @@ -885,17 +1196,136 @@ intel_tv_compute_config(struct intel_encoder *encoder, return -EINVAL; pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; - adjusted_mode->crtc_clock = tv_mode->clock; + DRM_DEBUG_KMS("forcing bpc to 8 for TV\n"); pipe_config->pipe_bpp = 8*3; - /* TV has it's own notion of sync and other mode flags, so clear them. */ - adjusted_mode->flags = 0; + pipe_config->port_clock = tv_mode->clock; + + intel_tv_mode_to_mode(adjusted_mode, tv_mode); + drm_mode_set_crtcinfo(adjusted_mode, 0); + + if (intel_tv_source_too_wide(dev_priv, hdisplay) || + !intel_tv_vert_scaling(adjusted_mode, conn_state, vdisplay)) { + int extra, top, bottom; + + extra = adjusted_mode->crtc_vdisplay - vdisplay; + + if (extra < 0) { + DRM_DEBUG_KMS("No vertical scaling for >1024 pixel wide modes\n"); + return -EINVAL; + } + + /* Need to turn off the vertical filter and center the image */ + + /* Attempt to maintain the relative sizes of the margins */ + top = conn_state->tv.margins.top; + bottom = conn_state->tv.margins.bottom; + + if (top + bottom) + top = extra * top / (top + bottom); + else + top = extra / 2; + bottom = extra - top; + + tv_conn_state->margins.top = top; + tv_conn_state->margins.bottom = bottom; + + tv_conn_state->bypass_vfilter = true; + + if (!tv_mode->progressive) { + adjusted_mode->clock /= 2; + adjusted_mode->crtc_clock /= 2; + adjusted_mode->flags |= DRM_MODE_FLAG_INTERLACE; + } + } else { + tv_conn_state->margins.top = conn_state->tv.margins.top; + tv_conn_state->margins.bottom = conn_state->tv.margins.bottom; + + tv_conn_state->bypass_vfilter = false; + } + + DRM_DEBUG_KMS("TV mode:\n"); + drm_mode_debug_printmodeline(adjusted_mode); /* - * FIXME: We don't check whether the input mode is actually what we want - * or whether userspace is doing something stupid. + * The pipe scanline counter behaviour looks as follows when + * using the TV encoder: + * + * time -> + * + * dsl=vtotal-1 | | + * || || + * ___| | ___| | + * / | / | + * / | / | + * dsl=0 ___/ |_____/ | + * | | | | | | + * ^ ^ ^ ^ ^ + * | | | | pipe vblank/first part of tv vblank + * | | | bottom margin + * | | active + * | top margin + * remainder of tv vblank + * + * When the TV encoder is used the pipe wants to run faster + * than expected rate. During the active portion the TV + * encoder stalls the pipe every few lines to keep it in + * check. When the TV encoder reaches the bottom margin the + * pipe simply stops. Once we reach the TV vblank the pipe is + * no longer stalled and it runs at the max rate (apparently + * oversample clock on gen3, cdclk on gen4). Once the pipe + * reaches the pipe vtotal the pipe stops for the remainder + * of the TV vblank/top margin. The pipe starts up again when + * the TV encoder exits the top margin. + * + * To avoid huge hassles for vblank timestamping we scale + * the pipe timings as if the pipe always runs at the average + * rate it maintains during the active period. This also + * gives us a reasonable guesstimate as to the pixel rate. + * Due to the variation in the actual pipe speed the scanline + * counter will give us slightly erroneous results during the + * TV vblank/margins. But since vtotal was selected such that + * it matches the average rate of the pipe during the active + * portion the error shouldn't cause any serious grief to + * vblank timestamps. + * + * For posterity here is the empirically derived formula + * that gives us the maximum length of the pipe vblank + * we can use without causing display corruption. Following + * this would allow us to have a ticking scanline counter + * everywhere except during the bottom margin (there the + * pipe always stops). Ie. this would eliminate the second + * flat portion of the above graph. However this would also + * complicate vblank timestamping as the pipe vtotal would + * no longer match the average rate the pipe runs at during + * the active portion. Hence following this formula seems + * more trouble that it's worth. + * + * if (IS_GEN(dev_priv, 4)) { + * num = cdclk * (tv_mode->oversample >> !tv_mode->progressive); + * den = tv_mode->clock; + * } else { + * num = tv_mode->oversample >> !tv_mode->progressive; + * den = 1; + * } + * max_pipe_vblank_len ~= + * (num * tv_htotal * (tv_vblank_len + top_margin)) / + * (den * pipe_htotal); */ + intel_tv_scale_mode_horiz(adjusted_mode, hdisplay, + conn_state->tv.margins.left, + conn_state->tv.margins.right); + intel_tv_scale_mode_vert(adjusted_mode, vdisplay, + tv_conn_state->margins.top, + tv_conn_state->margins.bottom); + drm_mode_set_crtcinfo(adjusted_mode, 0); + adjusted_mode->name[0] = '\0'; + + /* pixel counter doesn't work on i965gm TV output */ + if (IS_I965GM(dev_priv)) + adjusted_mode->private_flags |= + I915_MODE_FLAG_USE_SCANLINE_COUNTER; return 0; } @@ -986,14 +1416,16 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->base.crtc); struct intel_tv *intel_tv = enc_to_tv(encoder); + const struct intel_tv_connector_state *tv_conn_state = + to_intel_tv_connector_state(conn_state); const struct tv_mode *tv_mode = intel_tv_mode_find(conn_state); - u32 tv_ctl; + u32 tv_ctl, tv_filter_ctl; u32 scctl1, scctl2, scctl3; int i, j; const struct video_levels *video_levels; const struct color_conversion *color_conversion; bool burst_ena; - int xpos = 0x0, ypos = 0x0; + int xpos, ypos; unsigned int xsize, ysize; if (!tv_mode) @@ -1029,7 +1461,21 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder, } tv_ctl |= TV_ENC_PIPE_SEL(intel_crtc->pipe); - tv_ctl |= tv_mode->oversample; + + switch (tv_mode->oversample) { + case 8: + tv_ctl |= TV_OVERSAMPLE_8X; + break; + case 4: + tv_ctl |= TV_OVERSAMPLE_4X; + break; + case 2: + tv_ctl |= TV_OVERSAMPLE_2X; + break; + default: + tv_ctl |= TV_OVERSAMPLE_NONE; + break; + } if (tv_mode->progressive) tv_ctl |= TV_PROGRESSIVE; @@ -1081,19 +1527,20 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder, assert_pipe_disabled(dev_priv, intel_crtc->pipe); /* Filter ctl must be set before TV_WIN_SIZE */ - I915_WRITE(TV_FILTER_CTL_1, TV_AUTO_SCALE); + tv_filter_ctl = TV_AUTO_SCALE; + if (tv_conn_state->bypass_vfilter) + tv_filter_ctl |= TV_V_FILTER_BYPASS; + I915_WRITE(TV_FILTER_CTL_1, tv_filter_ctl); + xsize = tv_mode->hblank_start - tv_mode->hblank_end; - if (tv_mode->progressive) - ysize = tv_mode->nbr_end + 1; - else - ysize = 2*tv_mode->nbr_end + 1; + ysize = intel_tv_mode_vdisplay(tv_mode); - xpos += conn_state->tv.margins.left; - ypos += conn_state->tv.margins.top; + xpos = conn_state->tv.margins.left; + ypos = tv_conn_state->margins.top; xsize -= (conn_state->tv.margins.left + conn_state->tv.margins.right); - ysize -= (conn_state->tv.margins.top + - conn_state->tv.margins.bottom); + ysize -= (tv_conn_state->margins.top + + tv_conn_state->margins.bottom); I915_WRITE(TV_WIN_POS, (xpos<<16)|ypos); I915_WRITE(TV_WIN_SIZE, (xsize<<16)|ysize); @@ -1110,23 +1557,6 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder, I915_WRITE(TV_CTL, tv_ctl); } -static const struct drm_display_mode reported_modes[] = { - { - .name = "NTSC 480i", - .clock = 107520, - .hdisplay = 1280, - .hsync_start = 1368, - .hsync_end = 1496, - .htotal = 1712, - - .vdisplay = 1024, - .vsync_start = 1027, - .vsync_end = 1034, - .vtotal = 1104, - .type = DRM_MODE_TYPE_DRIVER, - }, -}; - static int intel_tv_detect_type(struct intel_tv *intel_tv, struct drm_connector *connector) @@ -1233,16 +1663,18 @@ static void intel_tv_find_better_format(struct drm_connector *connector) const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state); int i; - if ((intel_tv->type == DRM_MODE_CONNECTOR_Component) == - tv_mode->component_only) + /* Component supports everything so we can keep the current mode */ + if (intel_tv->type == DRM_MODE_CONNECTOR_Component) return; + /* If the current mode is fine don't change it */ + if (!tv_mode->component_only) + return; for (i = 0; i < ARRAY_SIZE(tv_modes); i++) { - tv_mode = tv_modes + i; + tv_mode = &tv_modes[i]; - if ((intel_tv->type == DRM_MODE_CONNECTOR_Component) == - tv_mode->component_only) + if (!tv_mode->component_only) break; } @@ -1254,7 +1686,6 @@ intel_tv_detect(struct drm_connector *connector, struct drm_modeset_acquire_ctx *ctx, bool force) { - struct drm_display_mode mode; struct intel_tv *intel_tv = intel_attached_tv(connector); enum drm_connector_status status; int type; @@ -1263,13 +1694,11 @@ intel_tv_detect(struct drm_connector *connector, connector->base.id, connector->name, force); - mode = reported_modes[0]; - if (force) { struct intel_load_detect_pipe tmp; int ret; - ret = intel_get_load_detect_pipe(connector, &mode, &tmp, ctx); + ret = intel_get_load_detect_pipe(connector, NULL, &tmp, ctx); if (ret < 0) return ret; @@ -1293,84 +1722,85 @@ intel_tv_detect(struct drm_connector *connector, } static const struct input_res { - const char *name; - int w, h; + u16 w, h; } input_res_table[] = { - {"640x480", 640, 480}, - {"800x600", 800, 600}, - {"1024x768", 1024, 768}, - {"1280x1024", 1280, 1024}, - {"848x480", 848, 480}, - {"1280x720", 1280, 720}, - {"1920x1080", 1920, 1080}, + { 640, 480 }, + { 800, 600 }, + { 1024, 768 }, + { 1280, 1024 }, + { 848, 480 }, + { 1280, 720 }, + { 1920, 1080 }, }; -/* - * Chose preferred mode according to line number of TV format - */ +/* Choose preferred mode according to line number of TV format */ +static bool +intel_tv_is_preferred_mode(const struct drm_display_mode *mode, + const struct tv_mode *tv_mode) +{ + int vdisplay = intel_tv_mode_vdisplay(tv_mode); + + /* prefer 480 line modes for all SD TV modes */ + if (vdisplay <= 576) + vdisplay = 480; + + return vdisplay == mode->vdisplay; +} + static void -intel_tv_choose_preferred_modes(const struct tv_mode *tv_mode, - struct drm_display_mode *mode_ptr) +intel_tv_set_mode_type(struct drm_display_mode *mode, + const struct tv_mode *tv_mode) { - if (tv_mode->nbr_end < 480 && mode_ptr->vdisplay == 480) - mode_ptr->type |= DRM_MODE_TYPE_PREFERRED; - else if (tv_mode->nbr_end > 480) { - if (tv_mode->progressive == true && tv_mode->nbr_end < 720) { - if (mode_ptr->vdisplay == 720) - mode_ptr->type |= DRM_MODE_TYPE_PREFERRED; - } else if (mode_ptr->vdisplay == 1080) - mode_ptr->type |= DRM_MODE_TYPE_PREFERRED; - } + mode->type = DRM_MODE_TYPE_DRIVER; + + if (intel_tv_is_preferred_mode(mode, tv_mode)) + mode->type |= DRM_MODE_TYPE_PREFERRED; } static int intel_tv_get_modes(struct drm_connector *connector) { - struct drm_display_mode *mode_ptr; + struct drm_i915_private *dev_priv = to_i915(connector->dev); const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state); - int j, count = 0; - u64 tmp; + int i, count = 0; - for (j = 0; j < ARRAY_SIZE(input_res_table); - j++) { - const struct input_res *input = &input_res_table[j]; - unsigned int hactive_s = input->w; - unsigned int vactive_s = input->h; + for (i = 0; i < ARRAY_SIZE(input_res_table); i++) { + const struct input_res *input = &input_res_table[i]; + struct drm_display_mode *mode; - if (tv_mode->max_srcw && input->w > tv_mode->max_srcw) + if (input->w > 1024 && + !tv_mode->progressive && + !tv_mode->component_only) continue; - if (input->w > 1024 && (!tv_mode->progressive - && !tv_mode->component_only)) + /* no vertical scaling with wide sources on gen3 */ + if (IS_GEN(dev_priv, 3) && input->w > 1024 && + input->h > intel_tv_mode_vdisplay(tv_mode)) continue; - mode_ptr = drm_mode_create(connector->dev); - if (!mode_ptr) + mode = drm_mode_create(connector->dev); + if (!mode) continue; - strlcpy(mode_ptr->name, input->name, DRM_DISPLAY_MODE_LEN); - - mode_ptr->hdisplay = hactive_s; - mode_ptr->hsync_start = hactive_s + 1; - mode_ptr->hsync_end = hactive_s + 64; - if (mode_ptr->hsync_end <= mode_ptr->hsync_start) - mode_ptr->hsync_end = mode_ptr->hsync_start + 1; - mode_ptr->htotal = hactive_s + 96; - - mode_ptr->vdisplay = vactive_s; - mode_ptr->vsync_start = vactive_s + 1; - mode_ptr->vsync_end = vactive_s + 32; - if (mode_ptr->vsync_end <= mode_ptr->vsync_start) - mode_ptr->vsync_end = mode_ptr->vsync_start + 1; - mode_ptr->vtotal = vactive_s + 33; - - tmp = mul_u32_u32(tv_mode->refresh, mode_ptr->vtotal); - tmp *= mode_ptr->htotal; - tmp = div_u64(tmp, 1000000); - mode_ptr->clock = (int) tmp; - - mode_ptr->type = DRM_MODE_TYPE_DRIVER; - intel_tv_choose_preferred_modes(tv_mode, mode_ptr); - drm_mode_probed_add(connector, mode_ptr); + + /* + * We take the TV mode and scale it to look + * like it had the expected h/vdisplay. This + * provides the most information to userspace + * about the actual timings of the mode. We + * do ignore the margins though. + */ + intel_tv_mode_to_mode(mode, tv_mode); + if (count == 0) { + DRM_DEBUG_KMS("TV mode:\n"); + drm_mode_debug_printmodeline(mode); + } + intel_tv_scale_mode_horiz(mode, input->w, 0, 0); + intel_tv_scale_mode_vert(mode, input->h, 0, 0); + intel_tv_set_mode_type(mode, tv_mode); + + drm_mode_set_name(mode); + + drm_mode_probed_add(connector, mode); count++; } @@ -1383,7 +1813,7 @@ static const struct drm_connector_funcs intel_tv_connector_funcs = { .destroy = intel_connector_destroy, .fill_modes = drm_helper_probe_single_connector_modes, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, - .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, + .atomic_duplicate_state = intel_tv_connector_duplicate_state, }; static int intel_tv_atomic_check(struct drm_connector *connector, @@ -1530,11 +1960,15 @@ intel_tv_init(struct drm_i915_private *dev_priv) connector->doublescan_allowed = false; /* Create TV properties then attach current values */ - for (i = 0; i < ARRAY_SIZE(tv_modes); i++) + for (i = 0; i < ARRAY_SIZE(tv_modes); i++) { + /* 1080p50/1080p60 not supported on gen3 */ + if (IS_GEN(dev_priv, 3) && + tv_modes[i].oversample == 1) + break; + tv_format_names[i] = tv_modes[i].name; - drm_mode_create_tv_properties(dev, - ARRAY_SIZE(tv_modes), - tv_format_names); + } + drm_mode_create_tv_properties(dev, i, tv_format_names); drm_object_attach_property(&connector->base, dev->mode_config.tv_mode_property, state->tv.mode); diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c index 3210ad4e08f7..15f4a6dee5aa 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.c +++ b/drivers/gpu/drm/i915/intel_workarounds.c @@ -142,7 +142,8 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa) } static void -__wa_add(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val) +wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, + u32 val) { struct i915_wa wa = { .reg = reg, @@ -153,16 +154,32 @@ __wa_add(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val) _wa_add(wal, &wa); } -#define WA_REG(addr, mask, val) __wa_add(wal, (addr), (mask), (val)) +static void +wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) +{ + wa_write_masked_or(wal, reg, val, _MASKED_BIT_ENABLE(val)); +} + +static void +wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val) +{ + wa_write_masked_or(wal, reg, ~0, val); +} + +static void +wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val) +{ + wa_write_masked_or(wal, reg, val, val); +} #define WA_SET_BIT_MASKED(addr, mask) \ - WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask)) + wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask)) #define WA_CLR_BIT_MASKED(addr, mask) \ - WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask)) + wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_DISABLE(mask)) #define WA_SET_FIELD_MASKED(addr, mask, value) \ - WA_REG(addr, (mask), _MASKED_FIELD(mask, value)) + wa_write_masked_or(wal, (addr), (mask), _MASKED_FIELD((mask), (value))) static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine) { @@ -532,6 +549,12 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine) if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0)) WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3, GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC); + + /* WaEnableFloatBlendOptimization:icl */ + wa_write_masked_or(wal, + GEN10_CACHE_MODE_SS, + 0, /* write-only, so skip validation */ + _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE)); } void intel_engine_init_ctx_wa(struct intel_engine_cs *engine) @@ -603,43 +626,6 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq) } static void -wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) -{ - struct i915_wa wa = { - .reg = reg, - .mask = val, - .val = _MASKED_BIT_ENABLE(val) - }; - - _wa_add(wal, &wa); -} - -static void -wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, - u32 val) -{ - struct i915_wa wa = { - .reg = reg, - .mask = mask, - .val = val - }; - - _wa_add(wal, &wa); -} - -static void -wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val) -{ - wa_write_masked_or(wal, reg, ~0, val); -} - -static void -wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val) -{ - wa_write_masked_or(wal, reg, val, val); -} - -static void gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { /* WaDisableKillLogic:bxt,skl,kbl */ diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index d0553bc69705..32dce7176f63 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -84,7 +84,7 @@ static int populate_ggtt(struct drm_i915_private *i915, return -EINVAL; } - if (list_empty(&i915->ggtt.vm.inactive_list)) { + if (list_empty(&i915->ggtt.vm.bound_list)) { pr_err("No objects on the GGTT inactive list!\n"); return -EINVAL; } @@ -94,11 +94,14 @@ static int populate_ggtt(struct drm_i915_private *i915, static void unpin_ggtt(struct drm_i915_private *i915) { + struct i915_ggtt *ggtt = &i915->ggtt; struct i915_vma *vma; - list_for_each_entry(vma, &i915->ggtt.vm.inactive_list, vm_link) + mutex_lock(&ggtt->vm.mutex); + list_for_each_entry(vma, &i915->ggtt.vm.bound_list, vm_link) if (vma->obj->mm.quirked) i915_vma_unpin(vma); + mutex_unlock(&ggtt->vm.mutex); } static void cleanup_objects(struct drm_i915_private *i915, diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 06bde4a273cb..3850ef4a5ec8 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -1237,7 +1237,10 @@ static void track_vma_bind(struct i915_vma *vma) __i915_gem_object_pin_pages(obj); vma->pages = obj->mm.pages; - list_move_tail(&vma->vm_link, &vma->vm->inactive_list); + + mutex_lock(&vma->vm->mutex); + list_move_tail(&vma->vm_link, &vma->vm->bound_list); + mutex_unlock(&vma->vm->mutex); } static int exercise_mock(struct drm_i915_private *i915, diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index a15713cae3b3..76b4f87fc853 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -13,6 +13,7 @@ selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */ selftest(uncore, intel_uncore_live_selftests) selftest(workarounds, intel_workarounds_live_selftests) selftest(requests, i915_request_live_selftests) +selftest(timelines, i915_timeline_live_selftests) selftest(objects, i915_gem_object_live_selftests) selftest(dmabuf, i915_gem_dmabuf_live_selftests) selftest(coherency, i915_gem_coherency_live_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index 1b70208eeea7..88e5ab586337 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -15,8 +15,7 @@ selftest(scatterlist, scatterlist_mock_selftests) selftest(syncmap, i915_syncmap_mock_selftests) selftest(uncore, intel_uncore_mock_selftests) selftest(engine, intel_engine_cs_mock_selftests) -selftest(breadcrumbs, intel_breadcrumbs_mock_selftests) -selftest(timelines, i915_gem_timeline_mock_selftests) +selftest(timelines, i915_timeline_mock_selftests) selftest(requests, i915_request_mock_selftests) selftest(objects, i915_gem_object_mock_selftests) selftest(dmabuf, i915_gem_dmabuf_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_random.c b/drivers/gpu/drm/i915/selftests/i915_random.c index 1f415ce47018..716a3f19f030 100644 --- a/drivers/gpu/drm/i915/selftests/i915_random.c +++ b/drivers/gpu/drm/i915/selftests/i915_random.c @@ -41,18 +41,37 @@ u64 i915_prandom_u64_state(struct rnd_state *rnd) return x; } -void i915_random_reorder(unsigned int *order, unsigned int count, - struct rnd_state *state) +void i915_prandom_shuffle(void *arr, size_t elsz, size_t count, + struct rnd_state *state) { - unsigned int i, j; + char stack[128]; + + if (WARN_ON(elsz > sizeof(stack) || count > U32_MAX)) + return; + + if (!elsz || !count) + return; + + /* Fisher-Yates shuffle courtesy of Knuth */ + while (--count) { + size_t swp; + + swp = i915_prandom_u32_max_state(count + 1, state); + if (swp == count) + continue; - for (i = 0; i < count; i++) { - BUILD_BUG_ON(sizeof(unsigned int) > sizeof(u32)); - j = i915_prandom_u32_max_state(count, state); - swap(order[i], order[j]); + memcpy(stack, arr + count * elsz, elsz); + memcpy(arr + count * elsz, arr + swp * elsz, elsz); + memcpy(arr + swp * elsz, stack, elsz); } } +void i915_random_reorder(unsigned int *order, unsigned int count, + struct rnd_state *state) +{ + i915_prandom_shuffle(order, sizeof(*order), count, state); +} + unsigned int *i915_random_order(unsigned int count, struct rnd_state *state) { unsigned int *order, i; diff --git a/drivers/gpu/drm/i915/selftests/i915_random.h b/drivers/gpu/drm/i915/selftests/i915_random.h index 7dffedc501ca..8e1ff9c105b6 100644 --- a/drivers/gpu/drm/i915/selftests/i915_random.h +++ b/drivers/gpu/drm/i915/selftests/i915_random.h @@ -54,4 +54,7 @@ void i915_random_reorder(unsigned int *order, unsigned int count, struct rnd_state *state); +void i915_prandom_shuffle(void *arr, size_t elsz, size_t count, + struct rnd_state *state); + #endif /* !__I915_SELFTESTS_RANDOM_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 4d4b86b5fa11..6733dc5b6b4c 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -25,9 +25,12 @@ #include <linux/prime_numbers.h> #include "../i915_selftest.h" +#include "i915_random.h" #include "igt_live_test.h" +#include "lib_sw_fence.h" #include "mock_context.h" +#include "mock_drm.h" #include "mock_gem_device.h" static int igt_add_request(void *arg) @@ -247,6 +250,254 @@ err_context_0: return err; } +struct smoketest { + struct intel_engine_cs *engine; + struct i915_gem_context **contexts; + atomic_long_t num_waits, num_fences; + int ncontexts, max_batch; + struct i915_request *(*request_alloc)(struct i915_gem_context *, + struct intel_engine_cs *); +}; + +static struct i915_request * +__mock_request_alloc(struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + return mock_request(engine, ctx, 0); +} + +static struct i915_request * +__live_request_alloc(struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + return i915_request_alloc(engine, ctx); +} + +static int __igt_breadcrumbs_smoketest(void *arg) +{ + struct smoketest *t = arg; + struct mutex * const BKL = &t->engine->i915->drm.struct_mutex; + const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1; + const unsigned int total = 4 * t->ncontexts + 1; + unsigned int num_waits = 0, num_fences = 0; + struct i915_request **requests; + I915_RND_STATE(prng); + unsigned int *order; + int err = 0; + + /* + * A very simple test to catch the most egregious of list handling bugs. + * + * At its heart, we simply create oodles of requests running across + * multiple kthreads and enable signaling on them, for the sole purpose + * of stressing our breadcrumb handling. The only inspection we do is + * that the fences were marked as signaled. + */ + + requests = kmalloc_array(total, sizeof(*requests), GFP_KERNEL); + if (!requests) + return -ENOMEM; + + order = i915_random_order(total, &prng); + if (!order) { + err = -ENOMEM; + goto out_requests; + } + + while (!kthread_should_stop()) { + struct i915_sw_fence *submit, *wait; + unsigned int n, count; + + submit = heap_fence_create(GFP_KERNEL); + if (!submit) { + err = -ENOMEM; + break; + } + + wait = heap_fence_create(GFP_KERNEL); + if (!wait) { + i915_sw_fence_commit(submit); + heap_fence_put(submit); + err = ENOMEM; + break; + } + + i915_random_reorder(order, total, &prng); + count = 1 + i915_prandom_u32_max_state(max_batch, &prng); + + for (n = 0; n < count; n++) { + struct i915_gem_context *ctx = + t->contexts[order[n] % t->ncontexts]; + struct i915_request *rq; + + mutex_lock(BKL); + + rq = t->request_alloc(ctx, t->engine); + if (IS_ERR(rq)) { + mutex_unlock(BKL); + err = PTR_ERR(rq); + count = n; + break; + } + + err = i915_sw_fence_await_sw_fence_gfp(&rq->submit, + submit, + GFP_KERNEL); + + requests[n] = i915_request_get(rq); + i915_request_add(rq); + + mutex_unlock(BKL); + + if (err >= 0) + err = i915_sw_fence_await_dma_fence(wait, + &rq->fence, + 0, + GFP_KERNEL); + + if (err < 0) { + i915_request_put(rq); + count = n; + break; + } + } + + i915_sw_fence_commit(submit); + i915_sw_fence_commit(wait); + + if (!wait_event_timeout(wait->wait, + i915_sw_fence_done(wait), + HZ / 2)) { + struct i915_request *rq = requests[count - 1]; + + pr_err("waiting for %d fences (last %llx:%lld) on %s timed out!\n", + count, + rq->fence.context, rq->fence.seqno, + t->engine->name); + i915_gem_set_wedged(t->engine->i915); + GEM_BUG_ON(!i915_request_completed(rq)); + i915_sw_fence_wait(wait); + err = -EIO; + } + + for (n = 0; n < count; n++) { + struct i915_request *rq = requests[n]; + + if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &rq->fence.flags)) { + pr_err("%llu:%llu was not signaled!\n", + rq->fence.context, rq->fence.seqno); + err = -EINVAL; + } + + i915_request_put(rq); + } + + heap_fence_put(wait); + heap_fence_put(submit); + + if (err < 0) + break; + + num_fences += count; + num_waits++; + + cond_resched(); + } + + atomic_long_add(num_fences, &t->num_fences); + atomic_long_add(num_waits, &t->num_waits); + + kfree(order); +out_requests: + kfree(requests); + return err; +} + +static int mock_breadcrumbs_smoketest(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct smoketest t = { + .engine = i915->engine[RCS], + .ncontexts = 1024, + .max_batch = 1024, + .request_alloc = __mock_request_alloc + }; + unsigned int ncpus = num_online_cpus(); + struct task_struct **threads; + unsigned int n; + int ret = 0; + + /* + * Smoketest our breadcrumb/signal handling for requests across multiple + * threads. A very simple test to only catch the most egregious of bugs. + * See __igt_breadcrumbs_smoketest(); + */ + + threads = kmalloc_array(ncpus, sizeof(*threads), GFP_KERNEL); + if (!threads) + return -ENOMEM; + + t.contexts = + kmalloc_array(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL); + if (!t.contexts) { + ret = -ENOMEM; + goto out_threads; + } + + mutex_lock(&t.engine->i915->drm.struct_mutex); + for (n = 0; n < t.ncontexts; n++) { + t.contexts[n] = mock_context(t.engine->i915, "mock"); + if (!t.contexts[n]) { + ret = -ENOMEM; + goto out_contexts; + } + } + mutex_unlock(&t.engine->i915->drm.struct_mutex); + + for (n = 0; n < ncpus; n++) { + threads[n] = kthread_run(__igt_breadcrumbs_smoketest, + &t, "igt/%d", n); + if (IS_ERR(threads[n])) { + ret = PTR_ERR(threads[n]); + ncpus = n; + break; + } + + get_task_struct(threads[n]); + } + + msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies)); + + for (n = 0; n < ncpus; n++) { + int err; + + err = kthread_stop(threads[n]); + if (err < 0 && !ret) + ret = err; + + put_task_struct(threads[n]); + } + pr_info("Completed %lu waits for %lu fence across %d cpus\n", + atomic_long_read(&t.num_waits), + atomic_long_read(&t.num_fences), + ncpus); + + mutex_lock(&t.engine->i915->drm.struct_mutex); +out_contexts: + for (n = 0; n < t.ncontexts; n++) { + if (!t.contexts[n]) + break; + mock_context_close(t.contexts[n]); + } + mutex_unlock(&t.engine->i915->drm.struct_mutex); + kfree(t.contexts); +out_threads: + kfree(threads); + + return ret; +} + int i915_request_mock_selftests(void) { static const struct i915_subtest tests[] = { @@ -254,6 +505,7 @@ int i915_request_mock_selftests(void) SUBTEST(igt_wait_request), SUBTEST(igt_fence_wait), SUBTEST(igt_request_rewind), + SUBTEST(mock_breadcrumbs_smoketest), }; struct drm_i915_private *i915; intel_wakeref_t wakeref; @@ -812,6 +1064,178 @@ out_unlock: return err; } +static int +max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine) +{ + struct i915_request *rq; + int ret; + + /* + * Before execlists, all contexts share the same ringbuffer. With + * execlists, each context/engine has a separate ringbuffer and + * for the purposes of this test, inexhaustible. + * + * For the global ringbuffer though, we have to be very careful + * that we do not wrap while preventing the execution of requests + * with a unsignaled fence. + */ + if (HAS_EXECLISTS(ctx->i915)) + return INT_MAX; + + rq = i915_request_alloc(engine, ctx); + if (IS_ERR(rq)) { + ret = PTR_ERR(rq); + } else { + int sz; + + ret = rq->ring->size - rq->reserved_space; + i915_request_add(rq); + + sz = rq->ring->emit - rq->head; + if (sz < 0) + sz += rq->ring->size; + ret /= sz; + ret /= 2; /* leave half spare, in case of emergency! */ + } + + return ret; +} + +static int live_breadcrumbs_smoketest(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct smoketest t[I915_NUM_ENGINES]; + unsigned int ncpus = num_online_cpus(); + unsigned long num_waits, num_fences; + struct intel_engine_cs *engine; + struct task_struct **threads; + struct igt_live_test live; + enum intel_engine_id id; + intel_wakeref_t wakeref; + struct drm_file *file; + unsigned int n; + int ret = 0; + + /* + * Smoketest our breadcrumb/signal handling for requests across multiple + * threads. A very simple test to only catch the most egregious of bugs. + * See __igt_breadcrumbs_smoketest(); + * + * On real hardware this time. + */ + + wakeref = intel_runtime_pm_get(i915); + + file = mock_file(i915); + if (IS_ERR(file)) { + ret = PTR_ERR(file); + goto out_rpm; + } + + threads = kcalloc(ncpus * I915_NUM_ENGINES, + sizeof(*threads), + GFP_KERNEL); + if (!threads) { + ret = -ENOMEM; + goto out_file; + } + + memset(&t[0], 0, sizeof(t[0])); + t[0].request_alloc = __live_request_alloc; + t[0].ncontexts = 64; + t[0].contexts = kmalloc_array(t[0].ncontexts, + sizeof(*t[0].contexts), + GFP_KERNEL); + if (!t[0].contexts) { + ret = -ENOMEM; + goto out_threads; + } + + mutex_lock(&i915->drm.struct_mutex); + for (n = 0; n < t[0].ncontexts; n++) { + t[0].contexts[n] = live_context(i915, file); + if (!t[0].contexts[n]) { + ret = -ENOMEM; + goto out_contexts; + } + } + + ret = igt_live_test_begin(&live, i915, __func__, ""); + if (ret) + goto out_contexts; + + for_each_engine(engine, i915, id) { + t[id] = t[0]; + t[id].engine = engine; + t[id].max_batch = max_batches(t[0].contexts[0], engine); + if (t[id].max_batch < 0) { + ret = t[id].max_batch; + mutex_unlock(&i915->drm.struct_mutex); + goto out_flush; + } + /* One ring interleaved between requests from all cpus */ + t[id].max_batch /= num_online_cpus() + 1; + pr_debug("Limiting batches to %d requests on %s\n", + t[id].max_batch, engine->name); + + for (n = 0; n < ncpus; n++) { + struct task_struct *tsk; + + tsk = kthread_run(__igt_breadcrumbs_smoketest, + &t[id], "igt/%d.%d", id, n); + if (IS_ERR(tsk)) { + ret = PTR_ERR(tsk); + mutex_unlock(&i915->drm.struct_mutex); + goto out_flush; + } + + get_task_struct(tsk); + threads[id * ncpus + n] = tsk; + } + } + mutex_unlock(&i915->drm.struct_mutex); + + msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies)); + +out_flush: + num_waits = 0; + num_fences = 0; + for_each_engine(engine, i915, id) { + for (n = 0; n < ncpus; n++) { + struct task_struct *tsk = threads[id * ncpus + n]; + int err; + + if (!tsk) + continue; + + err = kthread_stop(tsk); + if (err < 0 && !ret) + ret = err; + + put_task_struct(tsk); + } + + num_waits += atomic_long_read(&t[id].num_waits); + num_fences += atomic_long_read(&t[id].num_fences); + } + pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n", + num_waits, num_fences, RUNTIME_INFO(i915)->num_rings, ncpus); + + mutex_lock(&i915->drm.struct_mutex); + ret = igt_live_test_end(&live) ?: ret; +out_contexts: + mutex_unlock(&i915->drm.struct_mutex); + kfree(t[0].contexts); +out_threads: + kfree(threads); +out_file: + mock_file_free(i915, file); +out_rpm: + intel_runtime_pm_put(i915, wakeref); + + return ret; +} + int i915_request_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { @@ -819,6 +1243,7 @@ int i915_request_live_selftests(struct drm_i915_private *i915) SUBTEST(live_all_engines), SUBTEST(live_sequential_engines), SUBTEST(live_empty_request), + SUBTEST(live_breadcrumbs_smoketest), }; if (i915_terminally_wedged(&i915->gpu_error)) diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c index 86c54ea37f48..10ef0e636a24 100644 --- a/drivers/gpu/drm/i915/selftests/i915_selftest.c +++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c @@ -197,6 +197,49 @@ int i915_live_selftests(struct pci_dev *pdev) return 0; } +static bool apply_subtest_filter(const char *caller, const char *name) +{ + char *filter, *sep, *tok; + bool result = true; + + filter = kstrdup(i915_selftest.filter, GFP_KERNEL); + for (sep = filter; (tok = strsep(&sep, ","));) { + bool allow = true; + char *sl; + + if (*tok == '!') { + allow = false; + tok++; + } + + if (*tok == '\0') + continue; + + sl = strchr(tok, '/'); + if (sl) { + *sl++ = '\0'; + if (strcmp(tok, caller)) { + if (allow) + result = false; + continue; + } + tok = sl; + } + + if (strcmp(tok, name)) { + if (allow) + result = false; + continue; + } + + result = allow; + break; + } + kfree(filter); + + return result; +} + int __i915_subtests(const char *caller, const struct i915_subtest *st, unsigned int count, @@ -209,6 +252,9 @@ int __i915_subtests(const char *caller, if (signal_pending(current)) return -EINTR; + if (!apply_subtest_filter(caller, st->name)) + continue; + pr_debug(DRIVER_NAME ": Running %s/%s\n", caller, st->name); GEM_TRACE("Running %s/%s\n", caller, st->name); @@ -244,6 +290,7 @@ bool __igt_timeout(unsigned long timeout, const char *fmt, ...) module_param_named(st_random_seed, i915_selftest.random_seed, uint, 0400); module_param_named(st_timeout, i915_selftest.timeout_ms, uint, 0400); +module_param_named(st_filter, i915_selftest.filter, charp, 0400); module_param_named_unsafe(mock_selftests, i915_selftest.mock, int, 0400); MODULE_PARM_DESC(mock_selftests, "Run selftests before loading, using mock hardware (0:disabled [default], 1:run tests then load driver, -1:run tests then exit module)"); diff --git a/drivers/gpu/drm/i915/selftests/i915_timeline.c b/drivers/gpu/drm/i915/selftests/i915_timeline.c index 19f1c6a5c8fb..12ea69b1a1e5 100644 --- a/drivers/gpu/drm/i915/selftests/i915_timeline.c +++ b/drivers/gpu/drm/i915/selftests/i915_timeline.c @@ -4,12 +4,155 @@ * Copyright © 2017-2018 Intel Corporation */ +#include <linux/prime_numbers.h> + #include "../i915_selftest.h" #include "i915_random.h" +#include "igt_flush_test.h" #include "mock_gem_device.h" #include "mock_timeline.h" +static struct page *hwsp_page(struct i915_timeline *tl) +{ + struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj; + + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + return sg_page(obj->mm.pages->sgl); +} + +static unsigned long hwsp_cacheline(struct i915_timeline *tl) +{ + unsigned long address = (unsigned long)page_address(hwsp_page(tl)); + + return (address + tl->hwsp_offset) / CACHELINE_BYTES; +} + +#define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES) + +struct mock_hwsp_freelist { + struct drm_i915_private *i915; + struct radix_tree_root cachelines; + struct i915_timeline **history; + unsigned long count, max; + struct rnd_state prng; +}; + +enum { + SHUFFLE = BIT(0), +}; + +static void __mock_hwsp_record(struct mock_hwsp_freelist *state, + unsigned int idx, + struct i915_timeline *tl) +{ + tl = xchg(&state->history[idx], tl); + if (tl) { + radix_tree_delete(&state->cachelines, hwsp_cacheline(tl)); + i915_timeline_put(tl); + } +} + +static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state, + unsigned int count, + unsigned int flags) +{ + struct i915_timeline *tl; + unsigned int idx; + + while (count--) { + unsigned long cacheline; + int err; + + tl = i915_timeline_create(state->i915, "mock", NULL); + if (IS_ERR(tl)) + return PTR_ERR(tl); + + cacheline = hwsp_cacheline(tl); + err = radix_tree_insert(&state->cachelines, cacheline, tl); + if (err) { + if (err == -EEXIST) { + pr_err("HWSP cacheline %lu already used; duplicate allocation!\n", + cacheline); + } + i915_timeline_put(tl); + return err; + } + + idx = state->count++ % state->max; + __mock_hwsp_record(state, idx, tl); + } + + if (flags & SHUFFLE) + i915_prandom_shuffle(state->history, + sizeof(*state->history), + min(state->count, state->max), + &state->prng); + + count = i915_prandom_u32_max_state(min(state->count, state->max), + &state->prng); + while (count--) { + idx = --state->count % state->max; + __mock_hwsp_record(state, idx, NULL); + } + + return 0; +} + +static int mock_hwsp_freelist(void *arg) +{ + struct mock_hwsp_freelist state; + const struct { + const char *name; + unsigned int flags; + } phases[] = { + { "linear", 0 }, + { "shuffled", SHUFFLE }, + { }, + }, *p; + unsigned int na; + int err = 0; + + INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL); + state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed); + + state.i915 = mock_gem_device(); + if (!state.i915) + return -ENOMEM; + + /* + * Create a bunch of timelines and check that their HWSP do not overlap. + * Free some, and try again. + */ + + state.max = PAGE_SIZE / sizeof(*state.history); + state.count = 0; + state.history = kcalloc(state.max, sizeof(*state.history), GFP_KERNEL); + if (!state.history) { + err = -ENOMEM; + goto err_put; + } + + mutex_lock(&state.i915->drm.struct_mutex); + for (p = phases; p->name; p++) { + pr_debug("%s(%s)\n", __func__, p->name); + for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) { + err = __mock_hwsp_timeline(&state, na, p->flags); + if (err) + goto out; + } + } + +out: + for (na = 0; na < state.max; na++) + __mock_hwsp_record(&state, na, NULL); + mutex_unlock(&state.i915->drm.struct_mutex); + kfree(state.history); +err_put: + drm_dev_put(&state.i915->drm); + return err; +} + struct __igt_sync { const char *name; u32 seqno; @@ -256,12 +399,331 @@ static int bench_sync(void *arg) return 0; } -int i915_gem_timeline_mock_selftests(void) +int i915_timeline_mock_selftests(void) { static const struct i915_subtest tests[] = { + SUBTEST(mock_hwsp_freelist), SUBTEST(igt_sync), SUBTEST(bench_sync), }; return i915_subtests(tests, NULL); } + +static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value) +{ + u32 *cs; + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + if (INTEL_GEN(rq->i915) >= 8) { + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = addr; + *cs++ = 0; + *cs++ = value; + } else if (INTEL_GEN(rq->i915) >= 4) { + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = 0; + *cs++ = addr; + *cs++ = value; + } else { + *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; + *cs++ = addr; + *cs++ = value; + *cs++ = MI_NOOP; + } + + intel_ring_advance(rq, cs); + + return 0; +} + +static struct i915_request * +tl_write(struct i915_timeline *tl, struct intel_engine_cs *engine, u32 value) +{ + struct i915_request *rq; + int err; + + lockdep_assert_held(&tl->i915->drm.struct_mutex); /* lazy rq refs */ + + err = i915_timeline_pin(tl); + if (err) { + rq = ERR_PTR(err); + goto out; + } + + rq = i915_request_alloc(engine, engine->i915->kernel_context); + if (IS_ERR(rq)) + goto out_unpin; + + err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value); + i915_request_add(rq); + if (err) + rq = ERR_PTR(err); + +out_unpin: + i915_timeline_unpin(tl); +out: + if (IS_ERR(rq)) + pr_err("Failed to write to timeline!\n"); + return rq; +} + +static struct i915_timeline * +checked_i915_timeline_create(struct drm_i915_private *i915) +{ + struct i915_timeline *tl; + + tl = i915_timeline_create(i915, "live", NULL); + if (IS_ERR(tl)) + return tl; + + if (*tl->hwsp_seqno != tl->seqno) { + pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n", + *tl->hwsp_seqno, tl->seqno); + i915_timeline_put(tl); + return ERR_PTR(-EINVAL); + } + + return tl; +} + +static int live_hwsp_engine(void *arg) +{ +#define NUM_TIMELINES 4096 + struct drm_i915_private *i915 = arg; + struct i915_timeline **timelines; + struct intel_engine_cs *engine; + enum intel_engine_id id; + intel_wakeref_t wakeref; + unsigned long count, n; + int err = 0; + + /* + * Create a bunch of timelines and check we can write + * independently to each of their breadcrumb slots. + */ + + timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES, + sizeof(*timelines), + GFP_KERNEL); + if (!timelines) + return -ENOMEM; + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(i915); + + count = 0; + for_each_engine(engine, i915, id) { + if (!intel_engine_can_store_dword(engine)) + continue; + + for (n = 0; n < NUM_TIMELINES; n++) { + struct i915_timeline *tl; + struct i915_request *rq; + + tl = checked_i915_timeline_create(i915); + if (IS_ERR(tl)) { + err = PTR_ERR(tl); + goto out; + } + + rq = tl_write(tl, engine, count); + if (IS_ERR(rq)) { + i915_timeline_put(tl); + err = PTR_ERR(rq); + goto out; + } + + timelines[count++] = tl; + } + } + +out: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + + for (n = 0; n < count; n++) { + struct i915_timeline *tl = timelines[n]; + + if (!err && *tl->hwsp_seqno != n) { + pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n", + n, *tl->hwsp_seqno); + err = -EINVAL; + } + i915_timeline_put(tl); + } + + intel_runtime_pm_put(i915, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + + kvfree(timelines); + + return err; +#undef NUM_TIMELINES +} + +static int live_hwsp_alternate(void *arg) +{ +#define NUM_TIMELINES 4096 + struct drm_i915_private *i915 = arg; + struct i915_timeline **timelines; + struct intel_engine_cs *engine; + enum intel_engine_id id; + intel_wakeref_t wakeref; + unsigned long count, n; + int err = 0; + + /* + * Create a bunch of timelines and check we can write + * independently to each of their breadcrumb slots with adjacent + * engines. + */ + + timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES, + sizeof(*timelines), + GFP_KERNEL); + if (!timelines) + return -ENOMEM; + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(i915); + + count = 0; + for (n = 0; n < NUM_TIMELINES; n++) { + for_each_engine(engine, i915, id) { + struct i915_timeline *tl; + struct i915_request *rq; + + if (!intel_engine_can_store_dword(engine)) + continue; + + tl = checked_i915_timeline_create(i915); + if (IS_ERR(tl)) { + err = PTR_ERR(tl); + goto out; + } + + rq = tl_write(tl, engine, count); + if (IS_ERR(rq)) { + i915_timeline_put(tl); + err = PTR_ERR(rq); + goto out; + } + + timelines[count++] = tl; + } + } + +out: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + + for (n = 0; n < count; n++) { + struct i915_timeline *tl = timelines[n]; + + if (!err && *tl->hwsp_seqno != n) { + pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n", + n, *tl->hwsp_seqno); + err = -EINVAL; + } + i915_timeline_put(tl); + } + + intel_runtime_pm_put(i915, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + + kvfree(timelines); + + return err; +#undef NUM_TIMELINES +} + +static int live_hwsp_recycle(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + intel_wakeref_t wakeref; + unsigned long count; + int err = 0; + + /* + * Check seqno writes into one timeline at a time. We expect to + * recycle the breadcrumb slot between iterations and neither + * want to confuse ourselves or the GPU. + */ + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(i915); + + count = 0; + for_each_engine(engine, i915, id) { + IGT_TIMEOUT(end_time); + + if (!intel_engine_can_store_dword(engine)) + continue; + + do { + struct i915_timeline *tl; + struct i915_request *rq; + + tl = checked_i915_timeline_create(i915); + if (IS_ERR(tl)) { + err = PTR_ERR(tl); + goto out; + } + + rq = tl_write(tl, engine, count); + if (IS_ERR(rq)) { + i915_timeline_put(tl); + err = PTR_ERR(rq); + goto out; + } + + if (i915_request_wait(rq, + I915_WAIT_LOCKED, + HZ / 5) < 0) { + pr_err("Wait for timeline writes timed out!\n"); + i915_timeline_put(tl); + err = -EIO; + goto out; + } + + if (*tl->hwsp_seqno != count) { + pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n", + count, *tl->hwsp_seqno); + err = -EINVAL; + } + + i915_timeline_put(tl); + count++; + + if (err) + goto out; + + i915_timelines_park(i915); /* Encourage recycling! */ + } while (!__igt_timeout(end_time, NULL)); + } + +out: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + intel_runtime_pm_put(i915, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + + return err; +} + +int i915_timeline_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_hwsp_recycle), + SUBTEST(live_hwsp_engine), + SUBTEST(live_hwsp_alternate), + }; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index f0a32edfb9b1..cf1de82741fa 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -672,7 +672,7 @@ static int igt_vma_partial(void *arg) } count = 0; - list_for_each_entry(vma, &obj->vma_list, obj_link) + list_for_each_entry(vma, &obj->vma.list, obj_link) count++; if (count != nvma) { pr_err("(%s) All partial vma were not recorded on the obj->vma_list: found %u, expected %u\n", @@ -701,7 +701,7 @@ static int igt_vma_partial(void *arg) i915_vma_unpin(vma); count = 0; - list_for_each_entry(vma, &obj->vma_list, obj_link) + list_for_each_entry(vma, &obj->vma.list, obj_link) count++; if (count != nvma) { pr_err("(%s) allocated an extra full vma!\n", p->name); diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c index 5deb485fb942..3e902761cd16 100644 --- a/drivers/gpu/drm/i915/selftests/igt_live_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c @@ -35,7 +35,6 @@ int igt_live_test_begin(struct igt_live_test *t, return err; } - i915->gpu_error.missed_irq_rings = 0; t->reset_global = i915_reset_count(&i915->gpu_error); for_each_engine(engine, i915, id) @@ -75,11 +74,5 @@ int igt_live_test_end(struct igt_live_test *t) return -EIO; } - if (i915->gpu_error.missed_irq_rings) { - pr_err("%s(%s): Missed interrupts on engines %lx\n", - t->func, t->name, i915->gpu_error.missed_irq_rings); - return -EIO; - } - return 0; } diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c index 0e70df0230b8..9ebd9225684e 100644 --- a/drivers/gpu/drm/i915/selftests/igt_spinner.c +++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c @@ -185,11 +185,6 @@ void igt_spinner_fini(struct igt_spinner *spin) bool igt_wait_for_spinner(struct igt_spinner *spin, struct i915_request *rq) { - if (!wait_event_timeout(rq->execute, - READ_ONCE(rq->global_seqno), - msecs_to_jiffies(10))) - return false; - return !(wait_for_us(i915_seqno_passed(hws_seqno(spin, rq), rq->fence.seqno), 10) && diff --git a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c deleted file mode 100644 index f03b407fdbe2..000000000000 --- a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c +++ /dev/null @@ -1,470 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#include "../i915_selftest.h" -#include "i915_random.h" - -#include "mock_gem_device.h" -#include "mock_engine.h" - -static int check_rbtree(struct intel_engine_cs *engine, - const unsigned long *bitmap, - const struct intel_wait *waiters, - const int count) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct rb_node *rb; - int n; - - if (&b->irq_wait->node != rb_first(&b->waiters)) { - pr_err("First waiter does not match first element of wait-tree\n"); - return -EINVAL; - } - - n = find_first_bit(bitmap, count); - for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { - struct intel_wait *w = container_of(rb, typeof(*w), node); - int idx = w - waiters; - - if (!test_bit(idx, bitmap)) { - pr_err("waiter[%d, seqno=%d] removed but still in wait-tree\n", - idx, w->seqno); - return -EINVAL; - } - - if (n != idx) { - pr_err("waiter[%d, seqno=%d] does not match expected next element in tree [%d]\n", - idx, w->seqno, n); - return -EINVAL; - } - - n = find_next_bit(bitmap, count, n + 1); - } - - return 0; -} - -static int check_completion(struct intel_engine_cs *engine, - const unsigned long *bitmap, - const struct intel_wait *waiters, - const int count) -{ - int n; - - for (n = 0; n < count; n++) { - if (intel_wait_complete(&waiters[n]) != !!test_bit(n, bitmap)) - continue; - - pr_err("waiter[%d, seqno=%d] is %s, but expected %s\n", - n, waiters[n].seqno, - intel_wait_complete(&waiters[n]) ? "complete" : "active", - test_bit(n, bitmap) ? "active" : "complete"); - return -EINVAL; - } - - return 0; -} - -static int check_rbtree_empty(struct intel_engine_cs *engine) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - if (b->irq_wait) { - pr_err("Empty breadcrumbs still has a waiter\n"); - return -EINVAL; - } - - if (!RB_EMPTY_ROOT(&b->waiters)) { - pr_err("Empty breadcrumbs, but wait-tree not empty\n"); - return -EINVAL; - } - - return 0; -} - -static int igt_random_insert_remove(void *arg) -{ - const u32 seqno_bias = 0x1000; - I915_RND_STATE(prng); - struct intel_engine_cs *engine = arg; - struct intel_wait *waiters; - const int count = 4096; - unsigned int *order; - unsigned long *bitmap; - int err = -ENOMEM; - int n; - - mock_engine_reset(engine); - - waiters = kvmalloc_array(count, sizeof(*waiters), GFP_KERNEL); - if (!waiters) - goto out_engines; - - bitmap = kcalloc(DIV_ROUND_UP(count, BITS_PER_LONG), sizeof(*bitmap), - GFP_KERNEL); - if (!bitmap) - goto out_waiters; - - order = i915_random_order(count, &prng); - if (!order) - goto out_bitmap; - - for (n = 0; n < count; n++) - intel_wait_init_for_seqno(&waiters[n], seqno_bias + n); - - err = check_rbtree(engine, bitmap, waiters, count); - if (err) - goto out_order; - - /* Add and remove waiters into the rbtree in random order. At each - * step, we verify that the rbtree is correctly ordered. - */ - for (n = 0; n < count; n++) { - int i = order[n]; - - intel_engine_add_wait(engine, &waiters[i]); - __set_bit(i, bitmap); - - err = check_rbtree(engine, bitmap, waiters, count); - if (err) - goto out_order; - } - - i915_random_reorder(order, count, &prng); - for (n = 0; n < count; n++) { - int i = order[n]; - - intel_engine_remove_wait(engine, &waiters[i]); - __clear_bit(i, bitmap); - - err = check_rbtree(engine, bitmap, waiters, count); - if (err) - goto out_order; - } - - err = check_rbtree_empty(engine); -out_order: - kfree(order); -out_bitmap: - kfree(bitmap); -out_waiters: - kvfree(waiters); -out_engines: - mock_engine_flush(engine); - return err; -} - -static int igt_insert_complete(void *arg) -{ - const u32 seqno_bias = 0x1000; - struct intel_engine_cs *engine = arg; - struct intel_wait *waiters; - const int count = 4096; - unsigned long *bitmap; - int err = -ENOMEM; - int n, m; - - mock_engine_reset(engine); - - waiters = kvmalloc_array(count, sizeof(*waiters), GFP_KERNEL); - if (!waiters) - goto out_engines; - - bitmap = kcalloc(DIV_ROUND_UP(count, BITS_PER_LONG), sizeof(*bitmap), - GFP_KERNEL); - if (!bitmap) - goto out_waiters; - - for (n = 0; n < count; n++) { - intel_wait_init_for_seqno(&waiters[n], n + seqno_bias); - intel_engine_add_wait(engine, &waiters[n]); - __set_bit(n, bitmap); - } - err = check_rbtree(engine, bitmap, waiters, count); - if (err) - goto out_bitmap; - - /* On each step, we advance the seqno so that several waiters are then - * complete (we increase the seqno by increasingly larger values to - * retire more and more waiters at once). All retired waiters should - * be woken and removed from the rbtree, and so that we check. - */ - for (n = 0; n < count; n = m) { - int seqno = 2 * n; - - GEM_BUG_ON(find_first_bit(bitmap, count) != n); - - if (intel_wait_complete(&waiters[n])) { - pr_err("waiter[%d, seqno=%d] completed too early\n", - n, waiters[n].seqno); - err = -EINVAL; - goto out_bitmap; - } - - /* complete the following waiters */ - mock_seqno_advance(engine, seqno + seqno_bias); - for (m = n; m <= seqno; m++) { - if (m == count) - break; - - GEM_BUG_ON(!test_bit(m, bitmap)); - __clear_bit(m, bitmap); - } - - intel_engine_remove_wait(engine, &waiters[n]); - RB_CLEAR_NODE(&waiters[n].node); - - err = check_rbtree(engine, bitmap, waiters, count); - if (err) { - pr_err("rbtree corrupt after seqno advance to %d\n", - seqno + seqno_bias); - goto out_bitmap; - } - - err = check_completion(engine, bitmap, waiters, count); - if (err) { - pr_err("completions after seqno advance to %d failed\n", - seqno + seqno_bias); - goto out_bitmap; - } - } - - err = check_rbtree_empty(engine); -out_bitmap: - kfree(bitmap); -out_waiters: - kvfree(waiters); -out_engines: - mock_engine_flush(engine); - return err; -} - -struct igt_wakeup { - struct task_struct *tsk; - atomic_t *ready, *set, *done; - struct intel_engine_cs *engine; - unsigned long flags; -#define STOP 0 -#define IDLE 1 - wait_queue_head_t *wq; - u32 seqno; -}; - -static bool wait_for_ready(struct igt_wakeup *w) -{ - DEFINE_WAIT(ready); - - set_bit(IDLE, &w->flags); - if (atomic_dec_and_test(w->done)) - wake_up_var(w->done); - - if (test_bit(STOP, &w->flags)) - goto out; - - for (;;) { - prepare_to_wait(w->wq, &ready, TASK_INTERRUPTIBLE); - if (atomic_read(w->ready) == 0) - break; - - schedule(); - } - finish_wait(w->wq, &ready); - -out: - clear_bit(IDLE, &w->flags); - if (atomic_dec_and_test(w->set)) - wake_up_var(w->set); - - return !test_bit(STOP, &w->flags); -} - -static int igt_wakeup_thread(void *arg) -{ - struct igt_wakeup *w = arg; - struct intel_wait wait; - - while (wait_for_ready(w)) { - GEM_BUG_ON(kthread_should_stop()); - - intel_wait_init_for_seqno(&wait, w->seqno); - intel_engine_add_wait(w->engine, &wait); - for (;;) { - set_current_state(TASK_UNINTERRUPTIBLE); - if (i915_seqno_passed(intel_engine_get_seqno(w->engine), - w->seqno)) - break; - - if (test_bit(STOP, &w->flags)) /* emergency escape */ - break; - - schedule(); - } - intel_engine_remove_wait(w->engine, &wait); - __set_current_state(TASK_RUNNING); - } - - return 0; -} - -static void igt_wake_all_sync(atomic_t *ready, - atomic_t *set, - atomic_t *done, - wait_queue_head_t *wq, - int count) -{ - atomic_set(set, count); - atomic_set(ready, 0); - wake_up_all(wq); - - wait_var_event(set, !atomic_read(set)); - atomic_set(ready, count); - atomic_set(done, count); -} - -static int igt_wakeup(void *arg) -{ - I915_RND_STATE(prng); - struct intel_engine_cs *engine = arg; - struct igt_wakeup *waiters; - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); - const int count = 4096; - const u32 max_seqno = count / 4; - atomic_t ready, set, done; - int err = -ENOMEM; - int n, step; - - mock_engine_reset(engine); - - waiters = kvmalloc_array(count, sizeof(*waiters), GFP_KERNEL); - if (!waiters) - goto out_engines; - - /* Create a large number of threads, each waiting on a random seqno. - * Multiple waiters will be waiting for the same seqno. - */ - atomic_set(&ready, count); - for (n = 0; n < count; n++) { - waiters[n].wq = &wq; - waiters[n].ready = &ready; - waiters[n].set = &set; - waiters[n].done = &done; - waiters[n].engine = engine; - waiters[n].flags = BIT(IDLE); - - waiters[n].tsk = kthread_run(igt_wakeup_thread, &waiters[n], - "i915/igt:%d", n); - if (IS_ERR(waiters[n].tsk)) - goto out_waiters; - - get_task_struct(waiters[n].tsk); - } - - for (step = 1; step <= max_seqno; step <<= 1) { - u32 seqno; - - /* The waiter threads start paused as we assign them a random - * seqno and reset the engine. Once the engine is reset, - * we signal that the threads may begin their wait upon their - * seqno. - */ - for (n = 0; n < count; n++) { - GEM_BUG_ON(!test_bit(IDLE, &waiters[n].flags)); - waiters[n].seqno = - 1 + prandom_u32_state(&prng) % max_seqno; - } - mock_seqno_advance(engine, 0); - igt_wake_all_sync(&ready, &set, &done, &wq, count); - - /* Simulate the GPU doing chunks of work, with one or more - * seqno appearing to finish at the same time. A random number - * of threads will be waiting upon the update and hopefully be - * woken. - */ - for (seqno = 1; seqno <= max_seqno + step; seqno += step) { - usleep_range(50, 500); - mock_seqno_advance(engine, seqno); - } - GEM_BUG_ON(intel_engine_get_seqno(engine) < 1 + max_seqno); - - /* With the seqno now beyond any of the waiting threads, they - * should all be woken, see that they are complete and signal - * that they are ready for the next test. We wait until all - * threads are complete and waiting for us (i.e. not a seqno). - */ - if (!wait_var_event_timeout(&done, - !atomic_read(&done), 10 * HZ)) { - pr_err("Timed out waiting for %d remaining waiters\n", - atomic_read(&done)); - err = -ETIMEDOUT; - break; - } - - err = check_rbtree_empty(engine); - if (err) - break; - } - -out_waiters: - for (n = 0; n < count; n++) { - if (IS_ERR(waiters[n].tsk)) - break; - - set_bit(STOP, &waiters[n].flags); - } - mock_seqno_advance(engine, INT_MAX); /* wakeup any broken waiters */ - igt_wake_all_sync(&ready, &set, &done, &wq, n); - - for (n = 0; n < count; n++) { - if (IS_ERR(waiters[n].tsk)) - break; - - kthread_stop(waiters[n].tsk); - put_task_struct(waiters[n].tsk); - } - - kvfree(waiters); -out_engines: - mock_engine_flush(engine); - return err; -} - -int intel_breadcrumbs_mock_selftests(void) -{ - static const struct i915_subtest tests[] = { - SUBTEST(igt_random_insert_remove), - SUBTEST(igt_insert_complete), - SUBTEST(igt_wakeup), - }; - struct drm_i915_private *i915; - int err; - - i915 = mock_gem_device(); - if (!i915) - return -ENOMEM; - - err = i915_subtests(tests, i915->engine[RCS]); - drm_dev_put(&i915->drm); - - return err; -} diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 12550b55c42f..7b6f3bea9ef8 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -363,9 +363,7 @@ static int igt_global_reset(void *arg) /* Check that we can issue a global GPU reset */ igt_global_reset_lock(i915); - set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags); - mutex_lock(&i915->drm.struct_mutex); reset_count = i915_reset_count(&i915->gpu_error); i915_reset(i915, ALL_ENGINES, NULL); @@ -374,9 +372,7 @@ static int igt_global_reset(void *arg) pr_err("No GPU reset recorded!\n"); err = -EINVAL; } - mutex_unlock(&i915->drm.struct_mutex); - GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags)); igt_global_reset_unlock(i915); if (i915_terminally_wedged(&i915->gpu_error)) @@ -393,18 +389,16 @@ static int igt_wedged_reset(void *arg) /* Check that we can recover a wedged device with a GPU reset */ igt_global_reset_lock(i915); - mutex_lock(&i915->drm.struct_mutex); wakeref = intel_runtime_pm_get(i915); i915_gem_set_wedged(i915); - GEM_BUG_ON(!i915_terminally_wedged(&i915->gpu_error)); - set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags); + mutex_lock(&i915->drm.struct_mutex); + GEM_BUG_ON(!i915_terminally_wedged(&i915->gpu_error)); i915_reset(i915, ALL_ENGINES, NULL); - GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags)); + mutex_unlock(&i915->drm.struct_mutex); intel_runtime_pm_put(i915, wakeref); - mutex_unlock(&i915->drm.struct_mutex); igt_global_reset_unlock(i915); return i915_terminally_wedged(&i915->gpu_error) ? -EIO : 0; @@ -455,8 +449,6 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); do { - u32 seqno = intel_engine_get_seqno(engine); - if (active) { struct i915_request *rq; @@ -485,8 +477,6 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) break; } - GEM_BUG_ON(!rq->global_seqno); - seqno = rq->global_seqno - 1; i915_request_put(rq); } @@ -502,16 +492,15 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) break; } - reset_engine_count += active; if (i915_reset_engine_count(&i915->gpu_error, engine) != - reset_engine_count) { - pr_err("%s engine reset %srecorded!\n", - engine->name, active ? "not " : ""); + ++reset_engine_count) { + pr_err("%s engine reset not recorded!\n", + engine->name); err = -EINVAL; break; } - if (!wait_for_idle(engine)) { + if (!i915_reset_flush(i915)) { struct drm_printer p = drm_info_printer(i915->drm.dev); @@ -734,7 +723,6 @@ static int __igt_reset_engines(struct drm_i915_private *i915, set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); do { - u32 seqno = intel_engine_get_seqno(engine); struct i915_request *rq = NULL; if (flags & TEST_ACTIVE) { @@ -762,9 +750,6 @@ static int __igt_reset_engines(struct drm_i915_private *i915, err = -EIO; break; } - - GEM_BUG_ON(!rq->global_seqno); - seqno = rq->global_seqno - 1; } err = i915_reset_engine(engine, NULL); @@ -801,10 +786,9 @@ static int __igt_reset_engines(struct drm_i915_private *i915, reported = i915_reset_engine_count(&i915->gpu_error, engine); reported -= threads[engine->id].resets; - if (reported != (flags & TEST_ACTIVE ? count : 0)) { - pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu, expected %lu reported\n", - engine->name, test_name, count, reported, - (flags & TEST_ACTIVE ? count : 0)); + if (reported != count) { + pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu\n", + engine->name, test_name, count, reported); if (!err) err = -EINVAL; } @@ -903,20 +887,13 @@ static int igt_reset_engines(void *arg) return 0; } -static u32 fake_hangcheck(struct i915_request *rq, u32 mask) +static u32 fake_hangcheck(struct drm_i915_private *i915, u32 mask) { - struct i915_gpu_error *error = &rq->i915->gpu_error; - u32 reset_count = i915_reset_count(error); - - error->stalled_mask = mask; - - /* set_bit() must be after we have setup the backchannel (mask) */ - smp_mb__before_atomic(); - set_bit(I915_RESET_HANDOFF, &error->flags); + u32 count = i915_reset_count(&i915->gpu_error); - wake_up_all(&error->wait_queue); + i915_reset(i915, mask, NULL); - return reset_count; + return count; } static int igt_reset_wait(void *arg) @@ -962,7 +939,7 @@ static int igt_reset_wait(void *arg) goto out_rq; } - reset_count = fake_hangcheck(rq, ALL_ENGINES); + reset_count = fake_hangcheck(i915, ALL_ENGINES); timeout = i915_request_wait(rq, I915_WAIT_LOCKED, 10); if (timeout < 0) { @@ -972,7 +949,6 @@ static int igt_reset_wait(void *arg) goto out_rq; } - GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags)); if (i915_reset_count(&i915->gpu_error) == reset_count) { pr_err("No GPU reset recorded!\n"); err = -EINVAL; @@ -1151,7 +1127,7 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915, wait_for_completion(&arg.completion); - if (wait_for(waitqueue_active(&rq->execute), 10)) { + if (wait_for(!list_empty(&rq->fence.cb_list), 10)) { struct drm_printer p = drm_info_printer(i915->drm.dev); pr_err("igt/evict_vma kthread did not wait\n"); @@ -1162,7 +1138,7 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915, } out_reset: - fake_hangcheck(rq, intel_engine_flag(rq->engine)); + fake_hangcheck(rq->i915, intel_engine_flag(rq->engine)); if (tsk) { struct igt_wedge_me w; @@ -1341,12 +1317,7 @@ static int igt_reset_queue(void *arg) goto fini; } - reset_count = fake_hangcheck(prev, ENGINE_MASK(id)); - - i915_reset(i915, ENGINE_MASK(id), NULL); - - GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, - &i915->gpu_error.flags)); + reset_count = fake_hangcheck(i915, ENGINE_MASK(id)); if (prev->fence.error != -EIO) { pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n", @@ -1565,6 +1536,7 @@ static int igt_atomic_reset_engine(struct intel_engine_cs *engine, pr_err("%s(%s): Failed to start request %llx, at %x\n", __func__, engine->name, rq->fence.seqno, hws_seqno(&h, rq)); + i915_gem_set_wedged(i915); err = -EIO; } @@ -1588,7 +1560,6 @@ out: static void force_reset(struct drm_i915_private *i915) { i915_gem_set_wedged(i915); - set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags); i915_reset(i915, 0, NULL); } @@ -1618,6 +1589,26 @@ static int igt_atomic_reset(void *arg) if (i915_terminally_wedged(&i915->gpu_error)) goto unlock; + if (intel_has_gpu_reset(i915)) { + const typeof(*phases) *p; + + for (p = phases; p->name; p++) { + GEM_TRACE("intel_gpu_reset under %s\n", p->name); + + p->critical_section_begin(); + err = intel_gpu_reset(i915, ALL_ENGINES); + p->critical_section_end(); + + if (err) { + pr_err("intel_gpu_reset failed under %s\n", + p->name); + goto out; + } + } + + force_reset(i915); + } + if (intel_has_reset_engine(i915)) { struct intel_engine_cs *engine; enum intel_engine_id id; @@ -1674,6 +1665,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) wakeref = intel_runtime_pm_get(i915); saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck); + drain_delayed_work(&i915->gpu_error.hangcheck_work); /* flush param */ err = i915_subtests(tests, i915); diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c index 2b2ecd76c2ac..fb35f53c9ce3 100644 --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c @@ -268,6 +268,143 @@ err_wedged: goto err_ctx_lo; } +struct preempt_client { + struct igt_spinner spin; + struct i915_gem_context *ctx; +}; + +static int preempt_client_init(struct drm_i915_private *i915, + struct preempt_client *c) +{ + c->ctx = kernel_context(i915); + if (!c->ctx) + return -ENOMEM; + + if (igt_spinner_init(&c->spin, i915)) + goto err_ctx; + + return 0; + +err_ctx: + kernel_context_close(c->ctx); + return -ENOMEM; +} + +static void preempt_client_fini(struct preempt_client *c) +{ + igt_spinner_fini(&c->spin); + kernel_context_close(c->ctx); +} + +static int live_suppress_self_preempt(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct i915_sched_attr attr = { + .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX) + }; + struct preempt_client a, b; + enum intel_engine_id id; + intel_wakeref_t wakeref; + int err = -ENOMEM; + + /* + * Verify that if a preemption request does not cause a change in + * the current execution order, the preempt-to-idle injection is + * skipped and that we do not accidentally apply it after the CS + * completion event. + */ + + if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + return 0; + + if (USES_GUC_SUBMISSION(i915)) + return 0; /* presume black blox */ + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(i915); + + if (preempt_client_init(i915, &a)) + goto err_unlock; + if (preempt_client_init(i915, &b)) + goto err_client_a; + + for_each_engine(engine, i915, id) { + struct i915_request *rq_a, *rq_b; + int depth; + + engine->execlists.preempt_hang.count = 0; + + rq_a = igt_spinner_create_request(&a.spin, + a.ctx, engine, + MI_NOOP); + if (IS_ERR(rq_a)) { + err = PTR_ERR(rq_a); + goto err_client_b; + } + + i915_request_add(rq_a); + if (!igt_wait_for_spinner(&a.spin, rq_a)) { + pr_err("First client failed to start\n"); + goto err_wedged; + } + + for (depth = 0; depth < 8; depth++) { + rq_b = igt_spinner_create_request(&b.spin, + b.ctx, engine, + MI_NOOP); + if (IS_ERR(rq_b)) { + err = PTR_ERR(rq_b); + goto err_client_b; + } + i915_request_add(rq_b); + + GEM_BUG_ON(i915_request_completed(rq_a)); + engine->schedule(rq_a, &attr); + igt_spinner_end(&a.spin); + + if (!igt_wait_for_spinner(&b.spin, rq_b)) { + pr_err("Second client failed to start\n"); + goto err_wedged; + } + + swap(a, b); + rq_a = rq_b; + } + igt_spinner_end(&a.spin); + + if (engine->execlists.preempt_hang.count) { + pr_err("Preemption recorded x%d, depth %d; should have been suppressed!\n", + engine->execlists.preempt_hang.count, + depth); + err = -EINVAL; + goto err_client_b; + } + + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + goto err_wedged; + } + + err = 0; +err_client_b: + preempt_client_fini(&b); +err_client_a: + preempt_client_fini(&a); +err_unlock: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + intel_runtime_pm_put(i915, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + return err; + +err_wedged: + igt_spinner_end(&b.spin); + igt_spinner_end(&a.spin); + i915_gem_set_wedged(i915); + err = -EIO; + goto err_client_b; +} + static int live_preempt_hang(void *arg) { struct drm_i915_private *i915 = arg; @@ -647,6 +784,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_sanitycheck), SUBTEST(live_preempt), SUBTEST(live_late_preempt), + SUBTEST(live_suppress_self_preempt), SUBTEST(live_preempt_hang), SUBTEST(live_preempt_smoke), }; diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c index a8cac56be835..b15c4f26c593 100644 --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c @@ -214,7 +214,6 @@ out_put: static int do_device_reset(struct intel_engine_cs *engine) { - set_bit(I915_RESET_HANDOFF, &engine->i915->gpu_error.flags); i915_reset(engine->i915, ENGINE_MASK(engine->id), "live_workarounds"); return 0; } @@ -394,7 +393,6 @@ static int live_gpu_reset_gt_engine_workarounds(void *arg) { struct drm_i915_private *i915 = arg; - struct i915_gpu_error *error = &i915->gpu_error; intel_wakeref_t wakeref; struct wa_lists lists; bool ok; @@ -413,7 +411,6 @@ live_gpu_reset_gt_engine_workarounds(void *arg) if (!ok) goto out; - set_bit(I915_RESET_HANDOFF, &error->flags); i915_reset(i915, ALL_ENGINES, "live_workarounds"); ok = verify_gt_engine_wa(i915, &lists, "after reset"); diff --git a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c index b26f07b55d86..2bfa72c1654b 100644 --- a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c +++ b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c @@ -76,3 +76,57 @@ void timed_fence_fini(struct timed_fence *tf) destroy_timer_on_stack(&tf->timer); i915_sw_fence_fini(&tf->fence); } + +struct heap_fence { + struct i915_sw_fence fence; + union { + struct kref ref; + struct rcu_head rcu; + }; +}; + +static int __i915_sw_fence_call +heap_fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) +{ + struct heap_fence *h = container_of(fence, typeof(*h), fence); + + switch (state) { + case FENCE_COMPLETE: + break; + + case FENCE_FREE: + heap_fence_put(&h->fence); + } + + return NOTIFY_DONE; +} + +struct i915_sw_fence *heap_fence_create(gfp_t gfp) +{ + struct heap_fence *h; + + h = kmalloc(sizeof(*h), gfp); + if (!h) + return NULL; + + i915_sw_fence_init(&h->fence, heap_fence_notify); + refcount_set(&h->ref.refcount, 2); + + return &h->fence; +} + +static void heap_fence_release(struct kref *ref) +{ + struct heap_fence *h = container_of(ref, typeof(*h), ref); + + i915_sw_fence_fini(&h->fence); + + kfree_rcu(h, rcu); +} + +void heap_fence_put(struct i915_sw_fence *fence) +{ + struct heap_fence *h = container_of(fence, typeof(*h), fence); + + kref_put(&h->ref, heap_fence_release); +} diff --git a/drivers/gpu/drm/i915/selftests/lib_sw_fence.h b/drivers/gpu/drm/i915/selftests/lib_sw_fence.h index 474aafb92ae1..1f9927e10f3a 100644 --- a/drivers/gpu/drm/i915/selftests/lib_sw_fence.h +++ b/drivers/gpu/drm/i915/selftests/lib_sw_fence.h @@ -39,4 +39,7 @@ struct timed_fence { void timed_fence_init(struct timed_fence *tf, unsigned long expires); void timed_fence_fini(struct timed_fence *tf); +struct i915_sw_fence *heap_fence_create(gfp_t gfp); +void heap_fence_put(struct i915_sw_fence *fence); + #endif /* _LIB_SW_FENCE_H_ */ diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 442ec2aeec81..08f0cab02e0f 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -30,6 +30,17 @@ struct mock_ring { struct i915_timeline timeline; }; +static void mock_timeline_pin(struct i915_timeline *tl) +{ + tl->pin_count++; +} + +static void mock_timeline_unpin(struct i915_timeline *tl) +{ + GEM_BUG_ON(!tl->pin_count); + tl->pin_count--; +} + static struct intel_ring *mock_ring(struct intel_engine_cs *engine) { const unsigned long sz = PAGE_SIZE / 2; @@ -39,7 +50,12 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine) if (!ring) return NULL; - i915_timeline_init(engine->i915, &ring->timeline, engine->name); + if (i915_timeline_init(engine->i915, + &ring->timeline, engine->name, + NULL)) { + kfree(ring); + return NULL; + } ring->base.size = sz; ring->base.effective_size = sz; @@ -70,15 +86,21 @@ static struct mock_request *first_request(struct mock_engine *engine) static void advance(struct mock_request *request) { list_del_init(&request->link); - mock_seqno_advance(request->base.engine, request->base.global_seqno); + intel_engine_write_global_seqno(request->base.engine, + request->base.global_seqno); + i915_request_mark_complete(&request->base); + GEM_BUG_ON(!i915_request_completed(&request->base)); + + intel_engine_queue_breadcrumbs(request->base.engine); } static void hw_delay_complete(struct timer_list *t) { struct mock_engine *engine = from_timer(engine, t, hw_delay); struct mock_request *request; + unsigned long flags; - spin_lock(&engine->hw_lock); + spin_lock_irqsave(&engine->hw_lock, flags); /* Timer fired, first request is complete */ request = first_request(engine); @@ -98,11 +120,12 @@ static void hw_delay_complete(struct timer_list *t) advance(request); } - spin_unlock(&engine->hw_lock); + spin_unlock_irqrestore(&engine->hw_lock, flags); } static void mock_context_unpin(struct intel_context *ce) { + mock_timeline_unpin(ce->ring->timeline); i915_gem_context_put(ce->gem_context); } @@ -124,6 +147,7 @@ mock_context_pin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { struct intel_context *ce = to_intel_context(ctx, engine); + int err = -ENOMEM; if (ce->pin_count++) return ce; @@ -134,13 +158,15 @@ mock_context_pin(struct intel_engine_cs *engine, goto err; } + mock_timeline_pin(ce->ring->timeline); + ce->ops = &mock_context_ops; i915_gem_context_get(ctx); return ce; err: ce->pin_count = 0; - return ERR_PTR(-ENOMEM); + return ERR_PTR(err); } static int mock_request_alloc(struct i915_request *request) @@ -159,9 +185,9 @@ static int mock_emit_flush(struct i915_request *request, return 0; } -static void mock_emit_breadcrumb(struct i915_request *request, - u32 *flags) +static u32 *mock_emit_breadcrumb(struct i915_request *request, u32 *cs) { + return cs; } static void mock_submit_request(struct i915_request *request) @@ -169,11 +195,12 @@ static void mock_submit_request(struct i915_request *request) struct mock_request *mock = container_of(request, typeof(*mock), base); struct mock_engine *engine = container_of(request->engine, typeof(*engine), base); + unsigned long flags; i915_request_submit(request); GEM_BUG_ON(!request->global_seqno); - spin_lock_irq(&engine->hw_lock); + spin_lock_irqsave(&engine->hw_lock, flags); list_add_tail(&mock->link, &engine->hw_queue); if (mock->link.prev == &engine->hw_queue) { if (mock->delay) @@ -181,7 +208,7 @@ static void mock_submit_request(struct i915_request *request) else advance(mock); } - spin_unlock_irq(&engine->hw_lock); + spin_unlock_irqrestore(&engine->hw_lock, flags); } struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, @@ -200,15 +227,19 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, engine->base.i915 = i915; snprintf(engine->base.name, sizeof(engine->base.name), "%s", name); engine->base.id = id; - engine->base.status_page.page_addr = (void *)(engine + 1); + engine->base.status_page.addr = (void *)(engine + 1); engine->base.context_pin = mock_context_pin; engine->base.request_alloc = mock_request_alloc; engine->base.emit_flush = mock_emit_flush; - engine->base.emit_breadcrumb = mock_emit_breadcrumb; + engine->base.emit_fini_breadcrumb = mock_emit_breadcrumb; engine->base.submit_request = mock_submit_request; - i915_timeline_init(i915, &engine->base.timeline, engine->base.name); + if (i915_timeline_init(i915, + &engine->base.timeline, + engine->base.name, + NULL)) + goto err_free; i915_timeline_set_subclass(&engine->base.timeline, TIMELINE_ENGINE); intel_engine_init_breadcrumbs(&engine->base); @@ -226,6 +257,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, err_breadcrumbs: intel_engine_fini_breadcrumbs(&engine->base); i915_timeline_fini(&engine->base.timeline); +err_free: kfree(engine); return NULL; } @@ -246,7 +278,7 @@ void mock_engine_flush(struct intel_engine_cs *engine) void mock_engine_reset(struct intel_engine_cs *engine) { - intel_write_status_page(engine, I915_GEM_HWS_INDEX, 0); + intel_engine_write_global_seqno(engine, 0); } void mock_engine_free(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.h b/drivers/gpu/drm/i915/selftests/mock_engine.h index 133d0c21790d..b9cc3a245f16 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.h +++ b/drivers/gpu/drm/i915/selftests/mock_engine.h @@ -46,10 +46,4 @@ void mock_engine_flush(struct intel_engine_cs *engine); void mock_engine_reset(struct intel_engine_cs *engine); void mock_engine_free(struct intel_engine_cs *engine); -static inline void mock_seqno_advance(struct intel_engine_cs *engine, u32 seqno) -{ - intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); - intel_engine_wakeup(engine); -} - #endif /* !__MOCK_ENGINE_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 5477ad4a7e7d..14ae46fda49f 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -58,8 +58,8 @@ static void mock_device_release(struct drm_device *dev) i915_gem_contexts_lost(i915); mutex_unlock(&i915->drm.struct_mutex); - cancel_delayed_work_sync(&i915->gt.retire_work); - cancel_delayed_work_sync(&i915->gt.idle_work); + drain_delayed_work(&i915->gt.retire_work); + drain_delayed_work(&i915->gt.idle_work); i915_gem_drain_workqueue(i915); mutex_lock(&i915->drm.struct_mutex); @@ -68,13 +68,14 @@ static void mock_device_release(struct drm_device *dev) i915_gem_contexts_fini(i915); mutex_unlock(&i915->drm.struct_mutex); + i915_timelines_fini(i915); + drain_workqueue(i915->wq); i915_gem_drain_freed_objects(i915); mutex_lock(&i915->drm.struct_mutex); mock_fini_ggtt(&i915->ggtt); mutex_unlock(&i915->drm.struct_mutex); - WARN_ON(!list_empty(&i915->gt.timelines)); destroy_workqueue(i915->wq); @@ -226,7 +227,8 @@ struct drm_i915_private *mock_gem_device(void) if (!i915->priorities) goto err_dependencies; - INIT_LIST_HEAD(&i915->gt.timelines); + i915_timelines_init(i915); + INIT_LIST_HEAD(&i915->gt.active_rings); INIT_LIST_HEAD(&i915->gt.closed_vma); @@ -253,6 +255,7 @@ err_context: i915_gem_contexts_fini(i915); err_unlock: mutex_unlock(&i915->drm.struct_mutex); + i915_timelines_fini(i915); kmem_cache_destroy(i915->priorities); err_dependencies: kmem_cache_destroy(i915->dependencies); diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c index dcf3b16f5a07..cf39ccd9fc05 100644 --- a/drivers/gpu/drm/i915/selftests/mock_timeline.c +++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c @@ -10,6 +10,7 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context) { + timeline->i915 = NULL; timeline->fence_context = context; spin_lock_init(&timeline->lock); @@ -24,5 +25,5 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context) void mock_timeline_fini(struct i915_timeline *timeline) { - i915_timeline_fini(timeline); + i915_syncmap_free(&timeline->sync); } diff --git a/include/drm/drm_color_mgmt.h b/include/drm/drm_color_mgmt.h index 6affbda6d9cb..d1c662d92ab7 100644 --- a/include/drm/drm_color_mgmt.h +++ b/include/drm/drm_color_mgmt.h @@ -96,6 +96,5 @@ enum drm_color_lut_tests { DRM_COLOR_LUT_NON_DECREASING = BIT(1), }; -int drm_color_lut_check(struct drm_property_blob *lut, - uint32_t tests); +int drm_color_lut_check(const struct drm_property_blob *lut, u32 tests); #endif diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index df72be7e8b88..d2fad7b0fcf6 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -394,6 +394,9 @@ INTEL_VGA_DEVICE(0x3E9A, info) /* SRV GT2 */ /* CFL H */ +#define INTEL_CFL_H_GT1_IDS(info) \ + INTEL_VGA_DEVICE(0x3E9C, info) + #define INTEL_CFL_H_GT2_IDS(info) \ INTEL_VGA_DEVICE(0x3E9B, info), /* Halo GT2 */ \ INTEL_VGA_DEVICE(0x3E94, info) /* Halo GT2 */ @@ -426,6 +429,7 @@ #define INTEL_CFL_IDS(info) \ INTEL_CFL_S_GT1_IDS(info), \ INTEL_CFL_S_GT2_IDS(info), \ + INTEL_CFL_H_GT1_IDS(info), \ INTEL_CFL_H_GT2_IDS(info), \ INTEL_CFL_U_GT2_IDS(info), \ INTEL_CFL_U_GT3_IDS(info), \ |