Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 81
1 file changed, 32 insertions, 49 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4a31b7a891ec..24b5b046754b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -244,14 +244,16 @@ err_phys:
 
 static void
 __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
-				struct sg_table *pages)
+				struct sg_table *pages,
+				bool needs_clflush)
 {
 	GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);
 
 	if (obj->mm.madv == I915_MADV_DONTNEED)
 		obj->mm.dirty = false;
 
-	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
+	if (needs_clflush &&
+	    (obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
 	    !cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
 		drm_clflush_sg(pages);
 
@@ -263,7 +265,7 @@ static void
 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
 			       struct sg_table *pages)
 {
-	__i915_gem_object_release_shmem(obj, pages);
+	__i915_gem_object_release_shmem(obj, pages, false);
 
 	if (obj->mm.dirty) {
 		struct address_space *mapping = obj->base.filp->f_mapping;
@@ -593,47 +595,21 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
 		     struct drm_i915_gem_pwrite *args,
 		     struct drm_file *file)
 {
-	struct drm_device *dev = obj->base.dev;
 	void *vaddr = obj->phys_handle->vaddr + args->offset;
 	char __user *user_data = u64_to_user_ptr(args->data_ptr);
-	int ret;
 
 	/* We manually control the domain here and pretend that it
 	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
 	 */
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED |
-				   I915_WAIT_ALL,
-				   MAX_SCHEDULE_TIMEOUT,
-				   to_rps_client(file));
-	if (ret)
-		return ret;
-
 	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
-	if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
-		unsigned long unwritten;
-
-		/* The physical object once assigned is fixed for the lifetime
-		 * of the obj, so we can safely drop the lock and continue
-		 * to access vaddr.
-		 */
-		mutex_unlock(&dev->struct_mutex);
-		unwritten = copy_from_user(vaddr, user_data, args->size);
-		mutex_lock(&dev->struct_mutex);
-		if (unwritten) {
-			ret = -EFAULT;
-			goto out;
-		}
-	}
+	if (copy_from_user(vaddr, user_data, args->size))
+		return -EFAULT;
 
 	drm_clflush_virt_range(vaddr, args->size);
-	i915_gem_chipset_flush(to_i915(dev));
+	i915_gem_chipset_flush(to_i915(obj->base.dev));
 
-out:
 	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
-	return ret;
+	return 0;
 }
 
 void *i915_gem_object_alloc(struct drm_device *dev)
@@ -2034,8 +2010,16 @@ void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
 	for (i = 0; i < dev_priv->num_fence_regs; i++) {
 		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
 
-		if (WARN_ON(reg->pin_count))
-			continue;
+		/* Ideally we want to assert that the fence register is not
+		 * live at this point (i.e. that no piece of code will be
+		 * trying to write through fence + GTT, as that both violates
+		 * our tracking of activity and associated locking/barriers,
+		 * but also is illegal given that the hw is powered down).
+		 *
+		 * Previously we used reg->pin_count as a "liveness" indicator.
+		 * That is not sufficient, and we need a more fine-grained
+		 * tool if we want to have a sanity check here.
+		 */
 
 		if (!reg->vma)
 			continue;
@@ -2231,7 +2215,7 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
 	struct sgt_iter sgt_iter;
 	struct page *page;
 
-	__i915_gem_object_release_shmem(obj, pages);
+	__i915_gem_object_release_shmem(obj, pages, true);
 
 	i915_gem_gtt_finish_pages(obj, pages);
 
@@ -2304,15 +2288,6 @@ unlock:
 	mutex_unlock(&obj->mm.lock);
 }
 
-static unsigned int swiotlb_max_size(void)
-{
-#if IS_ENABLED(CONFIG_SWIOTLB)
-	return rounddown(swiotlb_nr_tbl() << IO_TLB_SHIFT, PAGE_SIZE);
-#else
-	return 0;
-#endif
-}
-
 static void i915_sg_trim(struct sg_table *orig_st)
 {
 	struct sg_table new_st;
@@ -2322,7 +2297,7 @@ static void i915_sg_trim(struct sg_table *orig_st)
 	if (orig_st->nents == orig_st->orig_nents)
 		return;
 
-	if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL))
+	if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN))
 		return;
 
 	new_sg = new_st.sgl;
@@ -2360,7 +2335,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 	GEM_BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
 	GEM_BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
 
-	max_segment = swiotlb_max_size();
+	max_segment = swiotlb_max_segment();
 	if (!max_segment)
 		max_segment = rounddown(UINT_MAX, PAGE_SIZE);
 
@@ -2728,6 +2703,7 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine)
 	struct drm_i915_gem_request *request;
 	struct i915_gem_context *incomplete_ctx;
 	struct intel_timeline *timeline;
+	unsigned long flags;
 	bool ring_hung;
 
 	if (engine->irq_seqno_barrier)
@@ -2763,13 +2739,20 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine)
 	if (i915_gem_context_is_default(incomplete_ctx))
 		return;
 
+	timeline = i915_gem_context_lookup_timeline(incomplete_ctx, engine);
+
+	spin_lock_irqsave(&engine->timeline->lock, flags);
+	spin_lock(&timeline->lock);
+
 	list_for_each_entry_continue(request, &engine->timeline->requests, link)
 		if (request->ctx == incomplete_ctx)
 			reset_request(request);
 
-	timeline = i915_gem_context_lookup_timeline(incomplete_ctx, engine);
 	list_for_each_entry(request, &timeline->requests, link)
 		reset_request(request);
+
+	spin_unlock(&timeline->lock);
+	spin_unlock_irqrestore(&engine->timeline->lock, flags);
 }
 
 void i915_gem_reset(struct drm_i915_private *dev_priv)
@@ -3503,7 +3486,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
 
 	/* Treat this as an end-of-frame, like intel_user_framebuffer_dirty() */
-	if (obj->cache_dirty) {
+	if (obj->cache_dirty || obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
 		i915_gem_clflush_object(obj, true);
 		intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
 	}