Diffstat (limited to 'drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c')
-rw-r--r-- | drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 904
1 file changed, 585 insertions, 319 deletions
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index b7440f06c5e2..db8eb1c6afe9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -15,8 +15,8 @@ #include "gem/i915_gem_ioctls.h" #include "gt/intel_context.h" -#include "gt/intel_engine_pool.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_buffer_pool.h" #include "gt/intel_gt_pm.h" #include "gt/intel_ring.h" @@ -40,6 +40,11 @@ struct eb_vma { u32 handle; }; +struct eb_vma_array { + struct kref kref; + struct eb_vma vma[]; +}; + enum { FORCE_CPU_RELOC = 1, FORCE_GTT_RELOC, @@ -52,7 +57,6 @@ enum { #define __EXEC_OBJECT_NEEDS_MAP BIT(29) #define __EXEC_OBJECT_NEEDS_BIAS BIT(28) #define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 28) /* all of the above */ -#define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE) #define __EXEC_HAS_RELOC BIT(31) #define __EXEC_INTERNAL_FLAGS (~0u << 31) @@ -264,7 +268,9 @@ struct i915_execbuffer { bool has_fence : 1; bool needs_unfenced : 1; + struct i915_vma *target; struct i915_request *rq; + struct i915_vma *rq_vma; u32 *rq_cmd; unsigned int rq_size; } reloc_cache; @@ -283,6 +289,7 @@ struct i915_execbuffer { */ int lut_size; struct hlist_head *buckets; /** ht for relocation handles */ + struct eb_vma_array *array; }; static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) @@ -292,8 +299,62 @@ static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) eb->args->batch_len); } +static struct eb_vma_array *eb_vma_array_create(unsigned int count) +{ + struct eb_vma_array *arr; + + arr = kvmalloc(struct_size(arr, vma, count), GFP_KERNEL | __GFP_NOWARN); + if (!arr) + return NULL; + + kref_init(&arr->kref); + arr->vma[0].vma = NULL; + + return arr; +} + +static inline void eb_unreserve_vma(struct eb_vma *ev) +{ + struct i915_vma *vma = ev->vma; + + if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE)) + __i915_vma_unpin_fence(vma); + + if (ev->flags & __EXEC_OBJECT_HAS_PIN) + __i915_vma_unpin(vma); + + ev->flags &= ~(__EXEC_OBJECT_HAS_PIN | + __EXEC_OBJECT_HAS_FENCE); +} + +static void eb_vma_array_destroy(struct kref *kref) +{ + struct eb_vma_array *arr = container_of(kref, typeof(*arr), kref); + struct eb_vma *ev = arr->vma; + + while (ev->vma) { + eb_unreserve_vma(ev); + i915_vma_put(ev->vma); + ev++; + } + + kvfree(arr); +} + +static void eb_vma_array_put(struct eb_vma_array *arr) +{ + kref_put(&arr->kref, eb_vma_array_destroy); +} + static int eb_create(struct i915_execbuffer *eb) { + /* Allocate an extra slot for use by the command parser + sentinel */ + eb->array = eb_vma_array_create(eb->buffer_count + 2); + if (!eb->array) + return -ENOMEM; + + eb->vma = eb->array->vma; + if (!(eb->args->flags & I915_EXEC_HANDLE_LUT)) { unsigned int size = 1 + ilog2(eb->buffer_count); @@ -327,8 +388,10 @@ static int eb_create(struct i915_execbuffer *eb) break; } while (--size); - if (unlikely(!size)) + if (unlikely(!size)) { + eb_vma_array_put(eb->array); return -ENOMEM; + } eb->lut_size = size; } else { @@ -368,6 +431,32 @@ eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry, return false; } +static u64 eb_pin_flags(const struct drm_i915_gem_exec_object2 *entry, + unsigned int exec_flags) +{ + u64 pin_flags = 0; + + if (exec_flags & EXEC_OBJECT_NEEDS_GTT) + pin_flags |= PIN_GLOBAL; + + /* + * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset, + * limit address to the first 4GBs for unflagged objects. 
+ */ + if (!(exec_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS)) + pin_flags |= PIN_ZONE_4G; + + if (exec_flags & __EXEC_OBJECT_NEEDS_MAP) + pin_flags |= PIN_MAPPABLE; + + if (exec_flags & EXEC_OBJECT_PINNED) + pin_flags |= entry->offset | PIN_OFFSET_FIXED; + else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS) + pin_flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS; + + return pin_flags; +} + static inline bool eb_pin_vma(struct i915_execbuffer *eb, const struct drm_i915_gem_exec_object2 *entry, @@ -385,8 +474,19 @@ eb_pin_vma(struct i915_execbuffer *eb, if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_GTT)) pin_flags |= PIN_GLOBAL; - if (unlikely(i915_vma_pin(vma, 0, 0, pin_flags))) - return false; + /* Attempt to reuse the current location if available */ + if (unlikely(i915_vma_pin(vma, 0, 0, pin_flags))) { + if (entry->flags & EXEC_OBJECT_PINNED) + return false; + + /* Failing that pick any _free_ space if suitable */ + if (unlikely(i915_vma_pin(vma, + entry->pad_to_size, + entry->alignment, + eb_pin_flags(entry, ev->flags) | + PIN_USER | PIN_NOEVICT))) + return false; + } if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) { if (unlikely(i915_vma_pin_fence(vma))) { @@ -402,26 +502,6 @@ eb_pin_vma(struct i915_execbuffer *eb, return !eb_vma_misplaced(entry, vma, ev->flags); } -static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags) -{ - GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN)); - - if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE)) - __i915_vma_unpin_fence(vma); - - __i915_vma_unpin(vma); -} - -static inline void -eb_unreserve_vma(struct eb_vma *ev) -{ - if (!(ev->flags & __EXEC_OBJECT_HAS_PIN)) - return; - - __eb_unreserve_vma(ev->vma, ev->flags); - ev->flags &= ~__EXEC_OBJECT_RESERVED; -} - static int eb_validate_vma(struct i915_execbuffer *eb, struct drm_i915_gem_exec_object2 *entry, @@ -481,7 +561,7 @@ eb_add_vma(struct i915_execbuffer *eb, GEM_BUG_ON(i915_vma_is_closed(vma)); - ev->vma = i915_vma_get(vma); + ev->vma = vma; ev->exec = entry; ev->flags = entry->flags; @@ -547,28 +627,9 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb, u64 pin_flags) { struct drm_i915_gem_exec_object2 *entry = ev->exec; - unsigned int exec_flags = ev->flags; struct i915_vma *vma = ev->vma; int err; - if (exec_flags & EXEC_OBJECT_NEEDS_GTT) - pin_flags |= PIN_GLOBAL; - - /* - * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset, - * limit address to the first 4GBs for unflagged objects. 
- */ - if (!(exec_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS)) - pin_flags |= PIN_ZONE_4G; - - if (exec_flags & __EXEC_OBJECT_NEEDS_MAP) - pin_flags |= PIN_MAPPABLE; - - if (exec_flags & EXEC_OBJECT_PINNED) - pin_flags |= entry->offset | PIN_OFFSET_FIXED; - else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS) - pin_flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS; - if (drm_mm_node_allocated(&vma->node) && eb_vma_misplaced(entry, vma, ev->flags)) { err = i915_vma_unbind(vma); @@ -578,7 +639,7 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb, err = i915_vma_pin(vma, entry->pad_to_size, entry->alignment, - pin_flags); + eb_pin_flags(entry, ev->flags) | pin_flags); if (err) return err; @@ -587,7 +648,7 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb, eb->args->flags |= __EXEC_HAS_RELOC; } - if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) { + if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) { err = i915_vma_pin_fence(vma); if (unlikely(err)) { i915_vma_unpin(vma); @@ -595,10 +656,10 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb, } if (vma->fence) - exec_flags |= __EXEC_OBJECT_HAS_FENCE; + ev->flags |= __EXEC_OBJECT_HAS_FENCE; } - ev->flags = exec_flags | __EXEC_OBJECT_HAS_PIN; + ev->flags |= __EXEC_OBJECT_HAS_PIN; GEM_BUG_ON(eb_vma_misplaced(entry, vma, ev->flags)); return 0; @@ -728,77 +789,117 @@ static int eb_select_context(struct i915_execbuffer *eb) return 0; } -static int eb_lookup_vmas(struct i915_execbuffer *eb) +static int __eb_add_lut(struct i915_execbuffer *eb, + u32 handle, struct i915_vma *vma) { - struct radix_tree_root *handles_vma = &eb->gem_context->handles_vma; - struct drm_i915_gem_object *obj; - unsigned int i, batch; + struct i915_gem_context *ctx = eb->gem_context; + struct i915_lut_handle *lut; int err; - if (unlikely(i915_gem_context_is_closed(eb->gem_context))) - return -ENOENT; + lut = i915_lut_handle_alloc(); + if (unlikely(!lut)) + return -ENOMEM; - INIT_LIST_HEAD(&eb->relocs); - INIT_LIST_HEAD(&eb->unbound); + i915_vma_get(vma); + if (!atomic_fetch_inc(&vma->open_count)) + i915_vma_reopen(vma); + lut->handle = handle; + lut->ctx = ctx; + + /* Check that the context hasn't been closed in the meantime */ + err = -EINTR; + if (!mutex_lock_interruptible(&ctx->mutex)) { + err = -ENOENT; + if (likely(!i915_gem_context_is_closed(ctx))) + err = radix_tree_insert(&ctx->handles_vma, handle, vma); + if (err == 0) { /* And nor has this handle */ + struct drm_i915_gem_object *obj = vma->obj; + + i915_gem_object_lock(obj); + if (idr_find(&eb->file->object_idr, handle) == obj) { + list_add(&lut->obj_link, &obj->lut_list); + } else { + radix_tree_delete(&ctx->handles_vma, handle); + err = -ENOENT; + } + i915_gem_object_unlock(obj); + } + mutex_unlock(&ctx->mutex); + } + if (unlikely(err)) + goto err; - batch = eb_batch_index(eb); + return 0; - for (i = 0; i < eb->buffer_count; i++) { - u32 handle = eb->exec[i].handle; - struct i915_lut_handle *lut; +err: + i915_vma_close(vma); + i915_vma_put(vma); + i915_lut_handle_free(lut); + return err; +} + +static struct i915_vma *eb_lookup_vma(struct i915_execbuffer *eb, u32 handle) +{ + do { + struct drm_i915_gem_object *obj; struct i915_vma *vma; + int err; - vma = radix_tree_lookup(handles_vma, handle); + rcu_read_lock(); + vma = radix_tree_lookup(&eb->gem_context->handles_vma, handle); + if (likely(vma)) + vma = i915_vma_tryget(vma); + rcu_read_unlock(); if (likely(vma)) - goto add_vma; + return vma; obj = i915_gem_object_lookup(eb->file, handle); - if (unlikely(!obj)) { - err = -ENOENT; - goto 
err_vma; - } + if (unlikely(!obj)) + return ERR_PTR(-ENOENT); vma = i915_vma_instance(obj, eb->context->vm, NULL); if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err_obj; + i915_gem_object_put(obj); + return vma; } - lut = i915_lut_handle_alloc(); - if (unlikely(!lut)) { - err = -ENOMEM; - goto err_obj; - } + err = __eb_add_lut(eb, handle, vma); + if (likely(!err)) + return vma; - err = radix_tree_insert(handles_vma, handle, vma); - if (unlikely(err)) { - i915_lut_handle_free(lut); - goto err_obj; - } + i915_gem_object_put(obj); + if (err != -EEXIST) + return ERR_PTR(err); + } while (1); +} - /* transfer ref to lut */ - if (!atomic_fetch_inc(&vma->open_count)) - i915_vma_reopen(vma); - lut->handle = handle; - lut->ctx = eb->gem_context; +static int eb_lookup_vmas(struct i915_execbuffer *eb) +{ + unsigned int batch = eb_batch_index(eb); + unsigned int i; + int err = 0; - i915_gem_object_lock(obj); - list_add(&lut->obj_link, &obj->lut_list); - i915_gem_object_unlock(obj); + INIT_LIST_HEAD(&eb->relocs); + INIT_LIST_HEAD(&eb->unbound); + + for (i = 0; i < eb->buffer_count; i++) { + struct i915_vma *vma; + + vma = eb_lookup_vma(eb, eb->exec[i].handle); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + break; + } -add_vma: err = eb_validate_vma(eb, &eb->exec[i], vma); - if (unlikely(err)) - goto err_vma; + if (unlikely(err)) { + i915_vma_put(vma); + break; + } eb_add_vma(eb, i, batch, vma); } - return 0; - -err_obj: - i915_gem_object_put(obj); -err_vma: eb->vma[i].vma = NULL; return err; } @@ -823,31 +924,13 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle) } } -static void eb_release_vmas(const struct i915_execbuffer *eb) -{ - const unsigned int count = eb->buffer_count; - unsigned int i; - - for (i = 0; i < count; i++) { - struct eb_vma *ev = &eb->vma[i]; - struct i915_vma *vma = ev->vma; - - if (!vma) - break; - - eb->vma[i].vma = NULL; - - if (ev->flags & __EXEC_OBJECT_HAS_PIN) - __eb_unreserve_vma(vma, ev->flags); - - i915_vma_put(vma); - } -} - static void eb_destroy(const struct i915_execbuffer *eb) { GEM_BUG_ON(eb->reloc_cache.rq); + if (eb->array) + eb_vma_array_put(eb->array); + if (eb->lut_size > 0) kfree(eb->buckets); } @@ -872,7 +955,7 @@ static void reloc_cache_init(struct reloc_cache *cache, cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment; cache->node.flags = 0; cache->rq = NULL; - cache->rq_size = 0; + cache->target = NULL; } static inline void *unmask_page(unsigned long p) @@ -894,29 +977,122 @@ static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache) return &i915->ggtt; } -static void reloc_gpu_flush(struct reloc_cache *cache) +#define RELOC_TAIL 4 + +static int reloc_gpu_chain(struct reloc_cache *cache) { - struct drm_i915_gem_object *obj = cache->rq->batch->obj; + struct intel_gt_buffer_pool_node *pool; + struct i915_request *rq = cache->rq; + struct i915_vma *batch; + u32 *cmd; + int err; - GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32)); - cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END; + pool = intel_gt_get_buffer_pool(rq->engine->gt, PAGE_SIZE); + if (IS_ERR(pool)) + return PTR_ERR(pool); - __i915_gem_object_flush_map(obj, 0, sizeof(u32) * (cache->rq_size + 1)); - i915_gem_object_unpin_map(obj); + batch = i915_vma_instance(pool->obj, rq->context->vm, NULL); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto out_pool; + } - intel_gt_chipset_flush(cache->rq->engine->gt); + err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK); + if (err) + goto out_pool; - i915_request_add(cache->rq); - cache->rq = 
NULL; + GEM_BUG_ON(cache->rq_size + RELOC_TAIL > PAGE_SIZE / sizeof(u32)); + cmd = cache->rq_cmd + cache->rq_size; + *cmd++ = MI_ARB_CHECK; + if (cache->gen >= 8) + *cmd++ = MI_BATCH_BUFFER_START_GEN8; + else if (cache->gen >= 6) + *cmd++ = MI_BATCH_BUFFER_START; + else + *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; + *cmd++ = lower_32_bits(batch->node.start); + *cmd++ = upper_32_bits(batch->node.start); /* Always 0 for gen<8 */ + i915_gem_object_flush_map(cache->rq_vma->obj); + i915_gem_object_unpin_map(cache->rq_vma->obj); + cache->rq_vma = NULL; + + err = intel_gt_buffer_pool_mark_active(pool, rq); + if (err == 0) { + i915_vma_lock(batch); + err = i915_request_await_object(rq, batch->obj, false); + if (err == 0) + err = i915_vma_move_to_active(batch, rq, 0); + i915_vma_unlock(batch); + } + i915_vma_unpin(batch); + if (err) + goto out_pool; + + cmd = i915_gem_object_pin_map(batch->obj, + cache->has_llc ? + I915_MAP_FORCE_WB : + I915_MAP_FORCE_WC); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto out_pool; + } + + /* Return with batch mapping (cmd) still pinned */ + cache->rq_cmd = cmd; + cache->rq_size = 0; + cache->rq_vma = batch; + +out_pool: + intel_gt_buffer_pool_put(pool); + return err; +} + +static unsigned int reloc_bb_flags(const struct reloc_cache *cache) +{ + return cache->gen > 5 ? 0 : I915_DISPATCH_SECURE; +} + +static int reloc_gpu_flush(struct reloc_cache *cache) +{ + struct i915_request *rq; + int err; + + rq = fetch_and_zero(&cache->rq); + if (!rq) + return 0; + + if (cache->rq_vma) { + struct drm_i915_gem_object *obj = cache->rq_vma->obj; + + GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32)); + cache->rq_cmd[cache->rq_size++] = MI_BATCH_BUFFER_END; + + __i915_gem_object_flush_map(obj, + 0, sizeof(u32) * cache->rq_size); + i915_gem_object_unpin_map(obj); + } + + err = 0; + if (rq->engine->emit_init_breadcrumb) + err = rq->engine->emit_init_breadcrumb(rq); + if (!err) + err = rq->engine->emit_bb_start(rq, + rq->batch->node.start, + PAGE_SIZE, + reloc_bb_flags(cache)); + if (err) + i915_request_set_error_once(rq, err); + + intel_gt_chipset_flush(rq->engine->gt); + i915_request_add(rq); + + return err; } static void reloc_cache_reset(struct reloc_cache *cache) { void *vaddr; - if (cache->rq) - reloc_gpu_flush(cache); - if (!cache->vaddr) return; @@ -1109,17 +1285,17 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma) } static int __reloc_gpu_alloc(struct i915_execbuffer *eb, - struct i915_vma *vma, + struct intel_engine_cs *engine, unsigned int len) { struct reloc_cache *cache = &eb->reloc_cache; - struct intel_engine_pool_node *pool; + struct intel_gt_buffer_pool_node *pool; struct i915_request *rq; struct i915_vma *batch; u32 *cmd; int err; - pool = intel_engine_get_pool(eb->engine, PAGE_SIZE); + pool = intel_gt_get_buffer_pool(engine->gt, PAGE_SIZE); if (IS_ERR(pool)) return PTR_ERR(pool); @@ -1132,7 +1308,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, goto out_pool; } - batch = i915_vma_instance(pool->obj, vma->vm, NULL); + batch = i915_vma_instance(pool->obj, eb->context->vm, NULL); if (IS_ERR(batch)) { err = PTR_ERR(batch); goto err_unmap; @@ -1142,26 +1318,32 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, if (err) goto err_unmap; - rq = i915_request_create(eb->context); + if (engine == eb->context->engine) { + rq = i915_request_create(eb->context); + } else { + struct intel_context *ce; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto err_unpin; + } + + 
i915_vm_put(ce->vm); + ce->vm = i915_vm_get(eb->context->vm); + + rq = intel_context_create_request(ce); + intel_context_put(ce); + } if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_unpin; } - err = intel_engine_pool_mark_active(pool, rq); - if (err) - goto err_request; - - err = reloc_move_to_gpu(rq, vma); + err = intel_gt_buffer_pool_mark_active(pool, rq); if (err) goto err_request; - err = eb->engine->emit_bb_start(rq, - batch->node.start, PAGE_SIZE, - cache->gen > 5 ? 0 : I915_DISPATCH_SECURE); - if (err) - goto skip_request; - i915_vma_lock(batch); err = i915_request_await_object(rq, batch->obj, false); if (err == 0) @@ -1176,6 +1358,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, cache->rq = rq; cache->rq_cmd = cmd; cache->rq_size = 0; + cache->rq_vma = batch; /* Return with batch mapping (cmd) still pinned */ goto out_pool; @@ -1189,124 +1372,206 @@ err_unpin: err_unmap: i915_gem_object_unpin_map(pool->obj); out_pool: - intel_engine_pool_put(pool); + intel_gt_buffer_pool_put(pool); return err; } +static bool reloc_can_use_engine(const struct intel_engine_cs *engine) +{ + return engine->class != VIDEO_DECODE_CLASS || !IS_GEN(engine->i915, 6); +} + static u32 *reloc_gpu(struct i915_execbuffer *eb, struct i915_vma *vma, unsigned int len) { struct reloc_cache *cache = &eb->reloc_cache; u32 *cmd; - - if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1)) - reloc_gpu_flush(cache); + int err; if (unlikely(!cache->rq)) { - int err; + struct intel_engine_cs *engine = eb->engine; - if (!intel_engine_can_store_dword(eb->engine)) - return ERR_PTR(-ENODEV); + if (!reloc_can_use_engine(engine)) { + engine = engine->gt->engine_class[COPY_ENGINE_CLASS][0]; + if (!engine) + return ERR_PTR(-ENODEV); + } - err = __reloc_gpu_alloc(eb, vma, len); + err = __reloc_gpu_alloc(eb, engine, len); if (unlikely(err)) return ERR_PTR(err); } + if (vma != cache->target) { + err = reloc_move_to_gpu(cache->rq, vma); + if (unlikely(err)) { + i915_request_set_error_once(cache->rq, err); + return ERR_PTR(err); + } + + cache->target = vma; + } + + if (unlikely(cache->rq_size + len > + PAGE_SIZE / sizeof(u32) - RELOC_TAIL)) { + err = reloc_gpu_chain(cache); + if (unlikely(err)) { + i915_request_set_error_once(cache->rq, err); + return ERR_PTR(err); + } + } + + GEM_BUG_ON(cache->rq_size + len >= PAGE_SIZE / sizeof(u32)); cmd = cache->rq_cmd + cache->rq_size; cache->rq_size += len; return cmd; } -static u64 -relocate_entry(struct i915_vma *vma, - const struct drm_i915_gem_relocation_entry *reloc, - struct i915_execbuffer *eb, - const struct i915_vma *target) +static inline bool use_reloc_gpu(struct i915_vma *vma) { - u64 offset = reloc->offset; - u64 target_offset = relocation_target(reloc, target); - bool wide = eb->reloc_cache.use_64bit_reloc; - void *vaddr; + if (DBG_FORCE_RELOC == FORCE_GPU_RELOC) + return true; - if (!eb->reloc_cache.vaddr && - (DBG_FORCE_RELOC == FORCE_GPU_RELOC || - !dma_resv_test_signaled_rcu(vma->resv, true))) { - const unsigned int gen = eb->reloc_cache.gen; - unsigned int len; - u32 *batch; - u64 addr; - - if (wide) - len = offset & 7 ? 
8 : 5; - else if (gen >= 4) - len = 4; - else - len = 3; + if (DBG_FORCE_RELOC) + return false; - batch = reloc_gpu(eb, vma, len); - if (IS_ERR(batch)) - goto repeat; + return !dma_resv_test_signaled_rcu(vma->resv, true); +} - addr = gen8_canonical_addr(vma->node.start + offset); - if (wide) { - if (offset & 7) { - *batch++ = MI_STORE_DWORD_IMM_GEN4; - *batch++ = lower_32_bits(addr); - *batch++ = upper_32_bits(addr); - *batch++ = lower_32_bits(target_offset); - - addr = gen8_canonical_addr(addr + 4); - - *batch++ = MI_STORE_DWORD_IMM_GEN4; - *batch++ = lower_32_bits(addr); - *batch++ = upper_32_bits(addr); - *batch++ = upper_32_bits(target_offset); - } else { - *batch++ = (MI_STORE_DWORD_IMM_GEN4 | (1 << 21)) + 1; - *batch++ = lower_32_bits(addr); - *batch++ = upper_32_bits(addr); - *batch++ = lower_32_bits(target_offset); - *batch++ = upper_32_bits(target_offset); - } - } else if (gen >= 6) { +static unsigned long vma_phys_addr(struct i915_vma *vma, u32 offset) +{ + struct page *page; + unsigned long addr; + + GEM_BUG_ON(vma->pages != vma->obj->mm.pages); + + page = i915_gem_object_get_page(vma->obj, offset >> PAGE_SHIFT); + addr = PFN_PHYS(page_to_pfn(page)); + GEM_BUG_ON(overflows_type(addr, u32)); /* expected dma32 */ + + return addr + offset_in_page(offset); +} + +static bool __reloc_entry_gpu(struct i915_execbuffer *eb, + struct i915_vma *vma, + u64 offset, + u64 target_addr) +{ + const unsigned int gen = eb->reloc_cache.gen; + unsigned int len; + u32 *batch; + u64 addr; + + if (gen >= 8) + len = offset & 7 ? 8 : 5; + else if (gen >= 4) + len = 4; + else + len = 3; + + batch = reloc_gpu(eb, vma, len); + if (IS_ERR(batch)) + return false; + + addr = gen8_canonical_addr(vma->node.start + offset); + if (gen >= 8) { + if (offset & 7) { *batch++ = MI_STORE_DWORD_IMM_GEN4; - *batch++ = 0; - *batch++ = addr; - *batch++ = target_offset; - } else if (gen >= 4) { - *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; - *batch++ = 0; - *batch++ = addr; - *batch++ = target_offset; + *batch++ = lower_32_bits(addr); + *batch++ = upper_32_bits(addr); + *batch++ = lower_32_bits(target_addr); + + addr = gen8_canonical_addr(addr + 4); + + *batch++ = MI_STORE_DWORD_IMM_GEN4; + *batch++ = lower_32_bits(addr); + *batch++ = upper_32_bits(addr); + *batch++ = upper_32_bits(target_addr); } else { - *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; - *batch++ = addr; - *batch++ = target_offset; + *batch++ = (MI_STORE_DWORD_IMM_GEN4 | (1 << 21)) + 1; + *batch++ = lower_32_bits(addr); + *batch++ = upper_32_bits(addr); + *batch++ = lower_32_bits(target_addr); + *batch++ = upper_32_bits(target_addr); } - - goto out; + } else if (gen >= 6) { + *batch++ = MI_STORE_DWORD_IMM_GEN4; + *batch++ = 0; + *batch++ = addr; + *batch++ = target_addr; + } else if (IS_I965G(eb->i915)) { + *batch++ = MI_STORE_DWORD_IMM_GEN4; + *batch++ = 0; + *batch++ = vma_phys_addr(vma, offset); + *batch++ = target_addr; + } else if (gen >= 4) { + *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *batch++ = 0; + *batch++ = addr; + *batch++ = target_addr; + } else if (gen >= 3 && + !(IS_I915G(eb->i915) || IS_I915GM(eb->i915))) { + *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; + *batch++ = addr; + *batch++ = target_addr; + } else { + *batch++ = MI_STORE_DWORD_IMM; + *batch++ = vma_phys_addr(vma, offset); + *batch++ = target_addr; } + return true; +} + +static bool reloc_entry_gpu(struct i915_execbuffer *eb, + struct i915_vma *vma, + u64 offset, + u64 target_addr) +{ + if (eb->reloc_cache.vaddr) + return false; + + if (!use_reloc_gpu(vma)) + 
return false; + + return __reloc_entry_gpu(eb, vma, offset, target_addr); +} + +static u64 +relocate_entry(struct i915_vma *vma, + const struct drm_i915_gem_relocation_entry *reloc, + struct i915_execbuffer *eb, + const struct i915_vma *target) +{ + u64 target_addr = relocation_target(reloc, target); + u64 offset = reloc->offset; + + if (!reloc_entry_gpu(eb, vma, offset, target_addr)) { + bool wide = eb->reloc_cache.use_64bit_reloc; + void *vaddr; + repeat: - vaddr = reloc_vaddr(vma->obj, &eb->reloc_cache, offset >> PAGE_SHIFT); - if (IS_ERR(vaddr)) - return PTR_ERR(vaddr); + vaddr = reloc_vaddr(vma->obj, + &eb->reloc_cache, + offset >> PAGE_SHIFT); + if (IS_ERR(vaddr)) + return PTR_ERR(vaddr); - clflush_write32(vaddr + offset_in_page(offset), - lower_32_bits(target_offset), - eb->reloc_cache.vaddr); + GEM_BUG_ON(!IS_ALIGNED(offset, sizeof(u32))); + clflush_write32(vaddr + offset_in_page(offset), + lower_32_bits(target_addr), + eb->reloc_cache.vaddr); - if (wide) { - offset += sizeof(u32); - target_offset >>= 32; - wide = false; - goto repeat; + if (wide) { + offset += sizeof(u32); + target_addr >>= 32; + wide = false; + goto repeat; + } } -out: return target->node.start | UPDATE; } @@ -1411,12 +1676,11 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev) { #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry)) struct drm_i915_gem_relocation_entry stack[N_RELOC(512)]; - struct drm_i915_gem_relocation_entry __user *urelocs; const struct drm_i915_gem_exec_object2 *entry = ev->exec; - unsigned int remain; + struct drm_i915_gem_relocation_entry __user *urelocs = + u64_to_user_ptr(entry->relocs_ptr); + unsigned long remain = entry->relocation_count; - urelocs = u64_to_user_ptr(entry->relocs_ptr); - remain = entry->relocation_count; if (unlikely(remain > N_RELOC(ULONG_MAX))) return -EINVAL; @@ -1425,13 +1689,13 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev) * to read. However, if the array is not writable the user loses * the updated relocation values. */ - if (unlikely(!access_ok(urelocs, remain*sizeof(*urelocs)))) + if (unlikely(!access_ok(urelocs, remain * sizeof(*urelocs)))) return -EFAULT; do { struct drm_i915_gem_relocation_entry *r = stack; unsigned int count = - min_t(unsigned int, remain, ARRAY_SIZE(stack)); + min_t(unsigned long, remain, ARRAY_SIZE(stack)); unsigned int copied; /* @@ -1494,9 +1758,7 @@ static int eb_relocate(struct i915_execbuffer *eb) { int err; - mutex_lock(&eb->gem_context->mutex); err = eb_lookup_vmas(eb); - mutex_unlock(&eb->gem_context->mutex); if (err) return err; @@ -1509,15 +1771,20 @@ static int eb_relocate(struct i915_execbuffer *eb) /* The objects are in their final locations, apply the relocations. 
*/ if (eb->args->flags & __EXEC_HAS_RELOC) { struct eb_vma *ev; + int flush; list_for_each_entry(ev, &eb->relocs, reloc_link) { err = eb_relocate_vma(eb, ev); if (err) - return err; + break; } + + flush = reloc_gpu_flush(&eb->reloc_cache); + if (!err) + err = flush; } - return 0; + return err; } static int eb_move_to_gpu(struct i915_execbuffer *eb) @@ -1597,19 +1864,15 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) err = i915_vma_move_to_active(vma, eb->request, flags); i915_vma_unlock(vma); - - __eb_unreserve_vma(vma, flags); - i915_vma_put(vma); - - ev->vma = NULL; + eb_unreserve_vma(ev); } ww_acquire_fini(&acquire); + eb_vma_array_put(fetch_and_zero(&eb->array)); + if (unlikely(err)) goto err_skip; - eb->exec = NULL; - /* Unconditionally flush any chipset caches (for streaming writes). */ intel_gt_chipset_flush(eb->engine->gt); return 0; @@ -1725,6 +1988,38 @@ static const struct dma_fence_work_ops eb_parse_ops = { .release = __eb_parse_release, }; +static inline int +__parser_mark_active(struct i915_vma *vma, + struct intel_timeline *tl, + struct dma_fence *fence) +{ + struct intel_gt_buffer_pool_node *node = vma->private; + + return i915_active_ref(&node->active, tl, fence); +} + +static int +parser_mark_active(struct eb_parse_work *pw, struct intel_timeline *tl) +{ + int err; + + mutex_lock(&tl->mutex); + + err = __parser_mark_active(pw->shadow, tl, &pw->base.dma); + if (err) + goto unlock; + + if (pw->trampoline) { + err = __parser_mark_active(pw->trampoline, tl, &pw->base.dma); + if (err) + goto unlock; + } + +unlock: + mutex_unlock(&tl->mutex); + return err; +} + static int eb_parse_pipeline(struct i915_execbuffer *eb, struct i915_vma *shadow, struct i915_vma *trampoline) @@ -1759,20 +2054,25 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb, pw->shadow = shadow; pw->trampoline = trampoline; + /* Mark active refs early for this worker, in case we get interrupted */ + err = parser_mark_active(pw, eb->context->timeline); + if (err) + goto err_commit; + err = dma_resv_lock_interruptible(pw->batch->resv, NULL); if (err) - goto err_trampoline; + goto err_commit; err = dma_resv_reserve_shared(pw->batch->resv, 1); if (err) - goto err_batch_unlock; + goto err_commit_unlock; /* Wait for all writes (and relocs) into the batch to complete */ err = i915_sw_fence_await_reservation(&pw->base.chain, pw->batch->resv, NULL, false, 0, I915_FENCE_GFP); if (err < 0) - goto err_batch_unlock; + goto err_commit_unlock; /* Keep the batch alive and unwritten as we parse */ dma_resv_add_shared_fence(pw->batch->resv, &pw->base.dma); @@ -1784,14 +2084,16 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb, dma_resv_add_excl_fence(shadow->resv, &pw->base.dma); dma_resv_unlock(shadow->resv); - dma_fence_work_commit(&pw->base); + dma_fence_work_commit_imm(&pw->base); return 0; -err_batch_unlock: +err_commit_unlock: dma_resv_unlock(pw->batch->resv); -err_trampoline: - if (trampoline) - i915_active_release(&trampoline->active); +err_commit: + i915_sw_fence_set_error_once(&pw->base.chain, err); + dma_fence_work_commit_imm(&pw->base); + return err; + err_shadow: i915_active_release(&shadow->active); err_batch: @@ -1804,7 +2106,7 @@ err_free: static int eb_parse(struct i915_execbuffer *eb) { struct drm_i915_private *i915 = eb->i915; - struct intel_engine_pool_node *pool; + struct intel_gt_buffer_pool_node *pool; struct i915_vma *shadow, *trampoline; unsigned int len; int err; @@ -1827,7 +2129,7 @@ static int eb_parse(struct i915_execbuffer *eb) len += I915_CMD_PARSER_TRAMPOLINE_SIZE; } - 
pool = intel_engine_get_pool(eb->engine, len); + pool = intel_gt_get_buffer_pool(eb->engine->gt, len); if (IS_ERR(pool)) return PTR_ERR(pool); @@ -1837,6 +2139,7 @@ static int eb_parse(struct i915_execbuffer *eb) goto err; } i915_gem_object_set_readonly(shadow->obj); + shadow->private = pool; trampoline = NULL; if (CMDPARSER_USES_GGTT(eb->i915)) { @@ -1850,6 +2153,7 @@ static int eb_parse(struct i915_execbuffer *eb) shadow = trampoline; goto err_shadow; } + shadow->private = pool; eb->batch_flags |= I915_DISPATCH_SECURE; } @@ -1861,11 +2165,11 @@ static int eb_parse(struct i915_execbuffer *eb) eb->vma[eb->buffer_count].vma = i915_vma_get(shadow); eb->vma[eb->buffer_count].flags = __EXEC_OBJECT_HAS_PIN; eb->batch = &eb->vma[eb->buffer_count++]; + eb->vma[eb->buffer_count].vma = NULL; eb->trampoline = trampoline; eb->batch_start_offset = 0; - shadow->private = pool; return 0; err_trampoline: @@ -1874,7 +2178,7 @@ err_trampoline: err_shadow: i915_vma_unpin(shadow); err: - intel_engine_pool_put(pool); + intel_gt_buffer_pool_put(pool); return err; } @@ -2318,39 +2622,13 @@ static void eb_request_add(struct i915_execbuffer *eb) /* Check that the context wasn't destroyed before submission */ if (likely(!intel_context_is_closed(eb->context))) { attr = eb->gem_context->sched; - - /* - * Boost actual workloads past semaphores! - * - * With semaphores we spin on one engine waiting for another, - * simply to reduce the latency of starting our work when - * the signaler completes. However, if there is any other - * work that we could be doing on this engine instead, that - * is better utilisation and will reduce the overall duration - * of the current work. To avoid PI boosting a semaphore - * far in the distance past over useful work, we keep a history - * of any semaphore use along our dependency chain. - */ - if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN)) - attr.priority |= I915_PRIORITY_NOSEMAPHORE; - - /* - * Boost priorities to new clients (new request flows). - * - * Allow interactive/synchronous clients to jump ahead of - * the bulk clients. 
(FQ_CODEL) - */ - if (list_empty(&rq->sched.signalers_list)) - attr.priority |= I915_PRIORITY_WAIT; } else { /* Serialise with context_close via the add_to_timeline */ i915_request_set_error_once(rq, -ENOENT); __i915_request_skip(rq); } - local_bh_disable(); __i915_request_queue(rq, &attr); - local_bh_enable(); /* Kick the execlists tasklet if just scheduled */ /* Try to clean up the client's timeline after submitting the request */ if (prev) @@ -2369,7 +2647,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, struct drm_i915_private *i915 = to_i915(dev); struct i915_execbuffer eb; struct dma_fence *in_fence = NULL; - struct dma_fence *exec_fence = NULL; struct sync_file *out_fence = NULL; struct i915_vma *batch; int out_fence_fd = -1; @@ -2386,8 +2663,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, args->flags |= __EXEC_HAS_RELOC; eb.exec = exec; - eb.vma = (struct eb_vma *)(exec + args->buffer_count + 1); - eb.vma[0].vma = NULL; eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS; reloc_cache_init(&eb.reloc_cache, eb.i915); @@ -2414,30 +2689,22 @@ i915_gem_do_execbuffer(struct drm_device *dev, if (args->flags & I915_EXEC_IS_PINNED) eb.batch_flags |= I915_DISPATCH_PINNED; - if (args->flags & I915_EXEC_FENCE_IN) { +#define IN_FENCES (I915_EXEC_FENCE_IN | I915_EXEC_FENCE_SUBMIT) + if (args->flags & IN_FENCES) { + if ((args->flags & IN_FENCES) == IN_FENCES) + return -EINVAL; + in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2)); if (!in_fence) return -EINVAL; } - - if (args->flags & I915_EXEC_FENCE_SUBMIT) { - if (in_fence) { - err = -EINVAL; - goto err_in_fence; - } - - exec_fence = sync_file_get_fence(lower_32_bits(args->rsvd2)); - if (!exec_fence) { - err = -EINVAL; - goto err_in_fence; - } - } +#undef IN_FENCES if (args->flags & I915_EXEC_FENCE_OUT) { out_fence_fd = get_unused_fd_flags(O_CLOEXEC); if (out_fence_fd < 0) { err = out_fence_fd; - goto err_exec_fence; + goto err_in_fence; } } @@ -2528,14 +2795,13 @@ i915_gem_do_execbuffer(struct drm_device *dev, } if (in_fence) { - err = i915_request_await_dma_fence(eb.request, in_fence); - if (err < 0) - goto err_request; - } - - if (exec_fence) { - err = i915_request_await_execution(eb.request, exec_fence, - eb.engine->bond_execute); + if (args->flags & I915_EXEC_FENCE_SUBMIT) + err = i915_request_await_execution(eb.request, + in_fence, + eb.engine->bond_execute); + else + err = i915_request_await_dma_fence(eb.request, + in_fence); if (err < 0) goto err_request; } @@ -2563,7 +2829,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, */ eb.request->batch = batch; if (batch->private) - intel_engine_pool_mark_active(batch->private, eb.request); + intel_gt_buffer_pool_mark_active(batch->private, eb.request); trace_i915_request_queue(eb.request, eb.batch_flags); err = eb_submit(&eb, batch); @@ -2592,10 +2858,8 @@ err_batch_unpin: i915_vma_unpin(batch); err_parse: if (batch->private) - intel_engine_pool_put(batch->private); + intel_gt_buffer_pool_put(batch->private); err_vma: - if (eb.exec) - eb_release_vmas(&eb); if (eb.trampoline) i915_vma_unpin(eb.trampoline); eb_unpin_engine(&eb); @@ -2606,8 +2870,6 @@ err_destroy: err_out_fence: if (out_fence_fd != -1) put_unused_fd(out_fence_fd); -err_exec_fence: - dma_fence_put(exec_fence); err_in_fence: dma_fence_put(in_fence); return err; @@ -2615,7 +2877,7 @@ err_in_fence: static size_t eb_element_size(void) { - return sizeof(struct drm_i915_gem_exec_object2) + sizeof(struct eb_vma); + return sizeof(struct drm_i915_gem_exec_object2); } static bool check_buffer_count(size_t count) @@ -2671,7 +2933,7 
@@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
 	/* Copy in the exec list from userland */
 	exec_list = kvmalloc_array(count, sizeof(*exec_list),
 				   __GFP_NOWARN | GFP_KERNEL);
-	exec2_list = kvmalloc_array(count + 1, eb_element_size(),
+	exec2_list = kvmalloc_array(count, eb_element_size(),
 				   __GFP_NOWARN | GFP_KERNEL);
 	if (exec_list == NULL || exec2_list == NULL) {
 		drm_dbg(&i915->drm,
@@ -2749,8 +3011,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
 	if (err)
 		return err;
 
-	/* Allocate an extra slot for use by the command parser */
-	exec2_list = kvmalloc_array(count + 1, eb_element_size(),
+	exec2_list = kvmalloc_array(count, eb_element_size(),
 				    __GFP_NOWARN | GFP_KERNEL);
 	if (exec2_list == NULL) {
 		drm_dbg(&i915->drm, "Failed to allocate exec list for %zd buffers\n",
@@ -2794,7 +3055,8 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
 		 * And this range already got effectively checked earlier
 		 * when we did the "copy_from_user()" above.
 		 */
-		if (!user_access_begin(user_exec_list, count * sizeof(*user_exec_list)))
+		if (!user_write_access_begin(user_exec_list,
+					     count * sizeof(*user_exec_list)))
 			goto end;
 
 		for (i = 0; i < args->buffer_count; i++) {
@@ -2808,7 +3070,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
 					end_user);
 		}
 end_user:
-		user_access_end();
+		user_write_access_end();
 end:;
 	}
 
@@ -2817,3 +3079,7 @@ end:;
 	kvfree(exec2_list);
 	return err;
 }
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_gem_execbuffer.c"
+#endif