summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVille Syrjälä <ville.syrjala@linux.intel.com>2022-02-18 17:47:09 +0200
committerVille Syrjälä <ville.syrjala@linux.intel.com>2022-02-18 17:47:09 +0200
commit3c4bc522478def9883dff4c957574ddf6115c5c7 (patch)
treedcb9e9de0272059e4ce96881f0e6d3524a701bf7
parentb0209a73d29dc65cbc47eefca8af359cd53eb6ba (diff)
parentedf176f48d87fa25ca93f09362707cf5314bf7ee (diff)
Merge remote-tracking branch 'drm_intel_push/drm-intel-gt-next' into drm-tip
# Conflicts: # drivers/gpu/drm/i915/i915_drv.h # drivers/gpu/drm/i915/i915_reg.h
-rw-r--r--Documentation/gpu/i915.rst1
-rw-r--r--drivers/gpu/drm/i915/Makefile2
-rw-r--r--drivers/gpu/drm/i915/display/intel_dpt.c33
-rw-r--r--drivers/gpu/drm/i915/display/intel_fb_pin.c8
-rw-r--r--drivers/gpu/drm/i915/display/intel_fbc.c2
-rw-r--r--drivers/gpu/drm/i915/display/intel_fbdev.c2
-rw-r--r--drivers/gpu/drm/i915/display/intel_plane_initial.c7
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_context.c11
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_context.h2
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c239
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_mman.c37
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object.c18
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object.h4
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object_types.h51
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_pages.c10
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_pm.c2
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_shmem.c17
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_shrinker.c30
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_stolen.c8
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_tiling.c15
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_ttm.c10
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c13
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/huge_pages.c39
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c4
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c2
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c192
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c25
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c2
-rw-r--r--drivers/gpu/drm/i915/gt/gen6_ppgtt.c19
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_ppgtt.c37
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context.c4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_cs.c9
-rw-r--r--drivers/gpu/drm/i915/gt/intel_execlists_submission.c38
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt.c133
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c7
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.c7
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.h2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_regs.h1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gtt.c4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gtt.h19
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.c32
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ppgtt.c22
-rw-r--r--drivers/gpu/drm/i915/gt/intel_region_lmem.c4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset.c9
-rw-r--r--drivers/gpu/drm/i915/gt/intel_workarounds.c89
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_hangcheck.c2
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_reset.c2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h80
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h23
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h82
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.c126
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.h20
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c204
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c143
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c38
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h37
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_log.c31
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_log.h3
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h7
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c148
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc.c1
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc.c31
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c43
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/selftest_guc.c2
-rw-r--r--drivers/gpu/drm/i915/gvt/aperture_gm.c2
-rw-r--r--drivers/gpu/drm/i915/gvt/dmabuf.c2
-rw-r--r--drivers/gpu/drm/i915/i915_debugfs.c7
-rw-r--r--drivers/gpu/drm/i915/i915_driver.c10
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h27
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c37
-rw-r--r--drivers/gpu/drm/i915/i915_gem_evict.c101
-rw-r--r--drivers/gpu/drm/i915/i915_gem_evict.h6
-rw-r--r--drivers/gpu/drm/i915/i915_gem_gtt.c16
-rw-r--r--drivers/gpu/drm/i915/i915_gem_gtt.h4
-rw-r--r--drivers/gpu/drm/i915/i915_getparam.c2
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.c87
-rw-r--r--drivers/gpu/drm/i915/i915_module.c3
-rw-r--r--drivers/gpu/drm/i915/i915_pci.c1
-rw-r--r--drivers/gpu/drm/i915/i915_perf.c12
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h4
-rw-r--r--drivers/gpu/drm/i915/i915_request.c12
-rw-r--r--drivers/gpu/drm/i915/i915_request.h6
-rw-r--r--drivers/gpu/drm/i915/i915_vgpu.c2
-rw-r--r--drivers/gpu/drm/i915/i915_vma.c569
-rw-r--r--drivers/gpu/drm/i915/i915_vma.h34
-rw-r--r--drivers/gpu/drm/i915/i915_vma_resource.c418
-rw-r--r--drivers/gpu/drm/i915/i915_vma_resource.h234
-rw-r--r--drivers/gpu/drm/i915/i915_vma_snapshot.c134
-rw-r--r--drivers/gpu/drm/i915/i915_vma_snapshot.h112
-rw-r--r--drivers/gpu/drm/i915/i915_vma_types.h19
-rw-r--r--drivers/gpu/drm/i915/intel_device_info.h4
-rw-r--r--drivers/gpu/drm/i915/intel_step.c15
-rw-r--r--drivers/gpu/drm/i915/intel_wopcm.c42
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_gem.c8
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_gem_evict.c28
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_gem_gtt.c209
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_request.c119
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_vma.c30
-rw-r--r--drivers/gpu/drm/i915/selftests/igt_flush_test.c2
-rw-r--r--drivers/gpu/drm/i915/selftests/mock_gem_device.c11
-rw-r--r--drivers/gpu/drm/i915/selftests/mock_gtt.c21
-rw-r--r--drivers/gpu/drm/i915/selftests/mock_gtt.h3
103 files changed, 3180 insertions, 1420 deletions
diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst
index b7d801993bfa..bcaefc952764 100644
--- a/Documentation/gpu/i915.rst
+++ b/Documentation/gpu/i915.rst
@@ -539,6 +539,7 @@ GuC ABI
.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/abi/guc_communication_mmio_abi.h
.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h
.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
+.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h
HuC
---
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 7686f46e42a4..9d588d936e3d 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -175,7 +175,7 @@ i915-y += \
i915_trace_points.o \
i915_ttm_buddy_manager.o \
i915_vma.o \
- i915_vma_snapshot.o \
+ i915_vma_resource.o \
intel_wopcm.o
# general-purpose microcontroller (GuC) support
diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c
index 2b8e89477f48..05dd7dba3a5c 100644
--- a/drivers/gpu/drm/i915/display/intel_dpt.c
+++ b/drivers/gpu/drm/i915/display/intel_dpt.c
@@ -50,7 +50,7 @@ static void dpt_insert_page(struct i915_address_space *vm,
}
static void dpt_insert_entries(struct i915_address_space *vm,
- struct i915_vma *vma,
+ struct i915_vma_resource *vma_res,
enum i915_cache_level level,
u32 flags)
{
@@ -66,8 +66,8 @@ static void dpt_insert_entries(struct i915_address_space *vm,
* not to allow the user to override access to a read only page.
*/
- i = vma->node.start / I915_GTT_PAGE_SIZE;
- for_each_sgt_daddr(addr, sgt_iter, vma->pages)
+ i = vma_res->start / I915_GTT_PAGE_SIZE;
+ for_each_sgt_daddr(addr, sgt_iter, vma_res->bi.pages)
gen8_set_pte(&base[i++], pte_encode | addr);
}
@@ -78,35 +78,38 @@ static void dpt_clear_range(struct i915_address_space *vm,
static void dpt_bind_vma(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
- struct i915_vma *vma,
+ struct i915_vma_resource *vma_res,
enum i915_cache_level cache_level,
u32 flags)
{
- struct drm_i915_gem_object *obj = vma->obj;
u32 pte_flags;
+ if (vma_res->bound_flags)
+ return;
+
/* Applicable to VLV (gen8+ do not support RO in the GGTT) */
pte_flags = 0;
- if (vma->vm->has_read_only && i915_gem_object_is_readonly(obj))
+ if (vm->has_read_only && vma_res->bi.readonly)
pte_flags |= PTE_READ_ONLY;
- if (i915_gem_object_is_lmem(obj))
+ if (vma_res->bi.lmem)
pte_flags |= PTE_LM;
- vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
+ vm->insert_entries(vm, vma_res, cache_level, pte_flags);
- vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
+ vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE;
/*
* Without aliasing PPGTT there's no difference between
* GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
* upgrade to both bound if we bind either to avoid double-binding.
*/
- atomic_or(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND, &vma->flags);
+ vma_res->bound_flags = I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
}
-static void dpt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma)
+static void dpt_unbind_vma(struct i915_address_space *vm,
+ struct i915_vma_resource *vma_res)
{
- vm->clear_range(vm, vma->node.start, vma->size);
+ vm->clear_range(vm, vma_res->start, vma_res->vma_size);
}
static void dpt_cleanup(struct i915_address_space *vm)
@@ -252,7 +255,11 @@ intel_dpt_create(struct intel_framebuffer *fb)
if (IS_ERR(dpt_obj))
return ERR_CAST(dpt_obj);
- ret = i915_gem_object_set_cache_level(dpt_obj, I915_CACHE_NONE);
+ ret = i915_gem_object_lock_interruptible(dpt_obj, NULL);
+ if (!ret) {
+ ret = i915_gem_object_set_cache_level(dpt_obj, I915_CACHE_NONE);
+ i915_gem_object_unlock(dpt_obj);
+ }
if (ret) {
i915_gem_object_put(dpt_obj);
return ERR_PTR(ret);
diff --git a/drivers/gpu/drm/i915/display/intel_fb_pin.c b/drivers/gpu/drm/i915/display/intel_fb_pin.c
index e60046d90124..a307b4993bcf 100644
--- a/drivers/gpu/drm/i915/display/intel_fb_pin.c
+++ b/drivers/gpu/drm/i915/display/intel_fb_pin.c
@@ -37,7 +37,11 @@ intel_pin_fb_obj_dpt(struct drm_framebuffer *fb,
atomic_inc(&dev_priv->gpu_error.pending_fb_pin);
- ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
+ ret = i915_gem_object_lock_interruptible(obj, NULL);
+ if (!ret) {
+ ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
+ i915_gem_object_unlock(obj);
+ }
if (ret) {
vma = ERR_PTR(ret);
goto err;
@@ -48,7 +52,7 @@ intel_pin_fb_obj_dpt(struct drm_framebuffer *fb,
goto err;
if (i915_vma_misplaced(vma, 0, alignment, 0)) {
- ret = i915_vma_unbind(vma);
+ ret = i915_vma_unbind_unlocked(vma);
if (ret) {
vma = ERR_PTR(ret);
goto err;
diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c
index 3ec34de35460..87f4af3fd523 100644
--- a/drivers/gpu/drm/i915/display/intel_fbc.c
+++ b/drivers/gpu/drm/i915/display/intel_fbc.c
@@ -605,7 +605,7 @@ static void ivb_fbc_activate(struct intel_fbc *fbc)
else if (DISPLAY_VER(i915) == 9)
skl_fbc_program_cfb_stride(fbc);
- if (i915->ggtt.num_fences)
+ if (to_gt(i915)->ggtt->num_fences)
snb_fbc_program_fence(fbc);
intel_de_write(i915, ILK_DPFC_CONTROL(fbc->id),
diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c
index 837484cbcd33..fd5bc7acf08d 100644
--- a/drivers/gpu/drm/i915/display/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/display/intel_fbdev.c
@@ -197,7 +197,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
struct drm_device *dev = helper->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev);
- struct i915_ggtt *ggtt = &dev_priv->ggtt;
+ struct i915_ggtt *ggtt = to_gt(dev_priv)->ggtt;
const struct i915_ggtt_view view = {
.type = I915_GGTT_VIEW_NORMAL,
};
diff --git a/drivers/gpu/drm/i915/display/intel_plane_initial.c b/drivers/gpu/drm/i915/display/intel_plane_initial.c
index f2fbb196d62a..d7b1de4cc205 100644
--- a/drivers/gpu/drm/i915/display/intel_plane_initial.c
+++ b/drivers/gpu/drm/i915/display/intel_plane_initial.c
@@ -46,17 +46,18 @@ static struct i915_vma *
initial_plane_vma(struct drm_i915_private *i915,
struct intel_initial_plane_config *plane_config)
{
+ struct intel_memory_region *mem = i915->mm.stolen_region;
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
u32 base, size;
- if (plane_config->size == 0)
+ if (!mem || plane_config->size == 0)
return NULL;
base = round_down(plane_config->base,
I915_GTT_MIN_ALIGNMENT);
size = round_up(plane_config->base + plane_config->size,
- I915_GTT_MIN_ALIGNMENT);
+ mem->min_page_size);
size -= base;
/*
@@ -94,7 +95,7 @@ initial_plane_vma(struct drm_i915_private *i915,
goto err_obj;
}
- vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
+ vma = i915_vma_instance(obj, &to_gt(i915)->ggtt->vm, NULL);
if (IS_ERR(vma))
goto err_obj;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 2958e2be4292..bc6d59df064d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -587,10 +587,6 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base,
struct intel_engine_cs **siblings = NULL;
intel_engine_mask_t prev_mask;
- /* FIXME: This is NIY for execlists */
- if (!(intel_uc_uses_guc_submission(&to_gt(i915)->uc)))
- return -ENODEV;
-
if (get_user(slot, &ext->engine_index))
return -EFAULT;
@@ -600,6 +596,13 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base,
if (get_user(num_siblings, &ext->num_siblings))
return -EFAULT;
+ if (!intel_uc_uses_guc_submission(&to_gt(i915)->uc) &&
+ num_siblings != 1) {
+ drm_dbg(&i915->drm, "Only 1 sibling (%d) supported in non-GuC mode\n",
+ num_siblings);
+ return -EINVAL;
+ }
+
if (slot >= set->num_engines) {
drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n",
slot, set->num_engines);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index babfecb17ad1..e5b0f66ea1fe 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -174,7 +174,7 @@ i915_gem_context_get_eb_vm(struct i915_gem_context *ctx)
vm = ctx->vm;
if (!vm)
- vm = &ctx->i915->ggtt.vm;
+ vm = &to_gt(ctx->i915)->ggtt->vm;
vm = i915_vm_get(vm);
return vm;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 0a4ecf71ee08..d42f437149c9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -32,7 +32,6 @@
#include "i915_gem_ioctls.h"
#include "i915_trace.h"
#include "i915_user_extensions.h"
-#include "i915_vma_snapshot.h"
struct eb_vma {
struct i915_vma *vma;
@@ -444,7 +443,7 @@ eb_pin_vma(struct i915_execbuffer *eb,
else
pin_flags = entry->offset & PIN_OFFSET_MASK;
- pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED;
+ pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED | PIN_VALIDATE;
if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_GTT))
pin_flags |= PIN_GLOBAL;
@@ -462,17 +461,15 @@ eb_pin_vma(struct i915_execbuffer *eb,
entry->pad_to_size,
entry->alignment,
eb_pin_flags(entry, ev->flags) |
- PIN_USER | PIN_NOEVICT);
+ PIN_USER | PIN_NOEVICT | PIN_VALIDATE);
if (unlikely(err))
return err;
}
if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) {
err = i915_vma_pin_fence(vma);
- if (unlikely(err)) {
- i915_vma_unpin(vma);
+ if (unlikely(err))
return err;
- }
if (vma->fence)
ev->flags |= __EXEC_OBJECT_HAS_FENCE;
@@ -488,13 +485,9 @@ eb_pin_vma(struct i915_execbuffer *eb,
static inline void
eb_unreserve_vma(struct eb_vma *ev)
{
- if (!(ev->flags & __EXEC_OBJECT_HAS_PIN))
- return;
-
if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE))
__i915_vma_unpin_fence(ev->vma);
- __i915_vma_unpin(ev->vma);
ev->flags &= ~__EXEC_OBJECT_RESERVED;
}
@@ -676,10 +669,8 @@ static int eb_reserve_vma(struct i915_execbuffer *eb,
if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) {
err = i915_vma_pin_fence(vma);
- if (unlikely(err)) {
- i915_vma_unpin(vma);
+ if (unlikely(err))
return err;
- }
if (vma->fence)
ev->flags |= __EXEC_OBJECT_HAS_FENCE;
@@ -691,85 +682,95 @@ static int eb_reserve_vma(struct i915_execbuffer *eb,
return 0;
}
-static int eb_reserve(struct i915_execbuffer *eb)
+static bool eb_unbind(struct i915_execbuffer *eb, bool force)
{
const unsigned int count = eb->buffer_count;
- unsigned int pin_flags = PIN_USER | PIN_NONBLOCK;
+ unsigned int i;
struct list_head last;
+ bool unpinned = false;
+
+ /* Resort *all* the objects into priority order */
+ INIT_LIST_HEAD(&eb->unbound);
+ INIT_LIST_HEAD(&last);
+
+ for (i = 0; i < count; i++) {
+ struct eb_vma *ev = &eb->vma[i];
+ unsigned int flags = ev->flags;
+
+ if (!force && flags & EXEC_OBJECT_PINNED &&
+ flags & __EXEC_OBJECT_HAS_PIN)
+ continue;
+
+ unpinned = true;
+ eb_unreserve_vma(ev);
+
+ if (flags & EXEC_OBJECT_PINNED)
+ /* Pinned must have their slot */
+ list_add(&ev->bind_link, &eb->unbound);
+ else if (flags & __EXEC_OBJECT_NEEDS_MAP)
+ /* Map require the lowest 256MiB (aperture) */
+ list_add_tail(&ev->bind_link, &eb->unbound);
+ else if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
+ /* Prioritise 4GiB region for restricted bo */
+ list_add(&ev->bind_link, &last);
+ else
+ list_add_tail(&ev->bind_link, &last);
+ }
+
+ list_splice_tail(&last, &eb->unbound);
+ return unpinned;
+}
+
+static int eb_reserve(struct i915_execbuffer *eb)
+{
struct eb_vma *ev;
- unsigned int i, pass;
+ unsigned int pass;
int err = 0;
+ bool unpinned;
/*
* Attempt to pin all of the buffers into the GTT.
- * This is done in 3 phases:
+ * This is done in 2 phases:
*
- * 1a. Unbind all objects that do not match the GTT constraints for
- * the execbuffer (fenceable, mappable, alignment etc).
- * 1b. Increment pin count for already bound objects.
- * 2. Bind new objects.
- * 3. Decrement pin count.
+ * 1. Unbind all objects that do not match the GTT constraints for
+ * the execbuffer (fenceable, mappable, alignment etc).
+ * 2. Bind new objects.
*
* This avoid unnecessary unbinding of later objects in order to make
* room for the earlier objects *unless* we need to defragment.
+ *
+ * Defragmenting is skipped if all objects are pinned at a fixed location.
*/
- pass = 0;
- do {
- list_for_each_entry(ev, &eb->unbound, bind_link) {
- err = eb_reserve_vma(eb, ev, pin_flags);
- if (err)
- break;
- }
- if (err != -ENOSPC)
- return err;
+ for (pass = 0; pass <= 2; pass++) {
+ int pin_flags = PIN_USER | PIN_VALIDATE;
- /* Resort *all* the objects into priority order */
- INIT_LIST_HEAD(&eb->unbound);
- INIT_LIST_HEAD(&last);
- for (i = 0; i < count; i++) {
- unsigned int flags;
+ if (pass == 0)
+ pin_flags |= PIN_NONBLOCK;
- ev = &eb->vma[i];
- flags = ev->flags;
- if (flags & EXEC_OBJECT_PINNED &&
- flags & __EXEC_OBJECT_HAS_PIN)
- continue;
+ if (pass >= 1)
+ unpinned = eb_unbind(eb, pass == 2);
- eb_unreserve_vma(ev);
-
- if (flags & EXEC_OBJECT_PINNED)
- /* Pinned must have their slot */
- list_add(&ev->bind_link, &eb->unbound);
- else if (flags & __EXEC_OBJECT_NEEDS_MAP)
- /* Map require the lowest 256MiB (aperture) */
- list_add_tail(&ev->bind_link, &eb->unbound);
- else if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
- /* Prioritise 4GiB region for restricted bo */
- list_add(&ev->bind_link, &last);
- else
- list_add_tail(&ev->bind_link, &last);
- }
- list_splice_tail(&last, &eb->unbound);
-
- switch (pass++) {
- case 0:
- break;
-
- case 1:
- /* Too fragmented, unbind everything and retry */
- mutex_lock(&eb->context->vm->mutex);
- err = i915_gem_evict_vm(eb->context->vm);
- mutex_unlock(&eb->context->vm->mutex);
+ if (pass == 2) {
+ err = mutex_lock_interruptible(&eb->context->vm->mutex);
+ if (!err) {
+ err = i915_gem_evict_vm(eb->context->vm, &eb->ww);
+ mutex_unlock(&eb->context->vm->mutex);
+ }
if (err)
return err;
- break;
+ }
- default:
- return -ENOSPC;
+ list_for_each_entry(ev, &eb->unbound, bind_link) {
+ err = eb_reserve_vma(eb, ev, pin_flags);
+ if (err)
+ break;
}
- pin_flags = PIN_USER;
- } while (1);
+ if (err != -ENOSPC)
+ break;
+ }
+
+ return err;
}
static int eb_select_context(struct i915_execbuffer *eb)
@@ -1098,7 +1099,7 @@ static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
{
struct drm_i915_private *i915 =
container_of(cache, struct i915_execbuffer, reloc_cache)->i915;
- return &i915->ggtt;
+ return to_gt(i915)->ggtt;
}
static void reloc_cache_unmap(struct reloc_cache *cache)
@@ -1217,10 +1218,11 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
return vaddr;
}
-static void *reloc_iomap(struct drm_i915_gem_object *obj,
+static void *reloc_iomap(struct i915_vma *batch,
struct i915_execbuffer *eb,
unsigned long page)
{
+ struct drm_i915_gem_object *obj = batch->obj;
struct reloc_cache *cache = &eb->reloc_cache;
struct i915_ggtt *ggtt = cache_to_ggtt(cache);
unsigned long offset;
@@ -1230,7 +1232,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
intel_gt_flush_ggtt_writes(ggtt->vm.gt);
io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr));
} else {
- struct i915_vma *vma;
+ struct i915_vma *vma = ERR_PTR(-ENODEV);
int err;
if (i915_gem_object_is_tiled(obj))
@@ -1243,10 +1245,23 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
if (err)
return ERR_PTR(err);
- vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL, 0, 0,
- PIN_MAPPABLE |
- PIN_NONBLOCK /* NOWARN */ |
- PIN_NOEVICT);
+ /*
+ * i915_gem_object_ggtt_pin_ww may attempt to remove the batch
+ * VMA from the object list because we no longer pin.
+ *
+ * Only attempt to pin the batch buffer to ggtt if the current batch
+ * is not inside ggtt, or the batch buffer is not misplaced.
+ */
+ if (!i915_is_ggtt(batch->vm)) {
+ vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL, 0, 0,
+ PIN_MAPPABLE |
+ PIN_NONBLOCK /* NOWARN */ |
+ PIN_NOEVICT);
+ } else if (i915_vma_is_map_and_fenceable(batch)) {
+ __i915_vma_pin(batch);
+ vma = batch;
+ }
+
if (vma == ERR_PTR(-EDEADLK))
return vma;
@@ -1284,7 +1299,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
return vaddr;
}
-static void *reloc_vaddr(struct drm_i915_gem_object *obj,
+static void *reloc_vaddr(struct i915_vma *vma,
struct i915_execbuffer *eb,
unsigned long page)
{
@@ -1296,9 +1311,9 @@ static void *reloc_vaddr(struct drm_i915_gem_object *obj,
} else {
vaddr = NULL;
if ((cache->vaddr & KMAP) == 0)
- vaddr = reloc_iomap(obj, eb, page);
+ vaddr = reloc_iomap(vma, eb, page);
if (!vaddr)
- vaddr = reloc_kmap(obj, cache, page);
+ vaddr = reloc_kmap(vma->obj, cache, page);
}
return vaddr;
@@ -1339,7 +1354,7 @@ relocate_entry(struct i915_vma *vma,
void *vaddr;
repeat:
- vaddr = reloc_vaddr(vma->obj, eb,
+ vaddr = reloc_vaddr(vma, eb,
offset >> PAGE_SHIFT);
if (IS_ERR(vaddr))
return PTR_ERR(vaddr);
@@ -1414,7 +1429,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
mutex_lock(&vma->vm->mutex);
err = i915_vma_bind(target->vma,
target->vma->obj->cache_level,
- PIN_GLOBAL, NULL);
+ PIN_GLOBAL, NULL, NULL);
mutex_unlock(&vma->vm->mutex);
reloc_cache_remap(&eb->reloc_cache, ev->vma->obj);
if (err)
@@ -1944,7 +1959,6 @@ static void eb_capture_stage(struct i915_execbuffer *eb)
{
const unsigned int count = eb->buffer_count;
unsigned int i = count, j;
- struct i915_vma_snapshot *vsnap;
while (i--) {
struct eb_vma *ev = &eb->vma[i];
@@ -1954,11 +1968,6 @@ static void eb_capture_stage(struct i915_execbuffer *eb)
if (!(flags & EXEC_OBJECT_CAPTURE))
continue;
- vsnap = i915_vma_snapshot_alloc(GFP_KERNEL);
- if (!vsnap)
- continue;
-
- i915_vma_snapshot_init(vsnap, vma, "user");
for_each_batch_create_order(eb, j) {
struct i915_capture_list *capture;
@@ -1967,10 +1976,9 @@ static void eb_capture_stage(struct i915_execbuffer *eb)
continue;
capture->next = eb->capture_lists[j];
- capture->vma_snapshot = i915_vma_snapshot_get(vsnap);
+ capture->vma_res = i915_vma_resource_get(vma->resource);
eb->capture_lists[j] = capture;
}
- i915_vma_snapshot_put(vsnap);
}
}
@@ -2201,7 +2209,7 @@ shadow_batch_pin(struct i915_execbuffer *eb,
if (IS_ERR(vma))
return vma;
- err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, flags);
+ err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, flags | PIN_VALIDATE);
if (err)
return ERR_PTR(err);
@@ -2215,7 +2223,7 @@ static struct i915_vma *eb_dispatch_secure(struct i915_execbuffer *eb, struct i9
* batch" bit. Hence we need to pin secure batches into the global gtt.
* hsw should have this fixed, but bdw mucks it up again. */
if (eb->batch_flags & I915_DISPATCH_SECURE)
- return i915_gem_object_ggtt_pin_ww(vma->obj, &eb->ww, NULL, 0, 0, 0);
+ return i915_gem_object_ggtt_pin_ww(vma->obj, &eb->ww, NULL, 0, 0, PIN_VALIDATE);
return NULL;
}
@@ -2266,13 +2274,12 @@ static int eb_parse(struct i915_execbuffer *eb)
err = i915_gem_object_lock(pool->obj, &eb->ww);
if (err)
- goto err;
+ return err;
shadow = shadow_batch_pin(eb, pool->obj, eb->context->vm, PIN_USER);
- if (IS_ERR(shadow)) {
- err = PTR_ERR(shadow);
- goto err;
- }
+ if (IS_ERR(shadow))
+ return PTR_ERR(shadow);
+
intel_gt_buffer_pool_mark_used(pool);
i915_gem_object_set_readonly(shadow->obj);
shadow->private = pool;
@@ -2284,25 +2291,21 @@ static int eb_parse(struct i915_execbuffer *eb)
shadow = shadow_batch_pin(eb, pool->obj,
&eb->gt->ggtt->vm,
PIN_GLOBAL);
- if (IS_ERR(shadow)) {
- err = PTR_ERR(shadow);
- shadow = trampoline;
- goto err_shadow;
- }
+ if (IS_ERR(shadow))
+ return PTR_ERR(shadow);
+
shadow->private = pool;
eb->batch_flags |= I915_DISPATCH_SECURE;
}
batch = eb_dispatch_secure(eb, shadow);
- if (IS_ERR(batch)) {
- err = PTR_ERR(batch);
- goto err_trampoline;
- }
+ if (IS_ERR(batch))
+ return PTR_ERR(batch);
err = dma_resv_reserve_shared(shadow->obj->base.resv, 1);
if (err)
- goto err_trampoline;
+ return err;
err = intel_engine_cmd_parser(eb->context->engine,
eb->batches[0]->vma,
@@ -2310,7 +2313,7 @@ static int eb_parse(struct i915_execbuffer *eb)
eb->batch_len[0],
shadow, trampoline);
if (err)
- goto err_unpin_batch;
+ return err;
eb->batches[0] = &eb->vma[eb->buffer_count++];
eb->batches[0]->vma = i915_vma_get(shadow);
@@ -2329,17 +2332,6 @@ secure_batch:
eb->batches[0]->vma = i915_vma_get(batch);
}
return 0;
-
-err_unpin_batch:
- if (batch)
- i915_vma_unpin(batch);
-err_trampoline:
- if (trampoline)
- i915_vma_unpin(trampoline);
-err_shadow:
- i915_vma_unpin(shadow);
-err:
- return err;
}
static int eb_request_submit(struct i915_execbuffer *eb,
@@ -3278,9 +3270,8 @@ eb_requests_create(struct i915_execbuffer *eb, struct dma_fence *in_fence,
* _onstack interface.
*/
if (eb->batches[i]->vma)
- i915_vma_snapshot_init_onstack(&eb->requests[i]->batch_snapshot,
- eb->batches[i]->vma,
- "batch");
+ eb->requests[i]->batch_res =
+ i915_vma_resource_get(eb->batches[i]->vma->resource);
if (eb->batch_pool) {
GEM_BUG_ON(intel_context_is_parallel(eb->context));
intel_gt_buffer_pool_mark_active(eb->batch_pool,
@@ -3465,8 +3456,6 @@ err_request:
err_vma:
eb_release_vmas(&eb, true);
- if (eb.trampoline)
- i915_vma_unpin(eb.trampoline);
WARN_ON(err == -EDEADLK);
i915_gem_ww_ctx_fini(&eb.ww);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index a3b60e1ede90..efe69d6b86f4 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -15,6 +15,7 @@
#include "gt/intel_gt_requests.h"
#include "i915_drv.h"
+#include "i915_gem_evict.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
@@ -297,7 +298,7 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
struct drm_device *dev = obj->base.dev;
struct drm_i915_private *i915 = to_i915(dev);
struct intel_runtime_pm *rpm = &i915->runtime_pm;
- struct i915_ggtt *ggtt = &i915->ggtt;
+ struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
bool write = area->vm_flags & VM_WRITE;
struct i915_gem_ww_ctx ww;
intel_wakeref_t wakeref;
@@ -360,8 +361,21 @@ retry:
vma = i915_gem_object_ggtt_pin_ww(obj, &ww, &view, 0, 0, flags);
}
- /* The entire mappable GGTT is pinned? Unexpected! */
- GEM_BUG_ON(vma == ERR_PTR(-ENOSPC));
+ /*
+ * The entire mappable GGTT is pinned? Unexpected!
+ * Try to evict the object we locked too, as normally we skip it
+ * due to lack of short term pinning inside execbuf.
+ */
+ if (vma == ERR_PTR(-ENOSPC)) {
+ ret = mutex_lock_interruptible(&ggtt->vm.mutex);
+ if (!ret) {
+ ret = i915_gem_evict_vm(&ggtt->vm, &ww);
+ mutex_unlock(&ggtt->vm.mutex);
+ }
+ if (ret)
+ goto err_reset;
+ vma = i915_gem_object_ggtt_pin_ww(obj, &ww, &view, 0, 0, flags);
+ }
}
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
@@ -390,16 +404,16 @@ retry:
assert_rpm_wakelock_held(rpm);
/* Mark as being mmapped into userspace for later revocation */
- mutex_lock(&i915->ggtt.vm.mutex);
+ mutex_lock(&to_gt(i915)->ggtt->vm.mutex);
if (!i915_vma_set_userfault(vma) && !obj->userfault_count++)
- list_add(&obj->userfault_link, &i915->ggtt.userfault_list);
- mutex_unlock(&i915->ggtt.vm.mutex);
+ list_add(&obj->userfault_link, &to_gt(i915)->ggtt->userfault_list);
+ mutex_unlock(&to_gt(i915)->ggtt->vm.mutex);
/* Track the mmo associated with the fenced vma */
vma->mmo = mmo;
if (CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)
- intel_wakeref_auto(&i915->ggtt.userfault_wakeref,
+ intel_wakeref_auto(&to_gt(i915)->ggtt->userfault_wakeref,
msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND));
if (write) {
@@ -514,7 +528,7 @@ void i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj)
* wakeref.
*/
wakeref = intel_runtime_pm_get(&i915->runtime_pm);
- mutex_lock(&i915->ggtt.vm.mutex);
+ mutex_lock(&to_gt(i915)->ggtt->vm.mutex);
if (!obj->userfault_count)
goto out;
@@ -532,7 +546,7 @@ void i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj)
wmb();
out:
- mutex_unlock(&i915->ggtt.vm.mutex);
+ mutex_unlock(&to_gt(i915)->ggtt->vm.mutex);
intel_runtime_pm_put(&i915->runtime_pm, wakeref);
}
@@ -738,13 +752,14 @@ i915_gem_dumb_mmap_offset(struct drm_file *file,
u32 handle,
u64 *offset)
{
+ struct drm_i915_private *i915 = to_i915(dev);
enum i915_mmap_type mmap_type;
if (HAS_LMEM(to_i915(dev)))
mmap_type = I915_MMAP_TYPE_FIXED;
else if (pat_enabled())
mmap_type = I915_MMAP_TYPE_WC;
- else if (!i915_ggtt_has_aperture(&to_i915(dev)->ggtt))
+ else if (!i915_ggtt_has_aperture(to_gt(i915)->ggtt))
return -ENODEV;
else
mmap_type = I915_MMAP_TYPE_GTT;
@@ -792,7 +807,7 @@ i915_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
switch (args->flags) {
case I915_MMAP_OFFSET_GTT:
- if (!i915_ggtt_has_aperture(&i915->ggtt))
+ if (!i915_ggtt_has_aperture(to_gt(i915)->ggtt))
return -ENODEV;
type = I915_MMAP_TYPE_GTT;
break;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 9a478c19c477..2d593d573ef1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -285,6 +285,12 @@ void __i915_gem_object_pages_fini(struct drm_i915_gem_object *obj)
GEM_BUG_ON(vma->obj != obj);
spin_unlock(&obj->vma.lock);
+ /* Verify that the vma is unbound under the vm mutex. */
+ mutex_lock(&vma->vm->mutex);
+ atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
+ __i915_vma_unbind(vma);
+ mutex_unlock(&vma->vm->mutex);
+
__i915_vma_put(vma);
spin_lock(&obj->vma.lock);
@@ -761,6 +767,18 @@ i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj)
return dma_fence_get(i915_gem_to_ttm(obj)->moving);
}
+void i915_gem_object_set_moving_fence(struct drm_i915_gem_object *obj,
+ struct dma_fence *fence)
+{
+ struct dma_fence **moving = &i915_gem_to_ttm(obj)->moving;
+
+ if (*moving == fence)
+ return;
+
+ dma_fence_put(*moving);
+ *moving = dma_fence_get(fence);
+}
+
/**
* i915_gem_object_wait_moving_fence - Wait for the object's moving fence if any
* @obj: The object whose moving fence to wait for.
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index f66d46882ea7..02c37fe4a535 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -459,7 +459,6 @@ i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
int i915_gem_object_truncate(struct drm_i915_gem_object *obj);
-void i915_gem_object_writeback(struct drm_i915_gem_object *obj);
/**
* i915_gem_object_pin_map - return a contiguous mapping of the entire object
@@ -524,6 +523,9 @@ i915_gem_object_finish_access(struct drm_i915_gem_object *obj)
struct dma_fence *
i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj);
+void i915_gem_object_set_moving_fence(struct drm_i915_gem_object *obj,
+ struct dma_fence *fence);
+
int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj,
bool intr);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 0dd107dcecc2..0098a32490f0 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -15,6 +15,7 @@
#include "i915_active.h"
#include "i915_selftest.h"
+#include "i915_vma_resource.h"
struct drm_i915_gem_object;
struct intel_fronbuffer;
@@ -57,10 +58,26 @@ struct drm_i915_gem_object_ops {
void (*put_pages)(struct drm_i915_gem_object *obj,
struct sg_table *pages);
int (*truncate)(struct drm_i915_gem_object *obj);
- void (*writeback)(struct drm_i915_gem_object *obj);
- int (*shrinker_release_pages)(struct drm_i915_gem_object *obj,
- bool no_gpu_wait,
- bool should_writeback);
+ /**
+ * shrink - Perform further backend specific actions to facilitate
+ * shrinking.
+ * @obj: The gem object
+ * @flags: Extra flags to control shrinking behaviour in the backend
+ *
+ * Possible values for @flags:
+ *
+ * I915_GEM_OBJECT_SHRINK_WRITEBACK - Try to perform writeback of the
+ * backing pages, if supported.
+ *
+ * I915_GEM_OBJECT_SHRINK_NO_GPU_WAIT - Don't wait for the object to
+ * idle. Active objects can be considered later. The TTM backend for
+ * example might have async migrations going on, which don't use any
+ * i915_vma to track the active GTT binding, and hence having an unbound
+ * object might not be enough.
+ */
+#define I915_GEM_OBJECT_SHRINK_WRITEBACK BIT(0)
+#define I915_GEM_OBJECT_SHRINK_NO_GPU_WAIT BIT(1)
+ int (*shrink)(struct drm_i915_gem_object *obj, unsigned int flags);
int (*pread)(struct drm_i915_gem_object *obj,
const struct drm_i915_gem_pread *arg);
@@ -551,31 +568,7 @@ struct drm_i915_gem_object {
struct sg_table *pages;
void *mapping;
- struct i915_page_sizes {
- /**
- * The sg mask of the pages sg_table. i.e the mask of
- * of the lengths for each sg entry.
- */
- unsigned int phys;
-
- /**
- * The gtt page sizes we are allowed to use given the
- * sg mask and the supported page sizes. This will
- * express the smallest unit we can use for the whole
- * object, as well as the larger sizes we may be able
- * to use opportunistically.
- */
- unsigned int sg;
-
- /**
- * The actual gtt page size usage. Since we can have
- * multiple vma associated with this object we need to
- * prevent any trampling of state, hence a copy of this
- * struct also lives in each vma, therefore the gtt
- * value here should only be read/write through the vma.
- */
- unsigned int gtt;
- } page_sizes;
+ struct i915_page_sizes page_sizes;
I915_SELFTEST_DECLARE(unsigned int page_mask);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index 022582a1ca17..183b861620b8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -171,16 +171,6 @@ int i915_gem_object_truncate(struct drm_i915_gem_object *obj)
return 0;
}
-/* Try to discard unwanted pages */
-void i915_gem_object_writeback(struct drm_i915_gem_object *obj)
-{
- assert_object_held_shared(obj);
- GEM_BUG_ON(i915_gem_object_has_pages(obj));
-
- if (obj->ops->writeback)
- obj->ops->writeback(obj);
-}
-
static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
{
struct radix_tree_iter iter;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index e81e5ac8d376..00359ec9d58b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -24,7 +24,7 @@ void i915_gem_suspend(struct drm_i915_private *i915)
{
GEM_TRACE("%s\n", dev_name(i915->drm.dev));
- intel_wakeref_auto(&i915->ggtt.userfault_wakeref, 0);
+ intel_wakeref_auto(&to_gt(i915)->ggtt->userfault_wakeref, 0);
flush_workqueue(i915->wq);
/*
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 60f962f0c041..4efa821f3cb1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -334,6 +334,21 @@ shmem_writeback(struct drm_i915_gem_object *obj)
__shmem_writeback(obj->base.size, obj->base.filp->f_mapping);
}
+static int shmem_shrink(struct drm_i915_gem_object *obj, unsigned int flags)
+{
+ switch (obj->mm.madv) {
+ case I915_MADV_DONTNEED:
+ return i915_gem_object_truncate(obj);
+ case __I915_MADV_PURGED:
+ return 0;
+ }
+
+ if (flags & I915_GEM_OBJECT_SHRINK_WRITEBACK)
+ shmem_writeback(obj);
+
+ return 0;
+}
+
void
__i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
struct sg_table *pages,
@@ -506,7 +521,7 @@ const struct drm_i915_gem_object_ops i915_gem_shmem_ops = {
.get_pages = shmem_get_pages,
.put_pages = shmem_put_pages,
.truncate = shmem_truncate,
- .writeback = shmem_writeback,
+ .shrink = shmem_shrink,
.pwrite = shmem_pwrite,
.pread = shmem_pread,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index cc927e49d21f..6a6ff98a8746 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -57,21 +57,17 @@ static int drop_pages(struct drm_i915_gem_object *obj,
static int try_to_writeback(struct drm_i915_gem_object *obj, unsigned int flags)
{
- if (obj->ops->shrinker_release_pages)
- return obj->ops->shrinker_release_pages(obj,
- !(flags & I915_SHRINK_ACTIVE),
- flags & I915_SHRINK_WRITEBACK);
-
- switch (obj->mm.madv) {
- case I915_MADV_DONTNEED:
- i915_gem_object_truncate(obj);
- return 0;
- case __I915_MADV_PURGED:
- return 0;
- }
+ if (obj->ops->shrink) {
+ unsigned int shrink_flags = 0;
+
+ if (!(flags & I915_SHRINK_ACTIVE))
+ shrink_flags |= I915_GEM_OBJECT_SHRINK_NO_GPU_WAIT;
- if (flags & I915_SHRINK_WRITEBACK)
- i915_gem_object_writeback(obj);
+ if (flags & I915_SHRINK_WRITEBACK)
+ shrink_flags |= I915_GEM_OBJECT_SHRINK_WRITEBACK;
+
+ return obj->ops->shrink(obj, shrink_flags);
+ }
return 0;
}
@@ -401,9 +397,9 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
I915_SHRINK_VMAPS);
/* We also want to clear any cached iomaps as they wrap vmap */
- mutex_lock(&i915->ggtt.vm.mutex);
+ mutex_lock(&to_gt(i915)->ggtt->vm.mutex);
list_for_each_entry_safe(vma, next,
- &i915->ggtt.vm.bound_list, vm_link) {
+ &to_gt(i915)->ggtt->vm.bound_list, vm_link) {
unsigned long count = vma->node.size >> PAGE_SHIFT;
struct drm_i915_gem_object *obj = vma->obj;
@@ -418,7 +414,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
i915_gem_object_unlock(obj);
}
- mutex_unlock(&i915->ggtt.vm.mutex);
+ mutex_unlock(&to_gt(i915)->ggtt->vm.mutex);
*(unsigned long *)ptr += freed_pages;
return NOTIFY_DONE;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index 0272b24c5c57..b9c3196b91ca 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -73,7 +73,7 @@ void i915_gem_stolen_remove_node(struct drm_i915_private *i915,
static int i915_adjust_stolen(struct drm_i915_private *i915,
struct resource *dsm)
{
- struct i915_ggtt *ggtt = &i915->ggtt;
+ struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
struct intel_uncore *uncore = ggtt->vm.gt->uncore;
struct resource *r;
@@ -584,6 +584,7 @@ i915_pages_create_for_stolen(struct drm_device *dev,
static int i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj)
{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct sg_table *pages =
i915_pages_create_for_stolen(obj->base.dev,
obj->stolen->start,
@@ -591,7 +592,7 @@ static int i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj)
if (IS_ERR(pages))
return PTR_ERR(pages);
- dbg_poison(&to_i915(obj->base.dev)->ggtt,
+ dbg_poison(to_gt(i915)->ggtt,
sg_dma_address(pages->sgl),
sg_dma_len(pages->sgl),
POISON_INUSE);
@@ -604,9 +605,10 @@ static int i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj)
static void i915_gem_object_put_pages_stolen(struct drm_i915_gem_object *obj,
struct sg_table *pages)
{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
/* Should only be called from i915_gem_object_release_stolen() */
- dbg_poison(&to_i915(obj->base.dev)->ggtt,
+ dbg_poison(to_gt(i915)->ggtt,
sg_dma_address(pages->sgl),
sg_dma_len(pages->sgl),
POISON_FREE);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
index 0e0e4805161a..d6adda5bf96b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
@@ -183,7 +183,8 @@ static int
i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj,
int tiling_mode, unsigned int stride)
{
- struct i915_ggtt *ggtt = &to_i915(obj->base.dev)->ggtt;
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
struct i915_vma *vma, *vn;
LIST_HEAD(unbind);
int ret = 0;
@@ -338,7 +339,7 @@ i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
struct drm_i915_gem_object *obj;
int err;
- if (!dev_priv->ggtt.num_fences)
+ if (!to_gt(dev_priv)->ggtt->num_fences)
return -EOPNOTSUPP;
obj = i915_gem_object_lookup(file, args->handle);
@@ -364,9 +365,9 @@ i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
args->stride = 0;
} else {
if (args->tiling_mode == I915_TILING_X)
- args->swizzle_mode = to_i915(dev)->ggtt.bit_6_swizzle_x;
+ args->swizzle_mode = to_gt(dev_priv)->ggtt->bit_6_swizzle_x;
else
- args->swizzle_mode = to_i915(dev)->ggtt.bit_6_swizzle_y;
+ args->swizzle_mode = to_gt(dev_priv)->ggtt->bit_6_swizzle_y;
/* Hide bit 17 swizzling from the user. This prevents old Mesa
* from aborting the application on sw fallbacks to bit 17,
@@ -421,7 +422,7 @@ i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
struct drm_i915_gem_object *obj;
int err = -ENOENT;
- if (!dev_priv->ggtt.num_fences)
+ if (!to_gt(dev_priv)->ggtt->num_fences)
return -EOPNOTSUPP;
rcu_read_lock();
@@ -437,10 +438,10 @@ i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
switch (args->tiling_mode) {
case I915_TILING_X:
- args->swizzle_mode = dev_priv->ggtt.bit_6_swizzle_x;
+ args->swizzle_mode = to_gt(dev_priv)->ggtt->bit_6_swizzle_x;
break;
case I915_TILING_Y:
- args->swizzle_mode = dev_priv->ggtt.bit_6_swizzle_y;
+ args->swizzle_mode = to_gt(dev_priv)->ggtt->bit_6_swizzle_y;
break;
default:
case I915_TILING_NONE:
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 68f758ebbd4b..8419096d4056 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -426,16 +426,14 @@ int i915_ttm_purge(struct drm_i915_gem_object *obj)
return 0;
}
-static int i915_ttm_shrinker_release_pages(struct drm_i915_gem_object *obj,
- bool no_wait_gpu,
- bool should_writeback)
+static int i915_ttm_shrink(struct drm_i915_gem_object *obj, unsigned int flags)
{
struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
struct i915_ttm_tt *i915_tt =
container_of(bo->ttm, typeof(*i915_tt), ttm);
struct ttm_operation_ctx ctx = {
.interruptible = true,
- .no_wait_gpu = no_wait_gpu,
+ .no_wait_gpu = flags & I915_GEM_OBJECT_SHRINK_NO_GPU_WAIT,
};
struct ttm_placement place = {};
int ret;
@@ -469,7 +467,7 @@ static int i915_ttm_shrinker_release_pages(struct drm_i915_gem_object *obj,
return ret;
}
- if (should_writeback)
+ if (flags & I915_GEM_OBJECT_SHRINK_WRITEBACK)
__shmem_writeback(obj->base.size, i915_tt->filp->f_mapping);
return 0;
@@ -977,7 +975,7 @@ static const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops = {
.get_pages = i915_ttm_get_pages,
.put_pages = i915_ttm_put_pages,
.truncate = i915_ttm_truncate,
- .shrinker_release_pages = i915_ttm_shrinker_release_pages,
+ .shrink = i915_ttm_shrink,
.adjust_lru = i915_ttm_adjust_lru,
.delayed_free = i915_ttm_delayed_free,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index e130c820ae4e..1ebe6e4086a1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -142,7 +142,16 @@ int i915_ttm_move_notify(struct ttm_buffer_object *bo)
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
int ret;
- ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE);
+ /*
+ * Note: The async unbinding here will actually transform the
+ * blocking wait for unbind into a wait before finally submitting
+ * evict / migration blit and thus stall the migration timeline
+ * which may not be good for overall throughput. We should make
+ * sure we await the unbind fences *after* the migration blit
+ * instead of *before* as we currently do.
+ */
+ ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE |
+ I915_GEM_OBJECT_UNBIND_ASYNC);
if (ret)
return ret;
@@ -531,7 +540,7 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
return ret;
}
- migration_fence = __i915_ttm_move(bo, ctx, clear, dst_mem, bo->ttm,
+ migration_fence = __i915_ttm_move(bo, ctx, clear, dst_mem, ttm,
dst_rsgt, true, &deps);
i915_deps_fini(&deps);
}
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index 6af237aa1854..8424ee8c5eb8 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -371,9 +371,9 @@ static int igt_check_page_sizes(struct i915_vma *vma)
err = -EINVAL;
}
- if (!HAS_PAGE_SIZES(i915, vma->page_sizes.gtt)) {
+ if (!HAS_PAGE_SIZES(i915, vma->resource->page_sizes_gtt)) {
pr_err("unsupported page_sizes.gtt=%u, supported=%u\n",
- vma->page_sizes.gtt & ~supported, supported);
+ vma->resource->page_sizes_gtt & ~supported, supported);
err = -EINVAL;
}
@@ -404,15 +404,9 @@ static int igt_check_page_sizes(struct i915_vma *vma)
if (i915_gem_object_is_lmem(obj) &&
IS_ALIGNED(vma->node.start, SZ_2M) &&
vma->page_sizes.sg & SZ_2M &&
- vma->page_sizes.gtt < SZ_2M) {
+ vma->resource->page_sizes_gtt < SZ_2M) {
pr_err("gtt pages mismatch for LMEM, expected 2M GTT pages, sg(%u), gtt(%u)\n",
- vma->page_sizes.sg, vma->page_sizes.gtt);
- err = -EINVAL;
- }
-
- if (obj->mm.page_sizes.gtt) {
- pr_err("obj->page_sizes.gtt(%u) should never be set\n",
- obj->mm.page_sizes.gtt);
+ vma->page_sizes.sg, vma->resource->page_sizes_gtt);
err = -EINVAL;
}
@@ -548,9 +542,9 @@ static int igt_mock_memory_region_huge_pages(void *arg)
goto out_unpin;
}
- if (vma->page_sizes.gtt != page_size) {
+ if (vma->resource->page_sizes_gtt != page_size) {
pr_err("%s page_sizes.gtt=%u, expected=%u\n",
- __func__, vma->page_sizes.gtt,
+ __func__, vma->resource->page_sizes_gtt,
page_size);
err = -EINVAL;
goto out_unpin;
@@ -631,9 +625,9 @@ static int igt_mock_ppgtt_misaligned_dma(void *arg)
err = igt_check_page_sizes(vma);
- if (vma->page_sizes.gtt != page_size) {
+ if (vma->resource->page_sizes_gtt != page_size) {
pr_err("page_sizes.gtt=%u, expected %u\n",
- vma->page_sizes.gtt, page_size);
+ vma->resource->page_sizes_gtt, page_size);
err = -EINVAL;
}
@@ -648,7 +642,7 @@ static int igt_mock_ppgtt_misaligned_dma(void *arg)
* pages.
*/
for (offset = 4096; offset < page_size; offset += 4096) {
- err = i915_vma_unbind(vma);
+ err = i915_vma_unbind_unlocked(vma);
if (err)
goto out_unpin;
@@ -658,9 +652,10 @@ static int igt_mock_ppgtt_misaligned_dma(void *arg)
err = igt_check_page_sizes(vma);
- if (vma->page_sizes.gtt != I915_GTT_PAGE_SIZE_4K) {
+ if (vma->resource->page_sizes_gtt != I915_GTT_PAGE_SIZE_4K) {
pr_err("page_sizes.gtt=%u, expected %llu\n",
- vma->page_sizes.gtt, I915_GTT_PAGE_SIZE_4K);
+ vma->resource->page_sizes_gtt,
+ I915_GTT_PAGE_SIZE_4K);
err = -EINVAL;
}
@@ -806,9 +801,9 @@ static int igt_mock_ppgtt_huge_fill(void *arg)
}
}
- if (vma->page_sizes.gtt != expected_gtt) {
+ if (vma->resource->page_sizes_gtt != expected_gtt) {
pr_err("gtt=%u, expected=%u, size=%zd, single=%s\n",
- vma->page_sizes.gtt, expected_gtt,
+ vma->resource->page_sizes_gtt, expected_gtt,
obj->base.size, yesno(!!single));
err = -EINVAL;
break;
@@ -962,10 +957,10 @@ static int igt_mock_ppgtt_64K(void *arg)
}
}
- if (vma->page_sizes.gtt != expected_gtt) {
+ if (vma->resource->page_sizes_gtt != expected_gtt) {
pr_err("gtt=%u, expected=%u, i=%d, single=%s\n",
- vma->page_sizes.gtt, expected_gtt, i,
- yesno(!!single));
+ vma->resource->page_sizes_gtt,
+ expected_gtt, i, yesno(!!single));
err = -EINVAL;
goto out_vma_unpin;
}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
index 0be86ffb7c19..8f28e46e8ee5 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
@@ -319,7 +319,7 @@ static int pin_buffer(struct i915_vma *vma, u64 addr)
int err;
if (drm_mm_node_allocated(&vma->node) && vma->node.start != addr) {
- err = i915_vma_unbind(vma);
+ err = i915_vma_unbind_unlocked(vma);
if (err)
return err;
}
@@ -544,7 +544,7 @@ static bool has_bit17_swizzle(int sw)
static bool bad_swizzling(struct drm_i915_private *i915)
{
- struct i915_ggtt *ggtt = &i915->ggtt;
+ struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
if (i915->quirks & QUIRK_PIN_SWIZZLED_PAGES)
return true;
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index 85bf9ba727d7..bd60d42238fb 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -1376,7 +1376,7 @@ static int igt_ctx_readonly(void *arg)
goto out_file;
}
- vm = ctx->vm ?: &i915->ggtt.alias->vm;
+ vm = ctx->vm ?: &to_gt(i915)->ggtt->alias->vm;
if (!vm || !vm->has_read_only) {
err = 0;
goto out_file;
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
index ecb691c81d1e..d534141b2cf7 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
@@ -4,8 +4,13 @@
*/
#include "gt/intel_migrate.h"
+#include "gt/intel_gpu_commands.h"
#include "gem/i915_gem_ttm_move.h"
+#include "i915_deps.h"
+
+#include "selftests/igt_spinner.h"
+
static int igt_fill_check_buffer(struct drm_i915_gem_object *obj,
bool fill)
{
@@ -101,7 +106,8 @@ static int igt_same_create_migrate(void *arg)
}
static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww,
- struct drm_i915_gem_object *obj)
+ struct drm_i915_gem_object *obj,
+ struct i915_vma *vma)
{
int err;
@@ -109,6 +115,24 @@ static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww,
if (err)
return err;
+ if (vma) {
+ err = i915_vma_pin_ww(vma, ww, obj->base.size, 0,
+ 0UL | PIN_OFFSET_FIXED |
+ PIN_USER);
+ if (err) {
+ if (err != -EINTR && err != ERESTARTSYS &&
+ err != -EDEADLK)
+ pr_err("Failed to pin vma.\n");
+ return err;
+ }
+
+ i915_vma_unpin(vma);
+ }
+
+ /*
+ * Migration will implicitly unbind (asynchronously) any bound
+ * vmas.
+ */
if (i915_gem_object_is_lmem(obj)) {
err = i915_gem_object_migrate(obj, ww, INTEL_REGION_SMEM);
if (err) {
@@ -149,11 +173,15 @@ static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww,
return err;
}
-static int igt_lmem_pages_migrate(void *arg)
+static int __igt_lmem_pages_migrate(struct intel_gt *gt,
+ struct i915_address_space *vm,
+ struct i915_deps *deps,
+ struct igt_spinner *spin,
+ struct dma_fence *spin_fence)
{
- struct intel_gt *gt = arg;
struct drm_i915_private *i915 = gt->i915;
struct drm_i915_gem_object *obj;
+ struct i915_vma *vma = NULL;
struct i915_gem_ww_ctx ww;
struct i915_request *rq;
int err;
@@ -165,6 +193,14 @@ static int igt_lmem_pages_migrate(void *arg)
if (IS_ERR(obj))
return PTR_ERR(obj);
+ if (vm) {
+ vma = i915_vma_instance(obj, vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto out_put;
+ }
+ }
+
/* Initial GPU fill, sync, CPU initialization. */
for_i915_gem_ww(&ww, err, true) {
err = i915_gem_object_lock(obj, &ww);
@@ -175,25 +211,23 @@ static int igt_lmem_pages_migrate(void *arg)
if (err)
continue;
- err = intel_migrate_clear(&gt->migrate, &ww, NULL,
+ err = intel_migrate_clear(&gt->migrate, &ww, deps,
obj->mm.pages->sgl, obj->cache_level,
i915_gem_object_is_lmem(obj),
0xdeadbeaf, &rq);
if (rq) {
dma_resv_add_excl_fence(obj->base.resv, &rq->fence);
+ i915_gem_object_set_moving_fence(obj, &rq->fence);
i915_request_put(rq);
}
if (err)
continue;
- err = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE,
- 5 * HZ);
- if (err)
- continue;
-
- err = igt_fill_check_buffer(obj, true);
- if (err)
- continue;
+ if (!vma) {
+ err = igt_fill_check_buffer(obj, true);
+ if (err)
+ continue;
+ }
}
if (err)
goto out_put;
@@ -204,7 +238,7 @@ static int igt_lmem_pages_migrate(void *arg)
*/
for (i = 1; i <= 5; ++i) {
for_i915_gem_ww(&ww, err, true)
- err = lmem_pages_migrate_one(&ww, obj);
+ err = lmem_pages_migrate_one(&ww, obj, vma);
if (err)
goto out_put;
}
@@ -213,12 +247,27 @@ static int igt_lmem_pages_migrate(void *arg)
if (err)
goto out_put;
+ if (spin) {
+ if (dma_fence_is_signaled(spin_fence)) {
+ pr_err("Spinner was terminated by hangcheck.\n");
+ err = -EBUSY;
+ goto out_unlock;
+ }
+ igt_spinner_end(spin);
+ }
+
/* Finally sync migration and check content. */
err = i915_gem_object_wait_migration(obj, true);
if (err)
goto out_unlock;
- err = igt_fill_check_buffer(obj, false);
+ if (vma) {
+ err = i915_vma_wait_for_bind(vma);
+ if (err)
+ goto out_unlock;
+ } else {
+ err = igt_fill_check_buffer(obj, false);
+ }
out_unlock:
i915_gem_object_unlock(obj);
@@ -231,6 +280,7 @@ out_put:
static int igt_lmem_pages_failsafe_migrate(void *arg)
{
int fail_gpu, fail_alloc, ret;
+ struct intel_gt *gt = arg;
for (fail_gpu = 0; fail_gpu < 2; ++fail_gpu) {
for (fail_alloc = 0; fail_alloc < 2; ++fail_alloc) {
@@ -238,7 +288,118 @@ static int igt_lmem_pages_failsafe_migrate(void *arg)
fail_gpu, fail_alloc);
i915_ttm_migrate_set_failure_modes(fail_gpu,
fail_alloc);
- ret = igt_lmem_pages_migrate(arg);
+ ret = __igt_lmem_pages_migrate(gt, NULL, NULL, NULL, NULL);
+ if (ret)
+ goto out_err;
+ }
+ }
+
+out_err:
+ i915_ttm_migrate_set_failure_modes(false, false);
+ return ret;
+}
+
+/*
+ * This subtest tests that unbinding at migration is indeed performed
+ * async. We launch a spinner and a number of migrations depending on
+ * that spinner to have terminated. Before each migration we bind a
+ * vma, which should then be async unbound by the migration operation.
+ * If we are able to schedule migrations without blocking while the
+ * spinner is still running, those unbinds are indeed async and non-
+ * blocking.
+ *
+ * Note that each async bind operation is awaiting the previous migration
+ * due to the moving fence resulting from the migration.
+ */
+static int igt_async_migrate(struct intel_gt *gt)
+{
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ struct i915_ppgtt *ppgtt;
+ struct igt_spinner spin;
+ int err;
+
+ ppgtt = i915_ppgtt_create(gt, 0);
+ if (IS_ERR(ppgtt))
+ return PTR_ERR(ppgtt);
+
+ if (igt_spinner_init(&spin, gt)) {
+ err = -ENOMEM;
+ goto out_spin;
+ }
+
+ for_each_engine(engine, gt, id) {
+ struct ttm_operation_ctx ctx = {
+ .interruptible = true
+ };
+ struct dma_fence *spin_fence;
+ struct intel_context *ce;
+ struct i915_request *rq;
+ struct i915_deps deps;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ goto out_ce;
+ }
+
+ /*
+ * Use MI_NOOP, making the spinner non-preemptible. If there
+ * is a code path where we fail async operation due to the
+ * running spinner, we will block and fail to end the
+ * spinner resulting in a deadlock. But with a non-
+ * preemptible spinner, hangcheck will terminate the spinner
+ * for us, and we will later detect that and fail the test.
+ */
+ rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
+ intel_context_put(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out_ce;
+ }
+
+ i915_deps_init(&deps, GFP_KERNEL);
+ err = i915_deps_add_dependency(&deps, &rq->fence, &ctx);
+ spin_fence = dma_fence_get(&rq->fence);
+ i915_request_add(rq);
+ if (err)
+ goto out_ce;
+
+ err = __igt_lmem_pages_migrate(gt, &ppgtt->vm, &deps, &spin,
+ spin_fence);
+ i915_deps_fini(&deps);
+ dma_fence_put(spin_fence);
+ if (err)
+ goto out_ce;
+ }
+
+out_ce:
+ igt_spinner_fini(&spin);
+out_spin:
+ i915_vm_put(&ppgtt->vm);
+
+ return err;
+}
+
+/*
+ * Setting ASYNC_FAIL_ALLOC to 2 will simulate memory allocation failure while
+ * arming the migration error check and block async migration. This
+ * will cause us to deadlock and hangcheck will terminate the spinner
+ * causing the test to fail.
+ */
+#define ASYNC_FAIL_ALLOC 1
+static int igt_lmem_async_migrate(void *arg)
+{
+ int fail_gpu, fail_alloc, ret;
+ struct intel_gt *gt = arg;
+
+ for (fail_gpu = 0; fail_gpu < 2; ++fail_gpu) {
+ for (fail_alloc = 0; fail_alloc < ASYNC_FAIL_ALLOC; ++fail_alloc) {
+ pr_info("Simulated failure modes: gpu: %d, alloc: %d\n",
+ fail_gpu, fail_alloc);
+ i915_ttm_migrate_set_failure_modes(fail_gpu,
+ fail_alloc);
+ ret = igt_async_migrate(gt);
if (ret)
goto out_err;
}
@@ -256,6 +417,7 @@ int i915_gem_migrate_live_selftests(struct drm_i915_private *i915)
SUBTEST(igt_lmem_create_migrate),
SUBTEST(igt_same_create_migrate),
SUBTEST(igt_lmem_pages_failsafe_migrate),
+ SUBTEST(igt_lmem_async_migrate),
};
if (!HAS_LMEM(i915))
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 65e37d3cbb97..8ae1a1530bd8 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -168,7 +168,9 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
kunmap(p);
out:
+ i915_gem_object_lock(obj, NULL);
__i915_vma_put(vma);
+ i915_gem_object_unlock(obj);
return err;
}
@@ -263,7 +265,9 @@ static int check_partial_mappings(struct drm_i915_gem_object *obj,
if (err)
return err;
+ i915_gem_object_lock(obj, NULL);
__i915_vma_put(vma);
+ i915_gem_object_unlock(obj);
if (igt_timeout(end_time,
"%s: timed out after tiling=%d stride=%d\n",
@@ -309,7 +313,7 @@ static int igt_partial_tiling(void *arg)
int tiling;
int err;
- if (!i915_ggtt_has_aperture(&i915->ggtt))
+ if (!i915_ggtt_has_aperture(to_gt(i915)->ggtt))
return 0;
/* We want to check the page mapping and fencing of a large object
@@ -322,7 +326,7 @@ static int igt_partial_tiling(void *arg)
obj = huge_gem_object(i915,
nreal << PAGE_SHIFT,
- (1 + next_prime_number(i915->ggtt.vm.total >> PAGE_SHIFT)) << PAGE_SHIFT);
+ (1 + next_prime_number(to_gt(i915)->ggtt->vm.total >> PAGE_SHIFT)) << PAGE_SHIFT);
if (IS_ERR(obj))
return PTR_ERR(obj);
@@ -368,10 +372,10 @@ static int igt_partial_tiling(void *arg)
tile.tiling = tiling;
switch (tiling) {
case I915_TILING_X:
- tile.swizzle = i915->ggtt.bit_6_swizzle_x;
+ tile.swizzle = to_gt(i915)->ggtt->bit_6_swizzle_x;
break;
case I915_TILING_Y:
- tile.swizzle = i915->ggtt.bit_6_swizzle_y;
+ tile.swizzle = to_gt(i915)->ggtt->bit_6_swizzle_y;
break;
}
@@ -442,7 +446,7 @@ static int igt_smoke_tiling(void *arg)
IGT_TIMEOUT(end);
int err;
- if (!i915_ggtt_has_aperture(&i915->ggtt))
+ if (!i915_ggtt_has_aperture(to_gt(i915)->ggtt))
return 0;
/*
@@ -459,7 +463,7 @@ static int igt_smoke_tiling(void *arg)
obj = huge_gem_object(i915,
nreal << PAGE_SHIFT,
- (1 + next_prime_number(i915->ggtt.vm.total >> PAGE_SHIFT)) << PAGE_SHIFT);
+ (1 + next_prime_number(to_gt(i915)->ggtt->vm.total >> PAGE_SHIFT)) << PAGE_SHIFT);
if (IS_ERR(obj))
return PTR_ERR(obj);
@@ -488,10 +492,10 @@ static int igt_smoke_tiling(void *arg)
break;
case I915_TILING_X:
- tile.swizzle = i915->ggtt.bit_6_swizzle_x;
+ tile.swizzle = to_gt(i915)->ggtt->bit_6_swizzle_x;
break;
case I915_TILING_Y:
- tile.swizzle = i915->ggtt.bit_6_swizzle_y;
+ tile.swizzle = to_gt(i915)->ggtt->bit_6_swizzle_y;
break;
}
@@ -858,6 +862,7 @@ static int wc_check(struct drm_i915_gem_object *obj)
static bool can_mmap(struct drm_i915_gem_object *obj, enum i915_mmap_type type)
{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
bool no_map;
if (obj->ops->mmap_offset)
@@ -866,7 +871,7 @@ static bool can_mmap(struct drm_i915_gem_object *obj, enum i915_mmap_type type)
return false;
if (type == I915_MMAP_TYPE_GTT &&
- !i915_ggtt_has_aperture(&to_i915(obj->base.dev)->ggtt))
+ !i915_ggtt_has_aperture(to_gt(i915)->ggtt))
return false;
i915_gem_object_lock(obj, NULL);
@@ -1353,7 +1358,9 @@ static int __igt_mmap_revoke(struct drm_i915_private *i915,
* for other objects. Ergo we have to revoke the previous mmap PTE
* access as it no longer points to the same object.
*/
+ i915_gem_object_lock(obj, NULL);
err = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE);
+ i915_gem_object_unlock(obj);
if (err) {
pr_err("Failed to unbind object!\n");
goto out_unmap;
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c
index 740ee8086a27..fe0a890775e2 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c
@@ -43,7 +43,7 @@ static int igt_gem_huge(void *arg)
obj = huge_gem_object(i915,
nreal * PAGE_SIZE,
- i915->ggtt.vm.total + PAGE_SIZE);
+ to_gt(i915)->ggtt->vm.total + PAGE_SIZE);
if (IS_ERR(obj))
return PTR_ERR(obj);
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
index 0bd0d611e0c8..871fe7bda0e0 100644
--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
@@ -108,17 +108,17 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
}
static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
- struct i915_vma *vma,
+ struct i915_vma_resource *vma_res,
enum i915_cache_level cache_level,
u32 flags)
{
struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
struct i915_page_directory * const pd = ppgtt->pd;
- unsigned int first_entry = vma->node.start / I915_GTT_PAGE_SIZE;
+ unsigned int first_entry = vma_res->start / I915_GTT_PAGE_SIZE;
unsigned int act_pt = first_entry / GEN6_PTES;
unsigned int act_pte = first_entry % GEN6_PTES;
const u32 pte_encode = vm->pte_encode(0, cache_level, flags);
- struct sgt_dma iter = sgt_dma(vma);
+ struct sgt_dma iter = sgt_dma(vma_res);
gen6_pte_t *vaddr;
GEM_BUG_ON(!pd->entry[act_pt]);
@@ -144,7 +144,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
}
} while (1);
- vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
+ vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE;
}
static void gen6_flush_pd(struct gen6_ppgtt *ppgtt, u64 start, u64 end)
@@ -275,13 +275,13 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
static void pd_vma_bind(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
- struct i915_vma *vma,
+ struct i915_vma_resource *vma_res,
enum i915_cache_level cache_level,
u32 unused)
{
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
- struct gen6_ppgtt *ppgtt = vma->private;
- u32 ggtt_offset = i915_ggtt_offset(vma) / I915_GTT_PAGE_SIZE;
+ struct gen6_ppgtt *ppgtt = vma_res->private;
+ u32 ggtt_offset = vma_res->start / I915_GTT_PAGE_SIZE;
ppgtt->pp_dir = ggtt_offset * sizeof(gen6_pte_t) << 10;
ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset;
@@ -289,9 +289,10 @@ static void pd_vma_bind(struct i915_address_space *vm,
gen6_flush_pd(ppgtt, 0, ppgtt->base.vm.total);
}
-static void pd_vma_unbind(struct i915_address_space *vm, struct i915_vma *vma)
+static void pd_vma_unbind(struct i915_address_space *vm,
+ struct i915_vma_resource *vma_res)
{
- struct gen6_ppgtt *ppgtt = vma->private;
+ struct gen6_ppgtt *ppgtt = vma_res->private;
struct i915_page_directory * const pd = ppgtt->base.pd;
struct i915_page_table *pt;
unsigned int pde;
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index b012c50f7ce7..c43e724afa9f 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -453,20 +453,21 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
return idx;
}
-static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
+static void gen8_ppgtt_insert_huge(struct i915_address_space *vm,
+ struct i915_vma_resource *vma_res,
struct sgt_dma *iter,
enum i915_cache_level cache_level,
u32 flags)
{
const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
unsigned int rem = sg_dma_len(iter->sg);
- u64 start = vma->node.start;
+ u64 start = vma_res->start;
- GEM_BUG_ON(!i915_vm_is_4lvl(vma->vm));
+ GEM_BUG_ON(!i915_vm_is_4lvl(vm));
do {
struct i915_page_directory * const pdp =
- gen8_pdp_for_page_address(vma->vm, start);
+ gen8_pdp_for_page_address(vm, start);
struct i915_page_directory * const pd =
i915_pd_entry(pdp, __gen8_pte_index(start, 2));
gen8_pte_t encode = pte_encode;
@@ -475,7 +476,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
gen8_pte_t *vaddr;
u16 index;
- if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
+ if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
rem >= I915_GTT_PAGE_SIZE_2M &&
!__gen8_pte_index(start, 0)) {
@@ -492,7 +493,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
page_size = I915_GTT_PAGE_SIZE;
if (!index &&
- vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
+ vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
(IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE))
@@ -541,9 +542,9 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
*/
if (maybe_64K != -1 &&
(index == I915_PDES ||
- (i915_vm_has_scratch_64K(vma->vm) &&
- !iter->sg && IS_ALIGNED(vma->node.start +
- vma->node.size,
+ (i915_vm_has_scratch_64K(vm) &&
+ !iter->sg && IS_ALIGNED(vma_res->start +
+ vma_res->node_size,
I915_GTT_PAGE_SIZE_2M)))) {
vaddr = px_vaddr(pd);
vaddr[maybe_64K] |= GEN8_PDE_IPS_64K;
@@ -559,10 +560,10 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
* instead - which we detect as missing results during
* selftests.
*/
- if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) {
+ if (I915_SELFTEST_ONLY(vm->scrub_64K)) {
u16 i;
- encode = vma->vm->scratch[0]->encode;
+ encode = vm->scratch[0]->encode;
vaddr = px_vaddr(i915_pt_entry(pd, maybe_64K));
for (i = 1; i < index; i += 16)
@@ -572,22 +573,22 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
}
}
- vma->page_sizes.gtt |= page_size;
+ vma_res->page_sizes_gtt |= page_size;
} while (iter->sg && sg_dma_len(iter->sg));
}
static void gen8_ppgtt_insert(struct i915_address_space *vm,
- struct i915_vma *vma,
+ struct i915_vma_resource *vma_res,
enum i915_cache_level cache_level,
u32 flags)
{
struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm);
- struct sgt_dma iter = sgt_dma(vma);
+ struct sgt_dma iter = sgt_dma(vma_res);
- if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) {
- gen8_ppgtt_insert_huge(vma, &iter, cache_level, flags);
+ if (vma_res->bi.page_sizes.sg > I915_GTT_PAGE_SIZE) {
+ gen8_ppgtt_insert_huge(vm, vma_res, &iter, cache_level, flags);
} else {
- u64 idx = vma->node.start >> GEN8_PTE_SHIFT;
+ u64 idx = vma_res->start >> GEN8_PTE_SHIFT;
do {
struct i915_page_directory * const pdp =
@@ -597,7 +598,7 @@ static void gen8_ppgtt_insert(struct i915_address_space *vm,
cache_level, flags);
} while (idx);
- vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
+ vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE;
}
}
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index ba083d800a08..5d0ec7c49b6a 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -79,7 +79,8 @@ static int intel_context_active_acquire(struct intel_context *ce)
__i915_active_acquire(&ce->active);
- if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine))
+ if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) ||
+ intel_context_is_parallel(ce))
return 0;
/* Preallocate tracking nodes */
@@ -563,7 +564,6 @@ void intel_context_bind_parent_child(struct intel_context *parent,
* Callers responsibility to validate that this function is used
* correctly but we use GEM_BUG_ON here ensure that they do.
*/
- GEM_BUG_ON(!intel_engine_uses_guc(parent->engine));
GEM_BUG_ON(intel_context_is_pinned(parent));
GEM_BUG_ON(intel_context_is_child(parent));
GEM_BUG_ON(intel_context_is_pinned(child));
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 959e9300ac9e..e53008b4dd05 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1700,18 +1700,15 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
{
- struct i915_vma_snapshot *vsnap = &rq->batch_snapshot;
+ struct i915_vma_resource *vma_res = rq->batch_res;
void *ring;
int size;
- if (!i915_vma_snapshot_present(vsnap))
- vsnap = NULL;
-
drm_printf(m,
"[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]:\n",
rq->head, rq->postfix, rq->tail,
- vsnap ? upper_32_bits(vsnap->gtt_offset) : ~0u,
- vsnap ? lower_32_bits(vsnap->gtt_offset) : ~0u);
+ vma_res ? upper_32_bits(vma_res->start) : ~0u,
+ vma_res ? lower_32_bits(vma_res->start) : ~0u);
size = rq->tail - rq->head;
if (rq->tail < rq->head)
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 4a9ef688fac2..961d795220a3 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -2601,6 +2601,43 @@ static void execlists_context_cancel_request(struct intel_context *ce,
current->comm);
}
+static struct intel_context *
+execlists_create_parallel(struct intel_engine_cs **engines,
+ unsigned int num_siblings,
+ unsigned int width)
+{
+ struct intel_context *parent = NULL, *ce, *err;
+ int i;
+ /*
+ * Build a parent/child set of contexts for parallel submission: one
+ * context is created per engines[] slot for i in [0, width), the first
+ * becomes the parent and every later one is bound to it as a child.
+ * Returns the parent, or the error pointer from the
+ * intel_context_create() call that failed.
+ */
+ GEM_BUG_ON(num_siblings != 1); /* this path requires num_siblings == 1 */
+
+ for (i = 0; i < width; ++i) {
+ ce = intel_context_create(engines[i]);
+ if (IS_ERR(ce)) {
+ err = ce; /* hand the error pointer back to the caller */
+ goto unwind;
+ }
+
+ if (i == 0)
+ parent = ce; /* first context created is the parent */
+ else
+ intel_context_bind_parent_child(parent, ce);
+ }
+ /*
+ * NOTE(review): if width is 0 the loop never runs and parent is still
+ * NULL at the dereference below; presumably callers guarantee
+ * width >= 1 - confirm.
+ */
+ parent->parallel.fence_context = dma_fence_context_alloc(1);
+
+ intel_context_set_nopreempt(parent); /* the parent and each child get the nopreempt flag */
+ for_each_child(parent, ce)
+ intel_context_set_nopreempt(ce);
+
+ return parent;
+
+unwind:
+ if (parent) /* still NULL when the very first create failed */
+ intel_context_put(parent); /* NOTE(review): presumably also releases already-bound children - confirm */
+ return err;
+}
+
static const struct intel_context_ops execlists_context_ops = {
.flags = COPS_HAS_INFLIGHT,
@@ -2619,6 +2656,7 @@ static const struct intel_context_ops execlists_context_ops = {
.reset = lrc_reset,
.destroy = lrc_destroy,
+ .create_parallel = execlists_create_parallel,
.create_virtual = execlists_create_virtual,
};
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index d2922f64d1c8..8850d4e0f9cc 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -87,7 +87,7 @@ int i915_ggtt_init_hw(struct drm_i915_private *i915)
* beyond the end of the batch buffer, across the page boundary,
* and beyond the end of the GTT if we do not provide a guard.
*/
- ret = ggtt_init_hw(&i915->ggtt);
+ ret = ggtt_init_hw(to_gt(i915)->ggtt);
if (ret)
return ret;
@@ -130,22 +130,51 @@ void i915_ggtt_suspend_vm(struct i915_address_space *vm)
drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt);
+retry:
+ i915_gem_drain_freed_objects(vm->i915);
+
mutex_lock(&vm->mutex);
/* Skip rewriting PTE on VMA unbind. */
open = atomic_xchg(&vm->open, 0);
list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
+ struct drm_i915_gem_object *obj = vma->obj;
+
GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
- i915_vma_wait_for_bind(vma);
- if (i915_vma_is_pinned(vma))
+ if (i915_vma_is_pinned(vma) || !i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
continue;
+ /* unlikely to race when GPU is idle, so no worry about slowpath.. */
+ if (WARN_ON(!i915_gem_object_trylock(obj, NULL))) {
+ /*
+ * No dead objects should appear here, GPU should be
+ * completely idle, and userspace suspended
+ */
+ i915_gem_object_get(obj);
+
+ atomic_set(&vm->open, open);
+ mutex_unlock(&vm->mutex);
+
+ i915_gem_object_lock(obj, NULL);
+ open = i915_vma_unbind(vma);
+ i915_gem_object_unlock(obj);
+
+ GEM_WARN_ON(open);
+
+ i915_gem_object_put(obj);
+ goto retry;
+ }
+
if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) {
- __i915_vma_evict(vma);
+ i915_vma_wait_for_bind(vma);
+
+ __i915_vma_evict(vma, false);
drm_mm_remove_node(&vma->node);
}
+
+ i915_gem_object_unlock(obj);
}
vm->clear_range(vm, 0, vm->total);
@@ -236,7 +265,7 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm,
}
static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
- struct i915_vma *vma,
+ struct i915_vma_resource *vma_res,
enum i915_cache_level level,
u32 flags)
{
@@ -253,10 +282,10 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
*/
gte = (gen8_pte_t __iomem *)ggtt->gsm;
- gte += vma->node.start / I915_GTT_PAGE_SIZE;
- end = gte + vma->node.size / I915_GTT_PAGE_SIZE;
+ gte += vma_res->start / I915_GTT_PAGE_SIZE;
+ end = gte + vma_res->node_size / I915_GTT_PAGE_SIZE;
- for_each_sgt_daddr(addr, iter, vma->pages)
+ for_each_sgt_daddr(addr, iter, vma_res->bi.pages)
gen8_set_pte(gte++, pte_encode | addr);
GEM_BUG_ON(gte > end);
@@ -293,7 +322,7 @@ static void gen6_ggtt_insert_page(struct i915_address_space *vm,
* through the GMADR mapped BAR (i915->mm.gtt->gtt).
*/
static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
- struct i915_vma *vma,
+ struct i915_vma_resource *vma_res,
enum i915_cache_level level,
u32 flags)
{
@@ -304,10 +333,10 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
dma_addr_t addr;
gte = (gen6_pte_t __iomem *)ggtt->gsm;
- gte += vma->node.start / I915_GTT_PAGE_SIZE;
- end = gte + vma->node.size / I915_GTT_PAGE_SIZE;
+ gte += vma_res->start / I915_GTT_PAGE_SIZE;
+ end = gte + vma_res->node_size / I915_GTT_PAGE_SIZE;
- for_each_sgt_daddr(addr, iter, vma->pages)
+ for_each_sgt_daddr(addr, iter, vma_res->bi.pages)
iowrite32(vm->pte_encode(addr, level, flags), gte++);
GEM_BUG_ON(gte > end);
@@ -390,7 +419,7 @@ static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
struct insert_entries {
struct i915_address_space *vm;
- struct i915_vma *vma;
+ struct i915_vma_resource *vma_res;
enum i915_cache_level level;
u32 flags;
};
@@ -399,18 +428,18 @@ static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
{
struct insert_entries *arg = _arg;
- gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, arg->flags);
+ gen8_ggtt_insert_entries(arg->vm, arg->vma_res, arg->level, arg->flags);
bxt_vtd_ggtt_wa(arg->vm);
return 0;
}
static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
- struct i915_vma *vma,
+ struct i915_vma_resource *vma_res,
enum i915_cache_level level,
u32 flags)
{
- struct insert_entries arg = { vm, vma, level, flags };
+ struct insert_entries arg = { vm, vma_res, level, flags };
stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
}
@@ -449,14 +478,14 @@ static void i915_ggtt_insert_page(struct i915_address_space *vm,
}
static void i915_ggtt_insert_entries(struct i915_address_space *vm,
- struct i915_vma *vma,
+ struct i915_vma_resource *vma_res,
enum i915_cache_level cache_level,
u32 unused)
{
unsigned int flags = (cache_level == I915_CACHE_NONE) ?
AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
- intel_gtt_insert_sg_entries(vma->pages, vma->node.start >> PAGE_SHIFT,
+ intel_gtt_insert_sg_entries(vma_res->bi.pages, vma_res->start >> PAGE_SHIFT,
flags);
}
@@ -468,30 +497,32 @@ static void i915_ggtt_clear_range(struct i915_address_space *vm,
static void ggtt_bind_vma(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
- struct i915_vma *vma,
+ struct i915_vma_resource *vma_res,
enum i915_cache_level cache_level,
u32 flags)
{
- struct drm_i915_gem_object *obj = vma->obj;
u32 pte_flags;
- if (i915_vma_is_bound(vma, ~flags & I915_VMA_BIND_MASK))
+ if (vma_res->bound_flags & (~flags & I915_VMA_BIND_MASK))
return;
+ vma_res->bound_flags |= flags;
+
/* Applicable to VLV (gen8+ do not support RO in the GGTT) */
pte_flags = 0;
- if (i915_gem_object_is_readonly(obj))
+ if (vma_res->bi.readonly)
pte_flags |= PTE_READ_ONLY;
- if (i915_gem_object_is_lmem(obj))
+ if (vma_res->bi.lmem)
pte_flags |= PTE_LM;
- vm->insert_entries(vm, vma, cache_level, pte_flags);
- vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
+ vm->insert_entries(vm, vma_res, cache_level, pte_flags);
+ vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE;
}
-static void ggtt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma)
+static void ggtt_unbind_vma(struct i915_address_space *vm,
+ struct i915_vma_resource *vma_res)
{
- vm->clear_range(vm, vma->node.start, vma->size);
+ vm->clear_range(vm, vma_res->start, vma_res->vma_size);
}
static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
@@ -505,7 +536,7 @@ static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP);
size = ggtt->vm.total - GUC_GGTT_TOP;
- ret = i915_gem_gtt_reserve(&ggtt->vm, &ggtt->uc_fw, size,
+ ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, &ggtt->uc_fw, size,
GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE,
PIN_NOEVICT);
if (ret)
@@ -624,7 +655,7 @@ err:
static void aliasing_gtt_bind_vma(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
- struct i915_vma *vma,
+ struct i915_vma_resource *vma_res,
enum i915_cache_level cache_level,
u32 flags)
{
@@ -632,25 +663,27 @@ static void aliasing_gtt_bind_vma(struct i915_address_space *vm,
/* Currently applicable only to VLV */
pte_flags = 0;
- if (i915_gem_object_is_readonly(vma->obj))
+ if (vma_res->bi.readonly)
pte_flags |= PTE_READ_ONLY;
if (flags & I915_VMA_LOCAL_BIND)
ppgtt_bind_vma(&i915_vm_to_ggtt(vm)->alias->vm,
- stash, vma, cache_level, flags);
+ stash, vma_res, cache_level, flags);
if (flags & I915_VMA_GLOBAL_BIND)
- vm->insert_entries(vm, vma, cache_level, pte_flags);
+ vm->insert_entries(vm, vma_res, cache_level, pte_flags);
+
+ vma_res->bound_flags |= flags;
}
static void aliasing_gtt_unbind_vma(struct i915_address_space *vm,
- struct i915_vma *vma)
+ struct i915_vma_resource *vma_res)
{
- if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
- vm->clear_range(vm, vma->node.start, vma->size);
+ if (vma_res->bound_flags & I915_VMA_GLOBAL_BIND)
+ vm->clear_range(vm, vma_res->start, vma_res->vma_size);
- if (i915_vma_is_bound(vma, I915_VMA_LOCAL_BIND))
- ppgtt_unbind_vma(&i915_vm_to_ggtt(vm)->alias->vm, vma);
+ if (vma_res->bound_flags & I915_VMA_LOCAL_BIND)
+ ppgtt_unbind_vma(&i915_vm_to_ggtt(vm)->alias->vm, vma_res);
}
static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
@@ -723,14 +756,14 @@ int i915_init_ggtt(struct drm_i915_private *i915)
{
int ret;
- ret = init_ggtt(&i915->ggtt);
+ ret = init_ggtt(to_gt(i915)->ggtt);
if (ret)
return ret;
if (INTEL_PPGTT(i915) == INTEL_PPGTT_ALIASING) {
- ret = init_aliasing_ppgtt(&i915->ggtt);
+ ret = init_aliasing_ppgtt(to_gt(i915)->ggtt);
if (ret)
- cleanup_init_ggtt(&i915->ggtt);
+ cleanup_init_ggtt(to_gt(i915)->ggtt);
}
return 0;
@@ -743,11 +776,21 @@ static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
atomic_set(&ggtt->vm.open, 0);
flush_workqueue(ggtt->vm.i915->wq);
+ i915_gem_drain_freed_objects(ggtt->vm.i915);
mutex_lock(&ggtt->vm.mutex);
- list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link)
+ list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
+ struct drm_i915_gem_object *obj = vma->obj;
+ bool trylock;
+
+ trylock = i915_gem_object_trylock(obj, NULL);
+ WARN_ON(!trylock);
+
WARN_ON(__i915_vma_unbind(vma));
+ if (trylock)
+ i915_gem_object_unlock(obj);
+ }
if (drm_mm_node_allocated(&ggtt->error_capture))
drm_mm_remove_node(&ggtt->error_capture);
@@ -773,7 +816,7 @@ static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
*/
void i915_ggtt_driver_release(struct drm_i915_private *i915)
{
- struct i915_ggtt *ggtt = &i915->ggtt;
+ struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
fini_aliasing_ppgtt(ggtt);
@@ -788,7 +831,7 @@ void i915_ggtt_driver_release(struct drm_i915_private *i915)
*/
void i915_ggtt_driver_late_release(struct drm_i915_private *i915)
{
- struct i915_ggtt *ggtt = &i915->ggtt;
+ struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
GEM_WARN_ON(kref_read(&ggtt->vm.resv_ref) != 1);
dma_resv_fini(&ggtt->vm._resv);
@@ -1209,7 +1252,7 @@ int i915_ggtt_probe_hw(struct drm_i915_private *i915)
{
int ret;
- ret = ggtt_probe_hw(&i915->ggtt, to_gt(i915));
+ ret = ggtt_probe_hw(to_gt(i915)->ggtt, to_gt(i915));
if (ret)
return ret;
@@ -1281,7 +1324,7 @@ bool i915_ggtt_resume_vm(struct i915_address_space *vm)
atomic_read(&vma->flags) & I915_VMA_BIND_MASK;
GEM_BUG_ON(!was_bound);
- vma->ops->bind_vma(vm, NULL, vma,
+ vma->ops->bind_vma(vm, NULL, vma->resource,
obj ? obj->cache_level : 0,
was_bound);
if (obj) { /* only used during resume => exclusive access */
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
index f294753bc947..76880fb8fc19 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
@@ -428,7 +428,6 @@ int i915_vma_pin_fence(struct i915_vma *vma)
* must keep the device awake whilst using the fence.
*/
assert_rpm_wakelock_held(vma->vm->gt->uncore->rpm);
- GEM_BUG_ON(!i915_vma_is_pinned(vma));
GEM_BUG_ON(!i915_vma_is_ggtt(vma));
err = mutex_lock_interruptible(&vma->vm->mutex);
@@ -731,8 +730,8 @@ static void detect_bit_6_swizzle(struct i915_ggtt *ggtt)
swizzle_y = I915_BIT_6_SWIZZLE_NONE;
}
- i915->ggtt.bit_6_swizzle_x = swizzle_x;
- i915->ggtt.bit_6_swizzle_y = swizzle_y;
+ to_gt(i915)->ggtt->bit_6_swizzle_x = swizzle_x;
+ to_gt(i915)->ggtt->bit_6_swizzle_y = swizzle_y;
}
/*
@@ -899,7 +898,7 @@ void intel_gt_init_swizzling(struct intel_gt *gt)
struct intel_uncore *uncore = gt->uncore;
if (GRAPHICS_VER(i915) < 5 ||
- i915->ggtt.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
+ to_gt(i915)->ggtt->bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
return;
intel_uncore_rmw(uncore, DISP_ARB_CTL, 0, DISP_TILE_SURFACE_SWIZZLING);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 545a2b1f1834..e8403fa53909 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -3,6 +3,7 @@
* Copyright © 2019 Intel Corporation
*/
+#include <drm/drm_managed.h>
#include <drm/intel-gtt.h>
#include "gem/i915_gem_internal.h"
@@ -90,9 +91,11 @@ int intel_gt_probe_lmem(struct intel_gt *gt)
return 0;
}
-void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt)
+int intel_gt_assign_ggtt(struct intel_gt *gt)
{
- gt->ggtt = ggtt;
+ gt->ggtt = drmm_kzalloc(&gt->i915->drm, sizeof(*gt->ggtt), GFP_KERNEL);
+
+ return gt->ggtt ? 0 : -ENOMEM;
}
static const struct intel_mmio_range icl_l3bank_steering_table[] = {
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index a913fb6ffec3..2dad46c3eff2 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -36,7 +36,7 @@ static inline struct intel_gt *huc_to_gt(struct intel_huc *huc)
void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915);
void __intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915);
-void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt);
+int intel_gt_assign_ggtt(struct intel_gt *gt);
int intel_gt_probe_lmem(struct intel_gt *gt);
int intel_gt_init_mmio(struct intel_gt *gt);
int __must_check intel_gt_init_hw(struct intel_gt *gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index e8143fa4b5a8..bf4b942c62ee 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -1042,6 +1042,7 @@
#define GEN9_ROW_CHICKEN4 _MMIO(0xe48c)
#define GEN12_DISABLE_GRF_CLEAR REG_BIT(13)
+#define XEHP_DIS_BBL_SYSPIPE REG_BIT(11)
#define GEN12_DISABLE_TDL_PUSH REG_BIT(9)
#define GEN11_DIS_PICK_2ND_EU REG_BIT(7)
#define GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX REG_BIT(4)
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index 8506db9983da..49a8fb63e6e5 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -163,6 +163,9 @@ static void __i915_vm_release(struct work_struct *work)
struct i915_address_space *vm =
container_of(work, struct i915_address_space, release_work);
+ /* Synchronize async unbinds. */
+ i915_vma_resource_bind_dep_sync_all(vm);
+
vm->cleanup(vm);
i915_address_space_fini(vm);
@@ -191,6 +194,7 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass)
if (!kref_read(&vm->resv_ref))
kref_init(&vm->resv_ref);
+ vm->pending_unbind = RB_ROOT_CACHED;
INIT_WORK(&vm->release_work, __i915_vm_release);
atomic_set(&vm->open, 1);
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index 177b42b935a1..8073438b67c8 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -27,6 +27,7 @@
#include "gt/intel_reset.h"
#include "i915_selftest.h"
+#include "i915_vma_resource.h"
#include "i915_vma_types.h"
#define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
@@ -200,7 +201,7 @@ struct i915_vma_ops {
/* Map an object into an address space with the given cache flags. */
void (*bind_vma)(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
- struct i915_vma *vma,
+ struct i915_vma_resource *vma_res,
enum i915_cache_level cache_level,
u32 flags);
/*
@@ -208,7 +209,8 @@ struct i915_vma_ops {
* setting the valid PTE entries to a reserved scratch page.
*/
void (*unbind_vma)(struct i915_address_space *vm,
- struct i915_vma *vma);
+ struct i915_vma_resource *vma_res);
+
};
struct i915_address_space {
@@ -263,6 +265,9 @@ struct i915_address_space {
/* Flags used when creating page-table objects for this vm */
unsigned long lmem_pt_obj_flags;
+ /* Interval tree for pending unbind vma resources */
+ struct rb_root_cached pending_unbind;
+
struct drm_i915_gem_object *
(*alloc_pt_dma)(struct i915_address_space *vm, int sz);
struct drm_i915_gem_object *
@@ -285,7 +290,7 @@ struct i915_address_space {
enum i915_cache_level cache_level,
u32 flags);
void (*insert_entries)(struct i915_address_space *vm,
- struct i915_vma *vma,
+ struct i915_vma_resource *vma_res,
enum i915_cache_level cache_level,
u32 flags);
void (*cleanup)(struct i915_address_space *vm);
@@ -600,11 +605,11 @@ void gen6_ggtt_invalidate(struct i915_ggtt *ggtt);
void ppgtt_bind_vma(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
- struct i915_vma *vma,
+ struct i915_vma_resource *vma_res,
enum i915_cache_level cache_level,
u32 flags);
void ppgtt_unbind_vma(struct i915_address_space *vm,
- struct i915_vma *vma);
+ struct i915_vma_resource *vma_res);
void gtt_write_workarounds(struct intel_gt *gt);
@@ -627,8 +632,8 @@ __vm_create_scratch_for_read_pinned(struct i915_address_space *vm, unsigned long
static inline struct sgt_dma {
struct scatterlist *sg;
dma_addr_t dma, max;
-} sgt_dma(struct i915_vma *vma) {
- struct scatterlist *sg = vma->pages->sgl;
+} sgt_dma(struct i915_vma_resource *vma_res) {
+ struct scatterlist *sg = vma_res->bi.pages->sgl;
dma_addr_t addr = sg_dma_address(sg);
return (struct sgt_dma){ sg, addr, addr + sg_dma_len(sg) };
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 91c87a3a8e0b..004e1216e654 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1069,6 +1069,10 @@ lrc_pin(struct intel_context *ce,
void lrc_unpin(struct intel_context *ce)
{
+ if (unlikely(ce->parallel.last_rq)) {
+ i915_request_put(ce->parallel.last_rq);
+ ce->parallel.last_rq = NULL;
+ }
check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
ce->engine);
}
@@ -1164,6 +1168,29 @@ gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
return cs;
}
+/*
+ * Workaround for an RCS hang that can be detected on DG2 while restoring a
+ * preempted context in GPGPU mode; the hang is extremely timing dependent.
+ * To address it, the software wabb below is implemented for DG2 A steppings.
+ *
+ * Writes nine dwords at @cs - one MI_LOAD_REGISTER_IMM(1) packet followed by
+ * two MI_LOAD_REGISTER_REG packets - and returns the advanced @cs pointer.
+ * @ce is only used to reach the engine's mmio_base for RING_NOPID.
+ */
+static u32 *
+dg2_emit_rcs_hang_wabb(const struct intel_context *ce, u32 *cs)
+{
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = i915_mmio_reg_offset(GEN12_STATE_ACK_DEBUG);
+ *cs++ = 0x21; /* presumably the value loaded into GEN12_STATE_ACK_DEBUG; bit meaning not visible here */
+
+ *cs++ = MI_LOAD_REGISTER_REG; /* NOTE(review): presumably source offset first, then destination - confirm */
+ *cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base));
+ *cs++ = i915_mmio_reg_offset(GEN12_CULLBIT1);
+
+ *cs++ = MI_LOAD_REGISTER_REG; /* same RING_NOPID operand, paired with GEN12_CULLBIT2 this time */
+ *cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base));
+ *cs++ = i915_mmio_reg_offset(GEN12_CULLBIT2);
+
+ return cs; /* points just past the last emitted dword */
+}
+
static u32 *
gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
{
@@ -1171,6 +1198,11 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
cs = gen12_emit_cmd_buf_wa(ce, cs);
cs = gen12_emit_restore_scratch(ce, cs);
+ /* Wa_22011450934:dg2 */
+ if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_A0, STEP_B0) ||
+ IS_DG2_GRAPHICS_STEP(ce->engine->i915, G11, STEP_A0, STEP_B0))
+ cs = dg2_emit_rcs_hang_wabb(ce, cs);
+
/* Wa_16013000631:dg2 */
if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) ||
IS_DG2_G11(ce->engine->i915))
diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
index 083b3090c69c..48e6e2f87700 100644
--- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
@@ -179,32 +179,34 @@ struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt,
void ppgtt_bind_vma(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
- struct i915_vma *vma,
+ struct i915_vma_resource *vma_res,
enum i915_cache_level cache_level,
u32 flags)
{
u32 pte_flags;
- if (!test_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) {
- vm->allocate_va_range(vm, stash, vma->node.start, vma->size);
- set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma));
+ if (!vma_res->allocated) {
+ vm->allocate_va_range(vm, stash, vma_res->start,
+ vma_res->vma_size);
+ vma_res->allocated = true;
}
/* Applicable to VLV, and gen8+ */
pte_flags = 0;
- if (i915_gem_object_is_readonly(vma->obj))
+ if (vma_res->bi.readonly)
pte_flags |= PTE_READ_ONLY;
- if (i915_gem_object_is_lmem(vma->obj))
+ if (vma_res->bi.lmem)
pte_flags |= PTE_LM;
- vm->insert_entries(vm, vma, cache_level, pte_flags);
+ vm->insert_entries(vm, vma_res, cache_level, pte_flags);
wmb();
}
-void ppgtt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma)
+void ppgtt_unbind_vma(struct i915_address_space *vm,
+ struct i915_vma_resource *vma_res)
{
- if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)))
- vm->clear_range(vm, vma->node.start, vma->size);
+ if (vma_res->allocated)
+ vm->clear_range(vm, vma_res->start, vma_res->vma_size);
}
static unsigned long pd_count(u64 size, int shift)
diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
index cb5a67c98f30..cb3f66707b21 100644
--- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c
+++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
@@ -16,7 +16,7 @@
static int init_fake_lmem_bar(struct intel_memory_region *mem)
{
struct drm_i915_private *i915 = mem->i915;
- struct i915_ggtt *ggtt = &i915->ggtt;
+ struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
unsigned long n;
int ret;
@@ -132,7 +132,7 @@ intel_gt_setup_fake_lmem(struct intel_gt *gt)
if (!i915->params.fake_lmem_start)
return ERR_PTR(-ENODEV);
- GEM_BUG_ON(i915_ggtt_has_aperture(&i915->ggtt));
+ GEM_BUG_ON(i915_ggtt_has_aperture(to_gt(i915)->ggtt));
/* Your mappable aperture belongs to me now! */
mappable_end = pci_resource_len(pdev, 2);
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index ae7542f70afb..82713264b96c 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -604,6 +604,15 @@ static int gen8_reset_engines(struct intel_gt *gt,
*/
}
+ /*
+ * Wa_22011100796:dg2, whenever Full soft reset is required,
+ * reset all individual engines firstly, and then do a full soft reset.
+ *
+ * This is best effort, so ignore any error from the initial reset.
+ */
+ if (IS_DG2(gt->i915) && engine_mask == ALL_ENGINES)
+ gen11_reset_engines(gt, gt->info.engine_mask, 0);
+
if (GRAPHICS_VER(gt->i915) >= 11)
ret = gen11_reset_engines(gt, engine_mask, retry);
else
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index eeda1692d845..b3067aed7f3e 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -683,12 +683,6 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine,
/* Wa_16013271637:dg2 */
wa_masked_en(wal, SLICE_COMMON_ECO_CHICKEN1,
MSC_MSAA_REODER_BUF_BYPASS_DISABLE);
-
- /* Wa_22012532006:dg2 */
- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) ||
- IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0))
- wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
- DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA);
}
static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine,
@@ -1440,10 +1434,6 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
}
if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) {
- /* Wa_14010680813:dg2_g10 */
- wa_write_or(wal, GEN12_GAMSTLB_CTRL, CONTROL_BLOCK_CLKGATE_DIS |
- EGRESS_BLOCK_CLKGATE_DIS | TAG_BLOCK_CLKGATE_DIS);
-
/* Wa_14010948348:dg2_g10 */
wa_write_or(wal, UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS);
@@ -1490,16 +1480,6 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
wa_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS);
}
- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0) ||
- IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) {
- /* Wa_14012362059:dg2 */
- wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB);
- }
-
- /* Wa_1509235366:dg2 */
- wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS |
- GLOBAL_INVALIDATION_MODE);
-
/* Wa_14014830051:dg2 */
wa_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);
@@ -1508,7 +1488,6 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
* recommended tuning settings documented in the bspec's
* performance guide section.
*/
- wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
wa_write_or(wal, GEN12_SQCM, EN_32B_ACCESS);
}
@@ -2040,7 +2019,29 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
struct drm_i915_private *i915 = engine->i915;
- if (IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
+ if (IS_DG2(i915)) {
+ /* Wa_14015227452:dg2 */
+ wa_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE);
+
+ /* Wa_1509235366:dg2 */
+ wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS |
+ GLOBAL_INVALIDATION_MODE);
+
+ /*
+ * The following are not actually "workarounds" but rather
+ * recommended tuning settings documented in the bspec's
+ * performance guide section.
+ */
+ wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
+
+ /* Wa_18018781329:dg2 */
+ wa_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
+ wa_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
+ wa_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB);
+ wa_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB);
+ }
+
+ if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) {
/* Wa_14013392000:dg2_g11 */
wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE);
@@ -2048,15 +2049,15 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
wa_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8);
}
- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0) ||
- IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
+ if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0) ||
+ IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) {
/* Wa_14012419201:dg2 */
wa_masked_en(wal, GEN9_ROW_CHICKEN4,
GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX);
}
- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_C0) ||
- IS_DG2_G11(engine->i915)) {
+ if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) ||
+ IS_DG2_G11(i915)) {
/*
* Wa_22012826095:dg2
* Wa_22013059131:dg2
@@ -2071,14 +2072,14 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
}
/* Wa_1308578152:dg2_g10 when first gslice is fused off */
- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_C0) &&
+ if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) &&
needs_wa_1308578152(engine)) {
wa_masked_dis(wal, GEN12_CS_DEBUG_MODE1_CCCSUNIT_BE_COMMON,
GEN12_REPLAY_MODE_GRANULARITY);
}
- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) ||
- IS_DG2_G11(engine->i915)) {
+ if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
+ IS_DG2_G11(i915) || IS_DG2_G12(i915)) {
/* Wa_22013037850:dg2 */
wa_write_or(wal, LSC_CHICKEN_BIT_0_UDW,
DISABLE_128B_EVICTION_COMMAND_UDW);
@@ -2095,7 +2096,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK);
}
- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) {
+ if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
/*
* Wa_1608949956:dg2_g10
* Wa_14010198302:dg2_g10
@@ -2114,7 +2115,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
0, false);
}
- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) {
+ if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
/* Wa_22010430635:dg2 */
wa_masked_en(wal,
GEN9_ROW_CHICKEN4,
@@ -2124,8 +2125,8 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
wa_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
}
- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) ||
- IS_DG2_G11(engine->i915)) {
+ if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_C0) ||
+ IS_DG2_G11(i915)) {
/* Wa_22012654132:dg2 */
wa_add(wal, GEN10_CACHE_MODE_SS, 0,
_MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
@@ -2134,10 +2135,28 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
}
/* Wa_14013202645:dg2 */
- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_C0) ||
- IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0))
+ if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) ||
+ IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0))
wa_write_or(wal, RT_CTRL, DIS_NULL_QUERY);
+ /* Wa_22012532006:dg2 */
+ if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) ||
+ IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0))
+ wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
+ DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA);
+
+ if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) {
+ /* Wa_14010680813:dg2_g10 */
+ wa_write_or(wal, GEN12_GAMSTLB_CTRL, CONTROL_BLOCK_CLKGATE_DIS |
+ EGRESS_BLOCK_CLKGATE_DIS | TAG_BLOCK_CLKGATE_DIS);
+ }
+
+ if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0) ||
+ IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
+ /* Wa_14012362059:dg2 */
+ wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB);
+ }
+
if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) ||
IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) {
/*
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 9e9ccb139ba7..83ff4c2e57c5 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -1384,7 +1384,7 @@ static int evict_vma(void *data)
complete(&arg->completion);
mutex_lock(&vm->mutex);
- err = i915_gem_evict_for_node(vm, &evict, 0);
+ err = i915_gem_evict_for_node(vm, NULL, &evict, 0);
mutex_unlock(&vm->mutex);
return err;
diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c
index 8a873f6bda7f..37c38bdd5f47 100644
--- a/drivers/gpu/drm/i915/gt/selftest_reset.c
+++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
@@ -19,7 +19,7 @@ __igt_reset_stolen(struct intel_gt *gt,
intel_engine_mask_t mask,
const char *msg)
{
- struct i915_ggtt *ggtt = &gt->i915->ggtt;
+ struct i915_ggtt *ggtt = gt->ggtt;
const struct resource *dsm = &gt->i915->dsm;
resource_size_t num_pages, page;
struct intel_engine_cs *engine;
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
index fe5d7d261797..7afdadc7656f 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
@@ -7,9 +7,9 @@
#define _ABI_GUC_ACTIONS_ABI_H
/**
- * DOC: HOST2GUC_REGISTER_CTB
+ * DOC: HOST2GUC_SELF_CFG
*
- * This message is used as part of the `CTB based communication`_ setup.
+ * This message is used by Host KMD to set up the `GuC Self Config KLVs`_.
*
* This message must be sent as `MMIO HXG Message`_.
*
@@ -22,20 +22,18 @@
* | +-------+--------------------------------------------------------------+
* | | 27:16 | DATA0 = MBZ |
* | +-------+--------------------------------------------------------------+
- * | | 15:0 | ACTION = _`GUC_ACTION_HOST2GUC_REGISTER_CTB` = 0x4505 |
+ * | | 15:0 | ACTION = _`GUC_ACTION_HOST2GUC_SELF_CFG` = 0x0508 |
* +---+-------+--------------------------------------------------------------+
- * | 1 | 31:12 | RESERVED = MBZ |
+ * | 1 | 31:16 | **KLV_KEY** - KLV key, see `GuC Self Config KLVs`_ |
* | +-------+--------------------------------------------------------------+
- * | | 11:8 | **TYPE** - type for the `CT Buffer`_ |
+ * | | 15:0 | **KLV_LEN** - KLV length |
* | | | |
- * | | | - _`GUC_CTB_TYPE_HOST2GUC` = 0 |
- * | | | - _`GUC_CTB_TYPE_GUC2HOST` = 1 |
- * | +-------+--------------------------------------------------------------+
- * | | 7:0 | **SIZE** - size of the `CT Buffer`_ in 4K units minus 1 |
+ * | | | - 32 bit KLV = 1 |
+ * | | | - 64 bit KLV = 2 |
* +---+-------+--------------------------------------------------------------+
- * | 2 | 31:0 | **DESC_ADDR** - GGTT address of the `CTB Descriptor`_ |
+ * | 2 | 31:0 | **VALUE32** - Bits 31-0 of the KLV value |
* +---+-------+--------------------------------------------------------------+
- * | 3 | 31:0 | **BUFF_ADDF** - GGTT address of the `CT Buffer`_ |
+ * | 3 | 31:0 | **VALUE64** - Bits 63-32 of the KLV value (**KLV_LEN** = 2) |
* +---+-------+--------------------------------------------------------------+
*
* +---+-------+--------------------------------------------------------------+
@@ -45,28 +43,25 @@
* | +-------+--------------------------------------------------------------+
* | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ |
* | +-------+--------------------------------------------------------------+
- * | | 27:0 | DATA0 = MBZ |
+ * | | 27:0 | DATA0 = **NUM** - 1 if KLV was parsed, 0 if not recognized |
* +---+-------+--------------------------------------------------------------+
*/
-#define GUC_ACTION_HOST2GUC_REGISTER_CTB 0x4505
+#define GUC_ACTION_HOST2GUC_SELF_CFG 0x0508
-#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 3u)
-#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0
-#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_1_MBZ (0xfffff << 12)
-#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_1_TYPE (0xf << 8)
-#define GUC_CTB_TYPE_HOST2GUC 0u
-#define GUC_CTB_TYPE_GUC2HOST 1u
-#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_1_SIZE (0xff << 0)
-#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_2_DESC_ADDR GUC_HXG_REQUEST_MSG_n_DATAn
-#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_3_BUFF_ADDR GUC_HXG_REQUEST_MSG_n_DATAn
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 3u)
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_KEY (0xffff << 16)
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_LEN (0xffff << 0)
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_2_VALUE32 GUC_HXG_REQUEST_MSG_n_DATAn
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_3_VALUE64 GUC_HXG_REQUEST_MSG_n_DATAn
-#define HOST2GUC_REGISTER_CTB_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN
-#define HOST2GUC_REGISTER_CTB_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0
+#define HOST2GUC_SELF_CFG_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN
+#define HOST2GUC_SELF_CFG_RESPONSE_MSG_0_NUM GUC_HXG_RESPONSE_MSG_0_DATA0
/**
- * DOC: HOST2GUC_DEREGISTER_CTB
+ * DOC: HOST2GUC_CONTROL_CTB
*
- * This message is used as part of the `CTB based communication`_ teardown.
+ * This H2G action allows the VF Host to enable or disable the H2G and G2H `CT Buffer`_.
*
* This message must be sent as `MMIO HXG Message`_.
*
@@ -79,15 +74,12 @@
* | +-------+--------------------------------------------------------------+
* | | 27:16 | DATA0 = MBZ |
* | +-------+--------------------------------------------------------------+
- * | | 15:0 | ACTION = _`GUC_ACTION_HOST2GUC_DEREGISTER_CTB` = 0x4506 |
+ * | | 15:0 | ACTION = _`GUC_ACTION_HOST2GUC_CONTROL_CTB` = 0x4509 |
* +---+-------+--------------------------------------------------------------+
- * | 1 | 31:12 | RESERVED = MBZ |
- * | +-------+--------------------------------------------------------------+
- * | | 11:8 | **TYPE** - type of the `CT Buffer`_ |
+ * | 1 | 31:0 | **CONTROL** - control `CTB based communication`_ |
* | | | |
- * | | | see `GUC_ACTION_HOST2GUC_REGISTER_CTB`_ |
- * | +-------+--------------------------------------------------------------+
- * | | 7:0 | RESERVED = MBZ |
+ * | | | - _`GUC_CTB_CONTROL_DISABLE` = 0 |
+ * | | | - _`GUC_CTB_CONTROL_ENABLE` = 1 |
* +---+-------+--------------------------------------------------------------+
*
* +---+-------+--------------------------------------------------------------+
@@ -100,16 +92,16 @@
* | | 27:0 | DATA0 = MBZ |
* +---+-------+--------------------------------------------------------------+
*/
-#define GUC_ACTION_HOST2GUC_DEREGISTER_CTB 0x4506
+#define GUC_ACTION_HOST2GUC_CONTROL_CTB 0x4509
-#define HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 1u)
-#define HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0
-#define HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_1_MBZ (0xfffff << 12)
-#define HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_1_TYPE (0xf << 8)
-#define HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_1_MBZ2 (0xff << 0)
+#define HOST2GUC_CONTROL_CTB_REQUEST_MSG_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 1u)
+#define HOST2GUC_CONTROL_CTB_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0
+#define HOST2GUC_CONTROL_CTB_REQUEST_MSG_1_CONTROL GUC_HXG_REQUEST_MSG_n_DATAn
+#define GUC_CTB_CONTROL_DISABLE 0u
+#define GUC_CTB_CONTROL_ENABLE 1u
-#define HOST2GUC_DEREGISTER_CTB_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN
-#define HOST2GUC_DEREGISTER_CTB_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0
+#define HOST2GUC_CONTROL_CTB_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN
+#define HOST2GUC_CONTROL_CTB_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0
/* legacy definitions */
@@ -143,8 +135,12 @@ enum intel_guc_action {
INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506,
INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600,
INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601,
- INTEL_GUC_ACTION_RESET_CLIENT = 0x5507,
+ INTEL_GUC_ACTION_CLIENT_SOFT_RESET = 0x5507,
INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A,
+ INTEL_GUC_ACTION_STATE_CAPTURE_NOTIFICATION = 0x8002,
+ INTEL_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE = 0x8003,
+ INTEL_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED = 0x8004,
+ INTEL_GUC_ACTION_NOTIFY_EXCEPTION = 0x8005,
INTEL_GUC_ACTION_LIMIT
};
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
index 488b6061ee89..c20658ee85a5 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
@@ -11,4 +11,27 @@ enum intel_guc_response_status {
INTEL_GUC_RESPONSE_STATUS_GENERIC_FAIL = 0xF000,
};
+enum intel_guc_load_status {
+ INTEL_GUC_LOAD_STATUS_DEFAULT = 0x00,
+ INTEL_GUC_LOAD_STATUS_START = 0x01,
+ INTEL_GUC_LOAD_STATUS_ERROR_DEVID_BUILD_MISMATCH = 0x02,
+ INTEL_GUC_LOAD_STATUS_GUC_PREPROD_BUILD_MISMATCH = 0x03,
+ INTEL_GUC_LOAD_STATUS_ERROR_DEVID_INVALID_GUCTYPE = 0x04,
+ INTEL_GUC_LOAD_STATUS_GDT_DONE = 0x10,
+ INTEL_GUC_LOAD_STATUS_IDT_DONE = 0x20,
+ INTEL_GUC_LOAD_STATUS_LAPIC_DONE = 0x30,
+ INTEL_GUC_LOAD_STATUS_GUCINT_DONE = 0x40,
+ INTEL_GUC_LOAD_STATUS_DPC_READY = 0x50,
+ INTEL_GUC_LOAD_STATUS_DPC_ERROR = 0x60,
+ INTEL_GUC_LOAD_STATUS_EXCEPTION = 0x70,
+ INTEL_GUC_LOAD_STATUS_INIT_DATA_INVALID = 0x71,
+ INTEL_GUC_LOAD_STATUS_PXP_TEARDOWN_CTRL_ENABLED = 0x72,
+ INTEL_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_START,
+ INTEL_GUC_LOAD_STATUS_MPU_DATA_INVALID = 0x73,
+ INTEL_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID = 0x74,
+ INTEL_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_END,
+
+ INTEL_GUC_LOAD_STATUS_READY = 0xF0,
+};
+
#endif /* _ABI_GUC_ERRORS_ABI_H */
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h
new file mode 100644
index 000000000000..f0814a57c191
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_KLVS_ABI_H
+#define _ABI_GUC_KLVS_ABI_H
+
+/**
+ * DOC: GuC KLV
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31:16 | **KEY** - KLV key identifier |
+ * | | | - `GuC Self Config KLVs`_ |
+ * | | | |
+ * | +-------+--------------------------------------------------------------+
+ * | | 15:0 | **LEN** - length of VALUE (in 32bit dwords) |
+ * +---+-------+--------------------------------------------------------------+
+ * | 1 | 31:0 | **VALUE** - actual value of the KLV (format depends on KEY) |
+ * +---+-------+ |
+ * |...| | |
+ * +---+-------+ |
+ * | n | 31:0 | |
+ * +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_KLV_LEN_MIN 1u
+#define GUC_KLV_0_KEY (0xffff << 16)
+#define GUC_KLV_0_LEN (0xffff << 0)
+#define GUC_KLV_n_VALUE (0xffffffff << 0)
+
+/**
+ * DOC: GuC Self Config KLVs
+ *
+ * `GuC KLV`_ keys available for use with HOST2GUC_SELF_CFG_.
+ *
+ * _`GUC_KLV_SELF_CFG_H2G_CTB_ADDR` : 0x0902
+ * Refers to 64 bit Global Gfx address of H2G `CT Buffer`_.
+ * Should be above WOPCM address but below APIC base address for native mode.
+ *
+ * _`GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR` : 0x0903
+ * Refers to 64 bit Global Gfx address of H2G `CTB Descriptor`_.
+ * Should be above WOPCM address but below APIC base address for native mode.
+ *
+ * _`GUC_KLV_SELF_CFG_H2G_CTB_SIZE` : 0x0904
+ * Refers to size of H2G `CT Buffer`_ in bytes.
+ * Should be a multiple of 4K.
+ *
+ * _`GUC_KLV_SELF_CFG_G2H_CTB_ADDR` : 0x0905
+ * Refers to 64 bit Global Gfx address of G2H `CT Buffer`_.
+ * Should be above WOPCM address but below APIC base address for native mode.
+ *
+ * _`GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR` : 0x0906
+ * Refers to 64 bit Global Gfx address of G2H `CTB Descriptor`_.
+ * Should be above WOPCM address but below APIC base address for native mode.
+ *
+ * _`GUC_KLV_SELF_CFG_G2H_CTB_SIZE` : 0x0907
+ * Refers to size of G2H `CT Buffer`_ in bytes.
+ * Should be a multiple of 4K.
+ */
+
+#define GUC_KLV_SELF_CFG_H2G_CTB_ADDR_KEY 0x0902
+#define GUC_KLV_SELF_CFG_H2G_CTB_ADDR_LEN 2u
+
+#define GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_KEY 0x0903
+#define GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_LEN 2u
+
+#define GUC_KLV_SELF_CFG_H2G_CTB_SIZE_KEY 0x0904
+#define GUC_KLV_SELF_CFG_H2G_CTB_SIZE_LEN 1u
+
+#define GUC_KLV_SELF_CFG_G2H_CTB_ADDR_KEY 0x0905
+#define GUC_KLV_SELF_CFG_G2H_CTB_ADDR_LEN 2u
+
+#define GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_KEY 0x0906
+#define GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_LEN 2u
+
+#define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_KEY 0x0907
+#define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_LEN 1u
+
+#endif /* _ABI_GUC_KLVS_ABI_H */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 5bab32fef120..447a976c9f25 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -184,6 +184,9 @@ void intel_guc_init_early(struct intel_guc *guc)
guc->send_regs.count = GUC_MAX_MMIO_MSG_LEN;
BUILD_BUG_ON(GUC_MAX_MMIO_MSG_LEN > SOFT_SCRATCH_COUNT);
}
+
+ intel_guc_enable_msg(guc, INTEL_GUC_RECV_MSG_EXCEPTION |
+ INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED);
}
void intel_guc_init_late(struct intel_guc *guc)
@@ -224,32 +227,48 @@ static u32 guc_ctl_log_params_flags(struct intel_guc *guc)
u32 flags;
#if (((CRASH_BUFFER_SIZE) % SZ_1M) == 0)
- #define UNIT SZ_1M
- #define FLAG GUC_LOG_ALLOC_IN_MEGABYTE
+ #define LOG_UNIT SZ_1M
+ #define LOG_FLAG GUC_LOG_LOG_ALLOC_UNITS
#else
- #define UNIT SZ_4K
- #define FLAG 0
+ #define LOG_UNIT SZ_4K
+ #define LOG_FLAG 0
+ #endif
+
+ #if (((CAPTURE_BUFFER_SIZE) % SZ_1M) == 0)
+ #define CAPTURE_UNIT SZ_1M
+ #define CAPTURE_FLAG GUC_LOG_CAPTURE_ALLOC_UNITS
+ #else
+ #define CAPTURE_UNIT SZ_4K
+ #define CAPTURE_FLAG 0
#endif
BUILD_BUG_ON(!CRASH_BUFFER_SIZE);
- BUILD_BUG_ON(!IS_ALIGNED(CRASH_BUFFER_SIZE, UNIT));
+ BUILD_BUG_ON(!IS_ALIGNED(CRASH_BUFFER_SIZE, LOG_UNIT));
BUILD_BUG_ON(!DEBUG_BUFFER_SIZE);
- BUILD_BUG_ON(!IS_ALIGNED(DEBUG_BUFFER_SIZE, UNIT));
+ BUILD_BUG_ON(!IS_ALIGNED(DEBUG_BUFFER_SIZE, LOG_UNIT));
+ BUILD_BUG_ON(!CAPTURE_BUFFER_SIZE);
+ BUILD_BUG_ON(!IS_ALIGNED(CAPTURE_BUFFER_SIZE, CAPTURE_UNIT));
- BUILD_BUG_ON((CRASH_BUFFER_SIZE / UNIT - 1) >
+ BUILD_BUG_ON((CRASH_BUFFER_SIZE / LOG_UNIT - 1) >
(GUC_LOG_CRASH_MASK >> GUC_LOG_CRASH_SHIFT));
- BUILD_BUG_ON((DEBUG_BUFFER_SIZE / UNIT - 1) >
+ BUILD_BUG_ON((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) >
(GUC_LOG_DEBUG_MASK >> GUC_LOG_DEBUG_SHIFT));
+ BUILD_BUG_ON((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) >
+ (GUC_LOG_CAPTURE_MASK >> GUC_LOG_CAPTURE_SHIFT));
flags = GUC_LOG_VALID |
GUC_LOG_NOTIFY_ON_HALF_FULL |
- FLAG |
- ((CRASH_BUFFER_SIZE / UNIT - 1) << GUC_LOG_CRASH_SHIFT) |
- ((DEBUG_BUFFER_SIZE / UNIT - 1) << GUC_LOG_DEBUG_SHIFT) |
+ CAPTURE_FLAG |
+ LOG_FLAG |
+ ((CRASH_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_CRASH_SHIFT) |
+ ((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_DEBUG_SHIFT) |
+ ((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) << GUC_LOG_CAPTURE_SHIFT) |
(offset << GUC_LOG_BUF_ADDR_SHIFT);
- #undef UNIT
- #undef FLAG
+ #undef LOG_UNIT
+ #undef LOG_FLAG
+ #undef CAPTURE_UNIT
+ #undef CAPTURE_FLAG
return flags;
}
@@ -262,6 +281,26 @@ static u32 guc_ctl_ads_flags(struct intel_guc *guc)
return flags;
}
+static u32 guc_ctl_wa_flags(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+ u32 flags = 0;
+
+ /* Wa_22012773006:gen11,gen12 < XeHP */
+ if (GRAPHICS_VER(gt->i915) >= 11 &&
+ GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 50))
+ flags |= GUC_WA_POLLCS;
+
+ return flags;
+}
+
+static u32 guc_ctl_devid(struct intel_guc *guc)
+{
+ struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+
+ return (INTEL_DEVID(i915) << 16) | INTEL_REVID(i915);
+}
+
/*
* Initialise the GuC parameter block before starting the firmware
* transfer. These parameters are read by the firmware on startup
@@ -278,6 +317,8 @@ static void guc_init_params(struct intel_guc *guc)
params[GUC_CTL_FEATURE] = guc_ctl_feature_flags(guc);
params[GUC_CTL_DEBUG] = guc_ctl_debug_flags(guc);
params[GUC_CTL_ADS] = guc_ctl_ads_flags(guc);
+ params[GUC_CTL_WA] = guc_ctl_wa_flags(guc);
+ params[GUC_CTL_DEVID] = guc_ctl_devid(guc);
for (i = 0; i < GUC_CTL_MAX_DWORDS; i++)
DRM_DEBUG_DRIVER("param[%2d] = %#x\n", i, params[i]);
@@ -515,9 +556,10 @@ int intel_guc_to_host_process_recv_msg(struct intel_guc *guc,
/* Make sure to handle only enabled messages */
msg = payload[0] & guc->msg_enabled_mask;
- if (msg & (INTEL_GUC_RECV_MSG_FLUSH_LOG_BUFFER |
- INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED))
- intel_guc_log_handle_flush_event(&guc->log);
+ if (msg & INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED)
+ drm_err(&guc_to_gt(guc)->i915->drm, "Received early GuC crash dump notification!\n");
+ if (msg & INTEL_GUC_RECV_MSG_EXCEPTION)
+ drm_err(&guc_to_gt(guc)->i915->drm, "Received early GuC exception notification!\n");
return 0;
}
@@ -551,7 +593,7 @@ int intel_guc_suspend(struct intel_guc *guc)
{
int ret;
u32 action[] = {
- INTEL_GUC_ACTION_RESET_CLIENT,
+ INTEL_GUC_ACTION_CLIENT_SOFT_RESET,
};
if (!intel_guc_is_ready(guc))
@@ -715,6 +757,56 @@ int intel_guc_allocate_and_map_vma(struct intel_guc *guc, u32 size,
return 0;
}
+static int __guc_action_self_cfg(struct intel_guc *guc, u16 key, u16 len, u64 value)
+{
+ u32 request[HOST2GUC_SELF_CFG_REQUEST_MSG_LEN] = {
+ FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+ FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_HOST2GUC_SELF_CFG),
+ FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_KEY, key) |
+ FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_LEN, len),
+ FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_2_VALUE32, lower_32_bits(value)),
+ FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_3_VALUE64, upper_32_bits(value)),
+ };
+ int ret;
+
+ GEM_BUG_ON(len > 2);
+ GEM_BUG_ON(len == 1 && upper_32_bits(value));
+
+ /* Self config must go over MMIO */
+ ret = intel_guc_send_mmio(guc, request, ARRAY_SIZE(request), NULL, 0);
+
+ if (unlikely(ret < 0))
+ return ret;
+ if (unlikely(ret > 1))
+ return -EPROTO;
+ if (unlikely(!ret))
+ return -ENOKEY;
+
+ return 0;
+}
+
+static int __guc_self_cfg(struct intel_guc *guc, u16 key, u16 len, u64 value)
+{
+ struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+ int err = __guc_action_self_cfg(guc, key, len, value);
+
+ if (unlikely(err))
+ i915_probe_error(i915, "Unsuccessful self-config (%pe) key %#hx value %#llx\n",
+ ERR_PTR(err), key, value);
+ return err;
+}
+
+int intel_guc_self_cfg32(struct intel_guc *guc, u16 key, u32 value)
+{
+ return __guc_self_cfg(guc, key, 1, value);
+}
+
+int intel_guc_self_cfg64(struct intel_guc *guc, u16 key, u64 value)
+{
+ return __guc_self_cfg(guc, key, 2, value);
+}
+
/**
* intel_guc_load_status - dump information about GuC load status
* @guc: the GuC
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 3aabe164c329..9d779de16613 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -119,6 +119,15 @@ struct intel_guc {
* function as it might be in an atomic context (no sleeping)
*/
struct work_struct destroyed_worker;
+ /**
+ * @reset_fail_worker: worker to trigger a GT reset after an
+ * engine reset fails
+ */
+ struct work_struct reset_fail_worker;
+ /**
+ * @reset_fail_mask: mask of engines that failed to reset
+ */
+ intel_engine_mask_t reset_fail_mask;
} submission_state;
/**
@@ -141,6 +150,13 @@ struct intel_guc {
struct __guc_ads_blob *ads_blob;
/** @ads_regset_size: size of the save/restore regsets in the ADS */
u32 ads_regset_size;
+ /**
+ * @ads_regset_count: number of save/restore registers in the ADS for
+ * each engine
+ */
+ u32 ads_regset_count[I915_NUM_ENGINES];
+ /** @ads_regset: save/restore regsets in the ADS */
+ struct guc_mmio_reg *ads_regset;
/** @ads_golden_ctxt_size: size of the golden contexts in the ADS */
u32 ads_golden_ctxt_size;
/** @ads_engine_usage_size: size of engine usage in the ADS */
@@ -333,6 +349,8 @@ int intel_guc_resume(struct intel_guc *guc);
struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size);
int intel_guc_allocate_and_map_vma(struct intel_guc *guc, u32 size,
struct i915_vma **out_vma, void **out_vaddr);
+int intel_guc_self_cfg32(struct intel_guc *guc, u16 key, u32 value);
+int intel_guc_self_cfg64(struct intel_guc *guc, u16 key, u64 value);
static inline bool intel_guc_is_supported(struct intel_guc *guc)
{
@@ -409,6 +427,8 @@ int intel_guc_context_reset_process_msg(struct intel_guc *guc,
const u32 *msg, u32 len);
int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
const u32 *msg, u32 len);
+int intel_guc_error_capture_process_msg(struct intel_guc *guc,
+ const u32 *msg, u32 len);
void intel_guc_find_hung_context(struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
index 162b89198567..7e41175618f5 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
@@ -42,6 +42,10 @@
* +---------------------------------------+
* | padding |
* +---------------------------------------+ <== 4K aligned
+ * | capture lists |
+ * +---------------------------------------+
+ * | padding |
+ * +---------------------------------------+ <== 4K aligned
* | private data |
* +---------------------------------------+
* | padding |
@@ -67,6 +71,12 @@ static u32 guc_ads_golden_ctxt_size(struct intel_guc *guc)
return PAGE_ALIGN(guc->ads_golden_ctxt_size);
}
+static u32 guc_ads_capture_size(struct intel_guc *guc)
+{
+ /* FIXME: Allocate a proper capture list */
+ return PAGE_ALIGN(PAGE_SIZE);
+}
+
static u32 guc_ads_private_data_size(struct intel_guc *guc)
{
return PAGE_ALIGN(guc->fw.private_data_size);
@@ -87,7 +97,7 @@ static u32 guc_ads_golden_ctxt_offset(struct intel_guc *guc)
return PAGE_ALIGN(offset);
}
-static u32 guc_ads_private_data_offset(struct intel_guc *guc)
+static u32 guc_ads_capture_offset(struct intel_guc *guc)
{
u32 offset;
@@ -97,6 +107,16 @@ static u32 guc_ads_private_data_offset(struct intel_guc *guc)
return PAGE_ALIGN(offset);
}
+static u32 guc_ads_private_data_offset(struct intel_guc *guc)
+{
+ u32 offset;
+
+ offset = guc_ads_capture_offset(guc) +
+ guc_ads_capture_size(guc);
+
+ return PAGE_ALIGN(offset);
+}
+
static u32 guc_ads_blob_size(struct intel_guc *guc)
{
return guc_ads_private_data_offset(guc) +
@@ -188,14 +208,18 @@ static void guc_mapping_table_init(struct intel_gt *gt,
/*
* The save/restore register list must be pre-calculated to a temporary
- * buffer of driver defined size before it can be generated in place
- * inside the ADS.
+ * buffer before it can be copied inside the ADS.
*/
-#define MAX_MMIO_REGS 128 /* Arbitrary size, increase as needed */
struct temp_regset {
+ /*
+ * ptr to the section of the storage for the engine currently being
+ * worked on
+ */
struct guc_mmio_reg *registers;
- u32 used;
- u32 size;
+ /* ptr to the base of the allocated storage for all engines */
+ struct guc_mmio_reg *storage;
+ u32 storage_used;
+ u32 storage_max;
};
static int guc_mmio_reg_cmp(const void *a, const void *b)
@@ -206,18 +230,44 @@ static int guc_mmio_reg_cmp(const void *a, const void *b)
return (int)ra->offset - (int)rb->offset;
}
-static void guc_mmio_reg_add(struct temp_regset *regset,
- u32 offset, u32 flags)
+static struct guc_mmio_reg * __must_check
+__mmio_reg_add(struct temp_regset *regset, struct guc_mmio_reg *reg)
{
- u32 count = regset->used;
+ u32 pos = regset->storage_used;
+ struct guc_mmio_reg *slot;
+
+ if (pos >= regset->storage_max) {
+ size_t size = ALIGN((pos + 1) * sizeof(*slot), PAGE_SIZE);
+ struct guc_mmio_reg *r = krealloc(regset->storage,
+ size, GFP_KERNEL);
+ if (!r) {
+ WARN_ONCE(1, "Incomplete regset list: can't add register (%d)\n",
+ -ENOMEM);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ regset->registers = r + (regset->registers - regset->storage);
+ regset->storage = r;
+ regset->storage_max = size / sizeof(*slot);
+ }
+
+ slot = &regset->storage[pos];
+ regset->storage_used++;
+ *slot = *reg;
+
+ return slot;
+}
+
+static long __must_check guc_mmio_reg_add(struct temp_regset *regset,
+ u32 offset, u32 flags)
+{
+ u32 count = regset->storage_used - (regset->registers - regset->storage);
struct guc_mmio_reg reg = {
.offset = offset,
.flags = flags,
};
struct guc_mmio_reg *slot;
- GEM_BUG_ON(count >= regset->size);
-
/*
* The mmio list is built using separate lists within the driver.
* It's possible that at some point we may attempt to add the same
@@ -226,11 +276,11 @@ static void guc_mmio_reg_add(struct temp_regset *regset,
*/
if (bsearch(&reg, regset->registers, count,
sizeof(reg), guc_mmio_reg_cmp))
- return;
+ return 0;
- slot = &regset->registers[count];
- regset->used++;
- *slot = reg;
+ slot = __mmio_reg_add(regset, &reg);
+ if (IS_ERR(slot))
+ return PTR_ERR(slot);
while (slot-- > regset->registers) {
GEM_BUG_ON(slot[0].offset == slot[1].offset);
@@ -239,6 +289,8 @@ static void guc_mmio_reg_add(struct temp_regset *regset,
swap(slot[1], slot[0]);
}
+
+ return 0;
}
#define GUC_MMIO_REG_ADD(regset, reg, masked) \
@@ -246,62 +298,71 @@ static void guc_mmio_reg_add(struct temp_regset *regset,
i915_mmio_reg_offset((reg)), \
(masked) ? GUC_REGSET_MASKED : 0)
-static void guc_mmio_regset_init(struct temp_regset *regset,
- struct intel_engine_cs *engine)
+static int guc_mmio_regset_init(struct temp_regset *regset,
+ struct intel_engine_cs *engine)
{
const u32 base = engine->mmio_base;
struct i915_wa_list *wal = &engine->wa_list;
struct i915_wa *wa;
unsigned int i;
+ int ret = 0;
- regset->used = 0;
+ /*
+ * Each engine's registers point to a new start relative to
+ * storage
+ */
+ regset->registers = regset->storage + regset->storage_used;
- GUC_MMIO_REG_ADD(regset, RING_MODE_GEN7(base), true);
- GUC_MMIO_REG_ADD(regset, RING_HWS_PGA(base), false);
- GUC_MMIO_REG_ADD(regset, RING_IMR(base), false);
+ ret |= GUC_MMIO_REG_ADD(regset, RING_MODE_GEN7(base), true);
+ ret |= GUC_MMIO_REG_ADD(regset, RING_HWS_PGA(base), false);
+ ret |= GUC_MMIO_REG_ADD(regset, RING_IMR(base), false);
for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
- GUC_MMIO_REG_ADD(regset, wa->reg, wa->masked_reg);
+ ret |= GUC_MMIO_REG_ADD(regset, wa->reg, wa->masked_reg);
/* Be extra paranoid and include all whitelist registers. */
for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++)
- GUC_MMIO_REG_ADD(regset,
- RING_FORCE_TO_NONPRIV(base, i),
- false);
+ ret |= GUC_MMIO_REG_ADD(regset,
+ RING_FORCE_TO_NONPRIV(base, i),
+ false);
/* add in local MOCS registers */
for (i = 0; i < GEN9_LNCFCMOCS_REG_COUNT; i++)
- GUC_MMIO_REG_ADD(regset, GEN9_LNCFCMOCS(i), false);
+ ret |= GUC_MMIO_REG_ADD(regset, GEN9_LNCFCMOCS(i), false);
+
+ return ret ? -1 : 0;
}
-static int guc_mmio_reg_state_query(struct intel_guc *guc)
+static long guc_mmio_reg_state_create(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
struct intel_engine_cs *engine;
enum intel_engine_id id;
- struct temp_regset temp_set;
- u32 total;
+ struct temp_regset temp_set = {};
+ long total = 0;
+ long ret;
- /*
- * Need to actually build the list in order to filter out
- * duplicates and other such data dependent constructions.
- */
- temp_set.size = MAX_MMIO_REGS;
- temp_set.registers = kmalloc_array(temp_set.size,
- sizeof(*temp_set.registers),
- GFP_KERNEL);
- if (!temp_set.registers)
- return -ENOMEM;
-
- total = 0;
for_each_engine(engine, gt, id) {
- guc_mmio_regset_init(&temp_set, engine);
- total += temp_set.used;
+ u32 used = temp_set.storage_used;
+
+ ret = guc_mmio_regset_init(&temp_set, engine);
+ if (ret < 0)
+ goto fail_regset_init;
+
+ guc->ads_regset_count[id] = temp_set.storage_used - used;
+ total += guc->ads_regset_count[id];
}
- kfree(temp_set.registers);
+ guc->ads_regset = temp_set.storage;
+
+ drm_dbg(&guc_to_gt(guc)->i915->drm, "Used %zu KB for temporary ADS regset\n",
+ (temp_set.storage_max * sizeof(struct guc_mmio_reg)) >> 10);
return total * sizeof(struct guc_mmio_reg);
+
+fail_regset_init:
+ kfree(temp_set.storage);
+ return ret;
}
static void guc_mmio_reg_state_init(struct intel_guc *guc,
@@ -309,40 +370,38 @@ static void guc_mmio_reg_state_init(struct intel_guc *guc,
{
struct intel_gt *gt = guc_to_gt(guc);
struct intel_engine_cs *engine;
+ struct guc_mmio_reg *ads_registers;
enum intel_engine_id id;
- struct temp_regset temp_set;
- struct guc_mmio_reg_set *ads_reg_set;
u32 addr_ggtt, offset;
- u8 guc_class;
offset = guc_ads_regset_offset(guc);
addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset;
- temp_set.registers = (struct guc_mmio_reg *)(((u8 *)blob) + offset);
- temp_set.size = guc->ads_regset_size / sizeof(temp_set.registers[0]);
+ ads_registers = (struct guc_mmio_reg *)(((u8 *)blob) + offset);
+
+ memcpy(ads_registers, guc->ads_regset, guc->ads_regset_size);
for_each_engine(engine, gt, id) {
+ u32 count = guc->ads_regset_count[id];
+ struct guc_mmio_reg_set *ads_reg_set;
+ u8 guc_class;
+
/* Class index is checked in class converter */
GEM_BUG_ON(engine->instance >= GUC_MAX_INSTANCES_PER_CLASS);
guc_class = engine_class_to_guc_class(engine->class);
ads_reg_set = &blob->ads.reg_state_list[guc_class][engine->instance];
- guc_mmio_regset_init(&temp_set, engine);
- if (!temp_set.used) {
+ if (!count) {
ads_reg_set->address = 0;
ads_reg_set->count = 0;
continue;
}
ads_reg_set->address = addr_ggtt;
- ads_reg_set->count = temp_set.used;
+ ads_reg_set->count = count;
- temp_set.size -= temp_set.used;
- temp_set.registers += temp_set.used;
- addr_ggtt += temp_set.used * sizeof(struct guc_mmio_reg);
+ addr_ggtt += count * sizeof(struct guc_mmio_reg);
}
-
- GEM_BUG_ON(temp_set.size);
}
static void fill_engine_enable_masks(struct intel_gt *gt,
@@ -501,6 +560,26 @@ static void guc_init_golden_context(struct intel_guc *guc)
GEM_BUG_ON(guc->ads_golden_ctxt_size != total_size);
}
+static void guc_capture_list_init(struct intel_guc *guc, struct __guc_ads_blob *blob)
+{
+ int i, j;
+ u32 addr_ggtt, offset;
+
+ offset = guc_ads_capture_offset(guc);
+ addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset;
+
+ /* FIXME: Populate a proper capture list */
+
+ for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; i++) {
+ for (j = 0; j < GUC_MAX_ENGINE_CLASSES; j++) {
+ blob->ads.capture_instance[i][j] = addr_ggtt;
+ blob->ads.capture_class[i][j] = addr_ggtt;
+ }
+
+ blob->ads.capture_global[i] = addr_ggtt;
+ }
+}
+
static void __guc_ads_init(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
@@ -534,6 +613,9 @@ static void __guc_ads_init(struct intel_guc *guc)
base = intel_guc_ggtt_offset(guc, guc->ads_vma);
+ /* Capture list for hang debug */
+ guc_capture_list_init(guc, blob);
+
/* ADS */
blob->ads.scheduler_policies = base + ptr_offset(blob, policies);
blob->ads.gt_system_info = base + ptr_offset(blob, system_info);
@@ -561,8 +643,11 @@ int intel_guc_ads_create(struct intel_guc *guc)
GEM_BUG_ON(guc->ads_vma);
- /* Need to calculate the reg state size dynamically: */
- ret = guc_mmio_reg_state_query(guc);
+ /*
+ * Create reg state size dynamically on system memory to be copied to
+ * the final ads blob on gt init/reset
+ */
+ ret = guc_mmio_reg_state_create(guc);
if (ret < 0)
return ret;
guc->ads_regset_size = ret;
@@ -602,6 +687,7 @@ void intel_guc_ads_destroy(struct intel_guc *guc)
{
i915_vma_unpin_and_release(&guc->ads_vma, I915_VMA_RELEASE_MAP);
guc->ads_blob = NULL;
+ kfree(guc->ads_regset);
}
static void guc_ads_private_data_reset(struct intel_guc *guc)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index aa6dd6415202..2f7fc87a78e1 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -112,18 +112,6 @@ void intel_guc_ct_init_early(struct intel_guc_ct *ct)
init_waitqueue_head(&ct->wq);
}
-static inline const char *guc_ct_buffer_type_to_str(u32 type)
-{
- switch (type) {
- case GUC_CTB_TYPE_HOST2GUC:
- return "SEND";
- case GUC_CTB_TYPE_GUC2HOST:
- return "RECV";
- default:
- return "<invalid>";
- }
-}
-
static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc)
{
memset(desc, 0, sizeof(*desc));
@@ -156,71 +144,65 @@ static void guc_ct_buffer_init(struct intel_guc_ct_buffer *ctb,
guc_ct_buffer_reset(ctb);
}
-static int guc_action_register_ct_buffer(struct intel_guc *guc, u32 type,
- u32 desc_addr, u32 buff_addr, u32 size)
+static int guc_action_control_ctb(struct intel_guc *guc, u32 control)
{
- u32 request[HOST2GUC_REGISTER_CTB_REQUEST_MSG_LEN] = {
+ u32 request[HOST2GUC_CONTROL_CTB_REQUEST_MSG_LEN] = {
FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
- FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_HOST2GUC_REGISTER_CTB),
- FIELD_PREP(HOST2GUC_REGISTER_CTB_REQUEST_MSG_1_SIZE, size / SZ_4K - 1) |
- FIELD_PREP(HOST2GUC_REGISTER_CTB_REQUEST_MSG_1_TYPE, type),
- FIELD_PREP(HOST2GUC_REGISTER_CTB_REQUEST_MSG_2_DESC_ADDR, desc_addr),
- FIELD_PREP(HOST2GUC_REGISTER_CTB_REQUEST_MSG_3_BUFF_ADDR, buff_addr),
+ FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_HOST2GUC_CONTROL_CTB),
+ FIELD_PREP(HOST2GUC_CONTROL_CTB_REQUEST_MSG_1_CONTROL, control),
};
int ret;
- GEM_BUG_ON(type != GUC_CTB_TYPE_HOST2GUC && type != GUC_CTB_TYPE_GUC2HOST);
- GEM_BUG_ON(size % SZ_4K);
+ GEM_BUG_ON(control != GUC_CTB_CONTROL_DISABLE && control != GUC_CTB_CONTROL_ENABLE);
- /* CT registration must go over MMIO */
+ /* CT control must go over MMIO */
ret = intel_guc_send_mmio(guc, request, ARRAY_SIZE(request), NULL, 0);
return ret > 0 ? -EPROTO : ret;
}
-static int ct_register_buffer(struct intel_guc_ct *ct, u32 type,
- u32 desc_addr, u32 buff_addr, u32 size)
+static int ct_control_enable(struct intel_guc_ct *ct, bool enable)
{
int err;
- err = i915_inject_probe_error(guc_to_gt(ct_to_guc(ct))->i915, -ENXIO);
+ err = guc_action_control_ctb(ct_to_guc(ct), enable ?
+ GUC_CTB_CONTROL_ENABLE : GUC_CTB_CONTROL_DISABLE);
if (unlikely(err))
- return err;
+ CT_PROBE_ERROR(ct, "Failed to control/%s CTB (%pe)\n",
+ enabledisable(enable), ERR_PTR(err));
- err = guc_action_register_ct_buffer(ct_to_guc(ct), type,
- desc_addr, buff_addr, size);
- if (unlikely(err))
- CT_ERROR(ct, "Failed to register %s buffer (%pe)\n",
- guc_ct_buffer_type_to_str(type), ERR_PTR(err));
return err;
}
-static int guc_action_deregister_ct_buffer(struct intel_guc *guc, u32 type)
+static int ct_register_buffer(struct intel_guc_ct *ct, bool send,
+ u32 desc_addr, u32 buff_addr, u32 size)
{
- u32 request[HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_LEN] = {
- FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
- FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
- FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_HOST2GUC_DEREGISTER_CTB),
- FIELD_PREP(HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_1_TYPE, type),
- };
- int ret;
-
- GEM_BUG_ON(type != GUC_CTB_TYPE_HOST2GUC && type != GUC_CTB_TYPE_GUC2HOST);
-
- /* CT deregistration must go over MMIO */
- ret = intel_guc_send_mmio(guc, request, ARRAY_SIZE(request), NULL, 0);
+ int err;
- return ret > 0 ? -EPROTO : ret;
-}
+ err = intel_guc_self_cfg64(ct_to_guc(ct), send ?
+ GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_KEY :
+ GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_KEY,
+ desc_addr);
+ if (unlikely(err))
+ goto failed;
-static int ct_deregister_buffer(struct intel_guc_ct *ct, u32 type)
-{
- int err = guc_action_deregister_ct_buffer(ct_to_guc(ct), type);
+ err = intel_guc_self_cfg64(ct_to_guc(ct), send ?
+ GUC_KLV_SELF_CFG_H2G_CTB_ADDR_KEY :
+ GUC_KLV_SELF_CFG_G2H_CTB_ADDR_KEY,
+ buff_addr);
+ if (unlikely(err))
+ goto failed;
+ err = intel_guc_self_cfg32(ct_to_guc(ct), send ?
+ GUC_KLV_SELF_CFG_H2G_CTB_SIZE_KEY :
+ GUC_KLV_SELF_CFG_G2H_CTB_SIZE_KEY,
+ size);
if (unlikely(err))
- CT_ERROR(ct, "Failed to deregister %s buffer (%pe)\n",
- guc_ct_buffer_type_to_str(type), ERR_PTR(err));
+failed:
+ CT_PROBE_ERROR(ct, "Failed to register %s buffer (%pe)\n",
+ send ? "SEND" : "RECV", ERR_PTR(err));
+
return err;
}
@@ -308,7 +290,7 @@ void intel_guc_ct_fini(struct intel_guc_ct *ct)
int intel_guc_ct_enable(struct intel_guc_ct *ct)
{
struct intel_guc *guc = ct_to_guc(ct);
- u32 base, desc, cmds;
+ u32 base, desc, cmds, size;
void *blob;
int err;
@@ -333,27 +315,27 @@ int intel_guc_ct_enable(struct intel_guc_ct *ct)
*/
desc = base + ptrdiff(ct->ctbs.recv.desc, blob);
cmds = base + ptrdiff(ct->ctbs.recv.cmds, blob);
- err = ct_register_buffer(ct, GUC_CTB_TYPE_GUC2HOST,
- desc, cmds, ct->ctbs.recv.size * 4);
-
+ size = ct->ctbs.recv.size * 4;
+ err = ct_register_buffer(ct, false, desc, cmds, size);
if (unlikely(err))
goto err_out;
desc = base + ptrdiff(ct->ctbs.send.desc, blob);
cmds = base + ptrdiff(ct->ctbs.send.cmds, blob);
- err = ct_register_buffer(ct, GUC_CTB_TYPE_HOST2GUC,
- desc, cmds, ct->ctbs.send.size * 4);
+ size = ct->ctbs.send.size * 4;
+ err = ct_register_buffer(ct, true, desc, cmds, size);
+ if (unlikely(err))
+ goto err_out;
+ err = ct_control_enable(ct, true);
if (unlikely(err))
- goto err_deregister;
+ goto err_out;
ct->enabled = true;
ct->stall_time = KTIME_MAX;
return 0;
-err_deregister:
- ct_deregister_buffer(ct, GUC_CTB_TYPE_GUC2HOST);
err_out:
CT_PROBE_ERROR(ct, "Failed to enable CTB (%pe)\n", ERR_PTR(err));
return err;
@@ -372,8 +354,7 @@ void intel_guc_ct_disable(struct intel_guc_ct *ct)
ct->enabled = false;
if (intel_guc_is_fw_running(guc)) {
- ct_deregister_buffer(ct, GUC_CTB_TYPE_HOST2GUC);
- ct_deregister_buffer(ct, GUC_CTB_TYPE_GUC2HOST);
+ ct_control_enable(ct, false);
}
}
@@ -662,6 +643,7 @@ static int ct_send(struct intel_guc_ct *ct,
struct ct_request request;
unsigned long flags;
unsigned int sleep_period_ms = 1;
+ bool send_again;
u32 fence;
int err;
@@ -671,6 +653,9 @@ static int ct_send(struct intel_guc_ct *ct,
GEM_BUG_ON(!response_buf && response_buf_size);
might_sleep();
+resend:
+ send_again = false;
+
/*
* We use a lazy spin wait loop here as we believe that if the CT
* buffers are sized correctly the flow control condition should be
@@ -725,6 +710,13 @@ retry:
goto unlink;
}
+ if (FIELD_GET(GUC_HXG_MSG_0_TYPE, *status) == GUC_HXG_TYPE_NO_RESPONSE_RETRY) {
+ CT_DEBUG(ct, "retrying request %#x (%u)\n", *action,
+ FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, *status));
+ send_again = true;
+ goto unlink;
+ }
+
if (FIELD_GET(GUC_HXG_MSG_0_TYPE, *status) != GUC_HXG_TYPE_RESPONSE_SUCCESS) {
err = -EIO;
goto unlink;
@@ -747,6 +739,9 @@ unlink:
list_del(&request.link);
spin_unlock_irqrestore(&ct->requests.lock, flags);
+ if (unlikely(send_again))
+ goto resend;
+
return err;
}
@@ -789,7 +784,7 @@ static struct ct_incoming_msg *ct_alloc_msg(u32 num_dwords)
{
struct ct_incoming_msg *msg;
- msg = kmalloc(sizeof(*msg) + sizeof(u32) * num_dwords, GFP_ATOMIC);
+ msg = kmalloc(struct_size(msg, msg, num_dwords), GFP_ATOMIC);
if (msg)
msg->size = num_dwords;
return msg;
@@ -918,6 +913,7 @@ static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *r
GEM_BUG_ON(len < GUC_HXG_MSG_MIN_LEN);
GEM_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, hxg[0]) != GUC_HXG_ORIGIN_GUC);
GEM_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_RESPONSE_SUCCESS &&
+ FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_NO_RESPONSE_RETRY &&
FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_RESPONSE_FAILURE);
CT_DEBUG(ct, "response fence %u status %#x\n", fence, hxg[0]);
@@ -990,9 +986,27 @@ static int ct_process_request(struct intel_guc_ct *ct, struct ct_incoming_msg *r
case INTEL_GUC_ACTION_CONTEXT_RESET_NOTIFICATION:
ret = intel_guc_context_reset_process_msg(guc, payload, len);
break;
+ case INTEL_GUC_ACTION_STATE_CAPTURE_NOTIFICATION:
+ ret = intel_guc_error_capture_process_msg(guc, payload, len);
+ if (unlikely(ret))
+ CT_ERROR(ct, "error capture notification failed %x %*ph\n",
+ action, 4 * len, payload);
+ break;
case INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION:
ret = intel_guc_engine_failure_process_msg(guc, payload, len);
break;
+ case INTEL_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE:
+ intel_guc_log_handle_flush_event(&guc->log);
+ ret = 0;
+ break;
+ case INTEL_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED:
+ CT_ERROR(ct, "Received GuC crash dump notification!\n");
+ ret = 0;
+ break;
+ case INTEL_GUC_ACTION_NOTIFY_EXCEPTION:
+ CT_ERROR(ct, "Received GuC exception notification!\n");
+ ret = 0;
+ break;
default:
ret = -EOPNOTSUPP;
break;
@@ -1098,6 +1112,7 @@ static int ct_handle_hxg(struct intel_guc_ct *ct, struct ct_incoming_msg *msg)
break;
case GUC_HXG_TYPE_RESPONSE_SUCCESS:
case GUC_HXG_TYPE_RESPONSE_FAILURE:
+ case GUC_HXG_TYPE_NO_RESPONSE_RETRY:
err = ct_handle_response(ct, msg);
break;
default:
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
index dcb51b53b495..a0372735cddb 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
@@ -16,13 +16,15 @@
static void guc_prepare_xfer(struct intel_uncore *uncore)
{
- u32 shim_flags = GUC_DISABLE_SRAM_INIT_TO_ZEROES |
- GUC_ENABLE_READ_CACHE_LOGIC |
- GUC_ENABLE_MIA_CACHING |
+ u32 shim_flags = GUC_ENABLE_READ_CACHE_LOGIC |
GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA |
GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA |
GUC_ENABLE_MIA_CLOCK_GATING;
+ if (GRAPHICS_VER_FULL(uncore->i915) < IP_VER(12, 50))
+ shim_flags |= GUC_DISABLE_SRAM_INIT_TO_ZEROES |
+ GUC_ENABLE_MIA_CACHING;
+
/* Must program this register before loading the ucode with DMA */
intel_uncore_write(uncore, GUC_SHIM_CONTROL, shim_flags);
@@ -91,11 +93,10 @@ static int guc_xfer_rsa(struct intel_uc_fw *guc_fw,
static inline bool guc_ready(struct intel_uncore *uncore, u32 *status)
{
u32 val = intel_uncore_read(uncore, GUC_STATUS);
- u32 uk_val = val & GS_UKERNEL_MASK;
+ u32 uk_val = REG_FIELD_GET(GS_UKERNEL_MASK, val);
*status = val;
- return (uk_val == GS_UKERNEL_READY) ||
- ((val & GS_MIA_CORE_STATE) && (uk_val == GS_UKERNEL_LAPIC_DONE));
+ return uk_val == INTEL_GUC_LOAD_STATUS_READY;
}
static int guc_wait_ucode(struct intel_uncore *uncore)
@@ -106,17 +107,26 @@ static int guc_wait_ucode(struct intel_uncore *uncore)
/*
* Wait for the GuC to start up.
* NB: Docs recommend not using the interrupt for completion.
- * Measurements indicate this should take no more than 20ms, so a
+ * Measurements indicate this should take no more than 20ms
+ * (assuming the GT clock is at maximum frequency). So, a
* timeout here indicates that the GuC has failed and is unusable.
* (Higher levels of the driver may decide to reset the GuC and
* attempt the ucode load again if this happens.)
+ *
+ * FIXME: There is a known (but exceedingly unlikely) race condition
+ * where the asynchronous frequency management code could reduce
+ * the GT clock while a GuC reload is in progress (during a full
+ * GT reset). A fix is in progress but there are complex locking
+ * issues to be resolved. In the meantime bump the timeout to
+ * 200ms. Even at slowest clock, this should be sufficient. And
+ * in the working case, a larger timeout makes no difference.
*/
- ret = wait_for(guc_ready(uncore, &status), 100);
+ ret = wait_for(guc_ready(uncore, &status), 200);
if (ret) {
struct drm_device *drm = &uncore->i915->drm;
- drm_dbg(drm, "GuC load failed: status = 0x%08X\n", status);
- drm_dbg(drm, "GuC load failed: status: Reset = %d, "
+ drm_info(drm, "GuC load failed: status = 0x%08X\n", status);
+ drm_info(drm, "GuC load failed: status: Reset = %d, "
"BootROM = 0x%02X, UKernel = 0x%02X, "
"MIA = 0x%02X, Auth = 0x%02X\n",
REG_FIELD_GET(GS_MIA_IN_RESET, status),
@@ -126,13 +136,13 @@ static int guc_wait_ucode(struct intel_uncore *uncore)
REG_FIELD_GET(GS_AUTH_STATUS_MASK, status));
if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) {
- drm_dbg(drm, "GuC firmware signature verification failed\n");
+ drm_info(drm, "GuC firmware signature verification failed\n");
ret = -ENOEXEC;
}
- if ((status & GS_UKERNEL_MASK) == GS_UKERNEL_EXCEPTION) {
- drm_dbg(drm, "GuC firmware exception. EIP: %#x\n",
- intel_uncore_read(uncore, SOFT_SCRATCH(13)));
+ if (REG_FIELD_GET(GS_UKERNEL_MASK, status) == INTEL_GUC_LOAD_STATUS_EXCEPTION) {
+ drm_info(drm, "GuC firmware exception. EIP: %#x\n",
+ intel_uncore_read(uncore, SOFT_SCRATCH(13)));
ret = -ENXIO;
}
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index 7072e30e99f4..6a4612a852e2 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -16,6 +16,7 @@
#include "abi/guc_errors_abi.h"
#include "abi/guc_communication_mmio_abi.h"
#include "abi/guc_communication_ctb_abi.h"
+#include "abi/guc_klvs_abi.h"
#include "abi/guc_messages_abi.h"
/* Payload length only i.e. don't include G2H header length */
@@ -84,19 +85,24 @@
#define GUC_STAGE_DESC_ATTR_TERMINATED BIT(7)
#define GUC_CTL_LOG_PARAMS 0
-#define GUC_LOG_VALID (1 << 0)
-#define GUC_LOG_NOTIFY_ON_HALF_FULL (1 << 1)
-#define GUC_LOG_ALLOC_IN_MEGABYTE (1 << 3)
+#define GUC_LOG_VALID BIT(0)
+#define GUC_LOG_NOTIFY_ON_HALF_FULL BIT(1)
+#define GUC_LOG_CAPTURE_ALLOC_UNITS BIT(2)
+#define GUC_LOG_LOG_ALLOC_UNITS BIT(3)
#define GUC_LOG_CRASH_SHIFT 4
#define GUC_LOG_CRASH_MASK (0x3 << GUC_LOG_CRASH_SHIFT)
#define GUC_LOG_DEBUG_SHIFT 6
#define GUC_LOG_DEBUG_MASK (0xF << GUC_LOG_DEBUG_SHIFT)
+#define GUC_LOG_CAPTURE_SHIFT 10
+#define GUC_LOG_CAPTURE_MASK (0x3 << GUC_LOG_CAPTURE_SHIFT)
#define GUC_LOG_BUF_ADDR_SHIFT 12
#define GUC_CTL_WA 1
+#define GUC_WA_POLLCS BIT(18)
+
#define GUC_CTL_FEATURE 2
-#define GUC_CTL_DISABLE_SCHEDULER (1 << 14)
#define GUC_CTL_ENABLE_SLPC BIT(2)
+#define GUC_CTL_DISABLE_SCHEDULER BIT(14)
#define GUC_CTL_DEBUG 3
#define GUC_LOG_VERBOSITY_SHIFT 0
@@ -116,6 +122,8 @@
#define GUC_ADS_ADDR_SHIFT 1
#define GUC_ADS_ADDR_MASK (0xFFFFF << GUC_ADS_ADDR_SHIFT)
+#define GUC_CTL_DEVID 5
+
#define GUC_CTL_MAX_DWORDS (SOFT_SCRATCH_COUNT - 2) /* [1..14] */
/* Generic GT SysInfo data types */
@@ -263,7 +271,10 @@ struct guc_mmio_reg {
u32 offset;
u32 value;
u32 flags;
-#define GUC_REGSET_MASKED (1 << 0)
+ u32 mask;
+#define GUC_REGSET_MASKED BIT(0)
+#define GUC_REGSET_MASKED_WITH_VALUE BIT(2)
+#define GUC_REGSET_RESTORE_ONLY BIT(3)
} __packed;
/* GuC register sets */
@@ -280,6 +291,12 @@ struct guc_gt_system_info {
u32 generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_MAX];
} __packed;
+enum {
+ GUC_CAPTURE_LIST_INDEX_PF = 0,
+ GUC_CAPTURE_LIST_INDEX_VF = 1,
+ GUC_CAPTURE_LIST_INDEX_MAX = 2,
+};
+
/* GuC Additional Data Struct */
struct guc_ads {
struct guc_mmio_reg_set reg_state_list[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
@@ -291,7 +308,11 @@ struct guc_ads {
u32 golden_context_lrca[GUC_MAX_ENGINE_CLASSES];
u32 eng_state_size[GUC_MAX_ENGINE_CLASSES];
u32 private_data;
- u32 reserved[15];
+ u32 reserved2;
+ u32 capture_instance[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES];
+ u32 capture_class[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES];
+ u32 capture_global[GUC_CAPTURE_LIST_INDEX_MAX];
+ u32 reserved[14];
} __packed;
/* Engine usage stats */
@@ -312,6 +333,7 @@ struct guc_engine_usage {
enum guc_log_buffer_type {
GUC_DEBUG_LOG_BUFFER,
GUC_CRASH_DUMP_LOG_BUFFER,
+ GUC_CAPTURE_LOG_BUFFER,
GUC_MAX_LOG_BUFFER
};
@@ -342,6 +364,7 @@ struct guc_log_buffer_state {
u32 write_ptr;
u32 size;
u32 sampled_write_ptr;
+ u32 wrap_offset;
union {
struct {
u32 flush_to_file:1;
@@ -382,7 +405,7 @@ struct guc_shared_ctx_data {
/* This action will be programmed in C1BC - SOFT_SCRATCH_15_REG */
enum intel_guc_recv_message {
INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED = BIT(1),
- INTEL_GUC_RECV_MSG_FLUSH_LOG_BUFFER = BIT(3)
+ INTEL_GUC_RECV_MSG_EXCEPTION = BIT(30),
};
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
index 7b0b43e87244..b53f61f3101f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
@@ -56,20 +56,6 @@ static int guc_action_control_log(struct intel_guc *guc, bool enable,
return intel_guc_send(guc, action, ARRAY_SIZE(action));
}
-static void guc_log_enable_flush_events(struct intel_guc_log *log)
-{
- intel_guc_enable_msg(log_to_guc(log),
- INTEL_GUC_RECV_MSG_FLUSH_LOG_BUFFER |
- INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED);
-}
-
-static void guc_log_disable_flush_events(struct intel_guc_log *log)
-{
- intel_guc_disable_msg(log_to_guc(log),
- INTEL_GUC_RECV_MSG_FLUSH_LOG_BUFFER |
- INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED);
-}
-
/*
* Sub buffer switch callback. Called whenever relay has to switch to a new
* sub buffer, relay stays on the same sub buffer if 0 is returned.
@@ -202,6 +188,8 @@ static unsigned int guc_get_log_buffer_size(enum guc_log_buffer_type type)
return DEBUG_BUFFER_SIZE;
case GUC_CRASH_DUMP_LOG_BUFFER:
return CRASH_BUFFER_SIZE;
+ case GUC_CAPTURE_LOG_BUFFER:
+ return CAPTURE_BUFFER_SIZE;
default:
MISSING_CASE(type);
}
@@ -464,14 +452,19 @@ int intel_guc_log_create(struct intel_guc_log *log)
* +-------------------------------+ 32B
* | Debug state header |
* +-------------------------------+ 64B
+ * | Capture state header |
+ * +-------------------------------+ 96B
* | |
* +===============================+ PAGE_SIZE (4KB)
* | Crash Dump logs |
* +===============================+ + CRASH_SIZE
* | Debug logs |
* +===============================+ + DEBUG_SIZE
+ * | Capture logs |
+ * +===============================+ + CAPTURE_SIZE
*/
- guc_log_size = PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE;
+ guc_log_size = PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE +
+ CAPTURE_BUFFER_SIZE;
vma = intel_guc_allocate_vma(guc, guc_log_size);
if (IS_ERR(vma)) {
@@ -593,8 +586,6 @@ int intel_guc_log_relay_start(struct intel_guc_log *log)
if (log->relay.started)
return -EEXIST;
- guc_log_enable_flush_events(log);
-
/*
* When GuC is logging without us relaying to userspace, we're ignoring
* the flush notification. This means that we need to unconditionally
@@ -641,7 +632,6 @@ static void guc_log_relay_stop(struct intel_guc_log *log)
if (!log->relay.started)
return;
- guc_log_disable_flush_events(log);
intel_synchronize_irq(i915);
flush_work(&log->relay.flush_work);
@@ -662,7 +652,8 @@ void intel_guc_log_relay_close(struct intel_guc_log *log)
void intel_guc_log_handle_flush_event(struct intel_guc_log *log)
{
- queue_work(system_highpri_wq, &log->relay.flush_work);
+ if (log->relay.started)
+ queue_work(system_highpri_wq, &log->relay.flush_work);
}
static const char *
@@ -673,6 +664,8 @@ stringify_guc_log_type(enum guc_log_buffer_type type)
return "DEBUG";
case GUC_CRASH_DUMP_LOG_BUFFER:
return "CRASH";
+ case GUC_CAPTURE_LOG_BUFFER:
+ return "CAPTURE";
default:
MISSING_CASE(type);
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
index fe6ab7550a14..d7e1b6471fed 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
@@ -18,12 +18,15 @@ struct intel_guc;
#if defined(CONFIG_DRM_I915_DEBUG_GUC)
#define CRASH_BUFFER_SIZE SZ_2M
#define DEBUG_BUFFER_SIZE SZ_16M
+#define CAPTURE_BUFFER_SIZE SZ_4M
#elif defined(CONFIG_DRM_I915_DEBUG_GEM)
#define CRASH_BUFFER_SIZE SZ_1M
#define DEBUG_BUFFER_SIZE SZ_2M
+#define CAPTURE_BUFFER_SIZE SZ_1M
#else
#define CRASH_BUFFER_SIZE SZ_8K
#define DEBUG_BUFFER_SIZE SZ_64K
+#define CAPTURE_BUFFER_SIZE SZ_16K
#endif
/*
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h
index 85846c5570c5..66027a42cda9 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h
@@ -22,10 +22,6 @@
#define GS_BOOTROM_JUMP_PASSED (0x76 << GS_BOOTROM_SHIFT)
#define GS_UKERNEL_SHIFT 8
#define GS_UKERNEL_MASK (0xFF << GS_UKERNEL_SHIFT)
-#define GS_UKERNEL_LAPIC_DONE (0x30 << GS_UKERNEL_SHIFT)
-#define GS_UKERNEL_DPC_ERROR (0x60 << GS_UKERNEL_SHIFT)
-#define GS_UKERNEL_EXCEPTION (0x70 << GS_UKERNEL_SHIFT)
-#define GS_UKERNEL_READY (0xF0 << GS_UKERNEL_SHIFT)
#define GS_MIA_SHIFT 16
#define GS_MIA_MASK (0x07 << GS_MIA_SHIFT)
#define GS_MIA_CORE_STATE (0x01 << GS_MIA_SHIFT)
@@ -98,6 +94,9 @@
#define GUC_ENABLE_MIA_CLOCK_GATING (1<<15)
#define GUC_GEN10_SHIM_WC_ENABLE (1<<21)
+#define GUC_SHIM_CONTROL2 _MMIO(0xc068)
+#define GUC_IS_PRIVILEGED (1<<29)
+
#define GUC_SEND_INTERRUPT _MMIO(0xc4c8)
#define GUC_SEND_TRIGGER (1<<0)
#define GEN11_GUC_HOST_INTERRUPT _MMIO(0x1901f0)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 04b8321fc758..b3a429a92c0d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1430,7 +1430,8 @@ submission_disabled(struct intel_guc *guc)
struct i915_sched_engine * const sched_engine = guc->sched_engine;
return unlikely(!sched_engine ||
- !__tasklet_is_enabled(&sched_engine->tasklet));
+ !__tasklet_is_enabled(&sched_engine->tasklet) ||
+ intel_gt_is_wedged(guc_to_gt(guc)));
}
static void disable_submission(struct intel_guc *guc)
@@ -1475,8 +1476,6 @@ static void guc_flush_destroyed_contexts(struct intel_guc *guc);
void intel_guc_submission_reset_prepare(struct intel_guc *guc)
{
- int i;
-
if (unlikely(!guc_submission_initialized(guc))) {
/* Reset called during driver load? GuC not yet initialised! */
return;
@@ -1493,21 +1492,7 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc)
guc_flush_submissions(guc);
guc_flush_destroyed_contexts(guc);
-
- /*
- * Handle any outstanding G2Hs before reset. Call IRQ handler directly
- * each pass as interrupt have been disabled. We always scrub for
- * outstanding G2H as it is possible for outstanding_submission_g2h to
- * be incremented after the context state update.
- */
- for (i = 0; i < 4 && atomic_read(&guc->outstanding_submission_g2h); ++i) {
- intel_guc_to_host_event_handler(guc);
-#define wait_for_reset(guc, wait_var) \
- intel_guc_wait_for_pending_msg(guc, wait_var, false, (HZ / 20))
- do {
- wait_for_reset(guc, &guc->outstanding_submission_g2h);
- } while (!list_empty(&guc->ct.requests.incoming));
- }
+ flush_work(&guc->ct.requests.worker);
scrub_guc_desc_for_outstanding_g2h(guc);
}
@@ -1612,7 +1597,6 @@ static void __guc_reset_context(struct intel_context *ce, bool stalled)
unsigned long flags;
u32 head;
int i, number_children = ce->parallel.number_children;
- bool skip = false;
struct intel_context *parent = ce;
GEM_BUG_ON(intel_context_is_child(ce));
@@ -1623,23 +1607,10 @@ static void __guc_reset_context(struct intel_context *ce, bool stalled)
* GuC will implicitly mark the context as non-schedulable when it sends
* the reset notification. Make sure our state reflects this change. The
* context will be marked enabled on resubmission.
- *
- * XXX: If the context is reset as a result of the request cancellation
- * this G2H is received after the schedule disable complete G2H which is
- * wrong as this creates a race between the request cancellation code
- * re-submitting the context and this G2H handler. This is a bug in the
- * GuC but can be worked around in the meantime but converting this to a
- * NOP if a pending enable is in flight as this indicates that a request
- * cancellation has occurred.
*/
spin_lock_irqsave(&ce->guc_state.lock, flags);
- if (likely(!context_pending_enable(ce)))
- clr_context_enabled(ce);
- else
- skip = true;
+ clr_context_enabled(ce);
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
- if (unlikely(skip))
- goto out_put;
/*
* For each context in the relationship find the hanging request
@@ -1671,7 +1642,6 @@ next_context:
}
__unwind_incomplete_requests(parent);
-out_put:
intel_context_put(parent);
}
@@ -1806,7 +1776,7 @@ void intel_guc_submission_reset_finish(struct intel_guc *guc)
{
/* Reset called during driver load or during wedge? */
if (unlikely(!guc_submission_initialized(guc) ||
- test_bit(I915_WEDGED, &guc_to_gt(guc)->reset.flags))) {
+ intel_gt_is_wedged(guc_to_gt(guc)))) {
return;
}
@@ -1825,6 +1795,7 @@ void intel_guc_submission_reset_finish(struct intel_guc *guc)
}
static void destroyed_worker_func(struct work_struct *w);
+static void reset_fail_worker_func(struct work_struct *w);
/*
* Set up the memory resources to be shared with the GuC (via the GGTT)
@@ -1855,6 +1826,8 @@ int intel_guc_submission_init(struct intel_guc *guc)
INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts);
INIT_WORK(&guc->submission_state.destroyed_worker,
destroyed_worker_func);
+ INIT_WORK(&guc->submission_state.reset_fail_worker,
+ reset_fail_worker_func);
guc->submission_state.guc_ids_bitmap =
bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
@@ -2611,12 +2584,6 @@ static void guc_context_cancel_request(struct intel_context *ce,
true);
}
- /*
- * XXX: Racey if context is reset, see comment in
- * __guc_reset_context().
- */
- flush_work(&ce_to_guc(ce)->ct.requests.worker);
-
guc_context_unblock(block_context);
intel_context_put(ce);
}
@@ -3330,8 +3297,6 @@ static void guc_parent_context_unpin(struct intel_context *ce)
GEM_BUG_ON(!intel_context_is_parent(ce));
GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
- if (ce->parallel.last_rq)
- i915_request_put(ce->parallel.last_rq);
unpin_guc_id(guc, ce);
lrc_unpin(ce);
}
@@ -4053,14 +4018,14 @@ static void guc_handle_context_reset(struct intel_guc *guc,
{
trace_intel_context_reset(ce);
- /*
- * XXX: Racey if request cancellation has occurred, see comment in
- * __guc_reset_context().
- */
- if (likely(!intel_context_is_banned(ce) &&
- !context_blocked(ce))) {
+ if (likely(!intel_context_is_banned(ce))) {
capture_error_state(guc, ce);
guc_context_replay(ce);
+ } else {
+ drm_err(&guc_to_gt(guc)->i915->drm,
+ "Invalid GuC engine reset notificaion for 0x%04X on %s: banned = %d, blocked = %d",
+ ce->guc_id.id, ce->engine->name, intel_context_is_banned(ce),
+ context_blocked(ce));
}
}
@@ -4099,6 +4064,24 @@ int intel_guc_context_reset_process_msg(struct intel_guc *guc,
return 0;
}
+int intel_guc_error_capture_process_msg(struct intel_guc *guc,
+ const u32 *msg, u32 len)
+{
+ int status;
+
+ if (unlikely(len != 1)) {
+ drm_dbg(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
+ return -EPROTO;
+ }
+
+ status = msg[0];
+ drm_info(&guc_to_gt(guc)->i915->drm, "Got error capture: status = %d", status);
+
+ /* FIXME: Do something with the capture */
+
+ return 0;
+}
+
static struct intel_engine_cs *
guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance)
{
@@ -4111,6 +4094,26 @@ guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance)
return gt->engine_class[engine_class][instance];
}
+static void reset_fail_worker_func(struct work_struct *w)
+{
+ struct intel_guc *guc = container_of(w, struct intel_guc,
+ submission_state.reset_fail_worker);
+ struct intel_gt *gt = guc_to_gt(guc);
+ intel_engine_mask_t reset_fail_mask;
+ unsigned long flags;
+
+ spin_lock_irqsave(&guc->submission_state.lock, flags);
+ reset_fail_mask = guc->submission_state.reset_fail_mask;
+ guc->submission_state.reset_fail_mask = 0;
+ spin_unlock_irqrestore(&guc->submission_state.lock, flags);
+
+ if (likely(reset_fail_mask))
+ intel_gt_handle_error(gt, reset_fail_mask,
+ I915_ERROR_CAPTURE,
+ "GuC failed to reset engine mask=0x%x\n",
+ reset_fail_mask);
+}
+
int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
const u32 *msg, u32 len)
{
@@ -4118,6 +4121,7 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
struct intel_gt *gt = guc_to_gt(guc);
u8 guc_class, instance;
u32 reason;
+ unsigned long flags;
if (unlikely(len != 3)) {
drm_err(&gt->i915->drm, "Invalid length %u", len);
@@ -4142,10 +4146,15 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
drm_err(&gt->i915->drm, "GuC engine reset request failed on %d:%d (%s) because 0x%08X",
guc_class, instance, engine->name, reason);
- intel_gt_handle_error(gt, engine->mask,
- I915_ERROR_CAPTURE,
- "GuC failed to reset %s (reason=0x%08x)\n",
- engine->name, reason);
+ spin_lock_irqsave(&guc->submission_state.lock, flags);
+ guc->submission_state.reset_fail_mask |= engine->mask;
+ spin_unlock_irqrestore(&guc->submission_state.lock, flags);
+
+ /*
+ * A GT reset flushes this worker queue (G2H handler) so we must use
+ * another worker to trigger a GT reset.
+ */
+ queue_work(system_unbound_wq, &guc->submission_state.reset_fail_worker);
return 0;
}
@@ -4514,27 +4523,31 @@ static inline bool skip_handshake(struct i915_request *rq)
return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags);
}
+#define NON_SKIP_LEN 6
static u32 *
emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
u32 *cs)
{
struct intel_context *ce = rq->context;
+ __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs;
+ __maybe_unused u32 *start_fini_breadcrumb_cs = cs;
GEM_BUG_ON(!intel_context_is_parent(ce));
if (unlikely(skip_handshake(rq))) {
/*
* NOP everything in __emit_fini_breadcrumb_parent_no_preempt_mid_batch,
- * the -6 comes from the length of the emits below.
+ * the NON_SKIP_LEN comes from the length of the emits below.
*/
memset(cs, 0, sizeof(u32) *
- (ce->engine->emit_fini_breadcrumb_dw - 6));
- cs += ce->engine->emit_fini_breadcrumb_dw - 6;
+ (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN));
+ cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN;
} else {
cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs);
}
/* Emit fini breadcrumb */
+ before_fini_breadcrumb_user_interrupt_cs = cs;
cs = gen8_emit_ggtt_write(cs,
rq->fence.seqno,
i915_request_active_timeline(rq)->hwsp_offset,
@@ -4544,6 +4557,12 @@ emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
*cs++ = MI_USER_INTERRUPT;
*cs++ = MI_NOOP;
+ /* Ensure our math for skip + emit is correct */
+ GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN !=
+ cs);
+ GEM_BUG_ON(start_fini_breadcrumb_cs +
+ ce->engine->emit_fini_breadcrumb_dw != cs);
+
rq->tail = intel_ring_offset(rq, cs);
return cs;
@@ -4586,22 +4605,25 @@ emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
u32 *cs)
{
struct intel_context *ce = rq->context;
+ __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs;
+ __maybe_unused u32 *start_fini_breadcrumb_cs = cs;
GEM_BUG_ON(!intel_context_is_child(ce));
if (unlikely(skip_handshake(rq))) {
/*
* NOP everything in __emit_fini_breadcrumb_child_no_preempt_mid_batch,
- * the -6 comes from the length of the emits below.
+ * the NON_SKIP_LEN comes from the length of the emits below.
*/
memset(cs, 0, sizeof(u32) *
- (ce->engine->emit_fini_breadcrumb_dw - 6));
- cs += ce->engine->emit_fini_breadcrumb_dw - 6;
+ (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN));
+ cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN;
} else {
cs = __emit_fini_breadcrumb_child_no_preempt_mid_batch(rq, cs);
}
/* Emit fini breadcrumb */
+ before_fini_breadcrumb_user_interrupt_cs = cs;
cs = gen8_emit_ggtt_write(cs,
rq->fence.seqno,
i915_request_active_timeline(rq)->hwsp_offset,
@@ -4611,11 +4633,19 @@ emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
*cs++ = MI_USER_INTERRUPT;
*cs++ = MI_NOOP;
+ /* Ensure our math for skip + emit is correct */
+ GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN !=
+ cs);
+ GEM_BUG_ON(start_fini_breadcrumb_cs +
+ ce->engine->emit_fini_breadcrumb_dw != cs);
+
rq->tail = intel_ring_offset(rq, cs);
return cs;
}
+#undef NON_SKIP_LEN
+
static struct intel_context *
guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
unsigned long flags)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
index d10b227ac4aa..556829de9c17 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
@@ -124,6 +124,7 @@ int intel_huc_auth(struct intel_huc *huc)
}
intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_RUNNING);
+ drm_info(&gt->i915->drm, "HuC authenticated\n");
return 0;
fail:
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 09ed29df67bc..da199aa6989f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -432,6 +432,15 @@ static int __uc_check_hw(struct intel_uc *uc)
return 0;
}
+static void print_fw_ver(struct intel_uc *uc, struct intel_uc_fw *fw)
+{ /* Emit one drm_info line with @fw's type name, path and found major.minor version. */
+ struct drm_i915_private *i915 = uc_to_gt(uc)->i915; /* supplies the drm device to log against */
+
+ drm_info(&i915->drm, "%s firmware %s version %u.%u\n",
+ intel_uc_fw_type_repr(fw->type), fw->path,
+ fw->major_ver_found, fw->minor_ver_found);
+}
+
static int __uc_init_hw(struct intel_uc *uc)
{
struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
@@ -442,6 +451,11 @@ static int __uc_init_hw(struct intel_uc *uc)
GEM_BUG_ON(!intel_uc_supports_guc(uc));
GEM_BUG_ON(!intel_uc_wants_guc(uc));
+ print_fw_ver(uc, &guc->fw);
+
+ if (intel_uc_uses_huc(uc))
+ print_fw_ver(uc, &huc->fw);
+
if (!intel_uc_fw_is_loadable(&guc->fw)) {
ret = __uc_check_hw(uc) ||
intel_uc_fw_is_overridden(&guc->fw) ||
@@ -507,24 +521,11 @@ static int __uc_init_hw(struct intel_uc *uc)
intel_rps_lower_unslice(&uc_to_gt(uc)->rps);
}
- drm_info(&i915->drm, "%s firmware %s version %u.%u %s:%s\n",
- intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_GUC), guc->fw.path,
- guc->fw.major_ver_found, guc->fw.minor_ver_found,
- "submission",
+ drm_info(&i915->drm, "GuC submission %s\n",
enableddisabled(intel_uc_uses_guc_submission(uc)));
-
- drm_info(&i915->drm, "GuC SLPC: %s\n",
+ drm_info(&i915->drm, "GuC SLPC %s\n",
enableddisabled(intel_uc_uses_guc_slpc(uc)));
- if (intel_uc_uses_huc(uc)) {
- drm_info(&i915->drm, "%s firmware %s version %u.%u %s:%s\n",
- intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_HUC),
- huc->fw.path,
- huc->fw.major_ver_found, huc->fw.minor_ver_found,
- "authenticated",
- yesno(intel_huc_is_authenticated(huc)));
- }
-
return 0;
/*
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index 549d5919dc70..c88113044494 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -52,21 +52,21 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
* firmware as TGL.
*/
#define INTEL_GUC_FIRMWARE_DEFS(fw_def, guc_def) \
- fw_def(ALDERLAKE_P, 0, guc_def(adlp, 62, 0, 3)) \
- fw_def(ALDERLAKE_S, 0, guc_def(tgl, 62, 0, 0)) \
- fw_def(DG1, 0, guc_def(dg1, 62, 0, 0)) \
- fw_def(ROCKETLAKE, 0, guc_def(tgl, 62, 0, 0)) \
- fw_def(TIGERLAKE, 0, guc_def(tgl, 62, 0, 0)) \
- fw_def(JASPERLAKE, 0, guc_def(ehl, 62, 0, 0)) \
- fw_def(ELKHARTLAKE, 0, guc_def(ehl, 62, 0, 0)) \
- fw_def(ICELAKE, 0, guc_def(icl, 62, 0, 0)) \
- fw_def(COMETLAKE, 5, guc_def(cml, 62, 0, 0)) \
- fw_def(COMETLAKE, 0, guc_def(kbl, 62, 0, 0)) \
- fw_def(COFFEELAKE, 0, guc_def(kbl, 62, 0, 0)) \
- fw_def(GEMINILAKE, 0, guc_def(glk, 62, 0, 0)) \
- fw_def(KABYLAKE, 0, guc_def(kbl, 62, 0, 0)) \
- fw_def(BROXTON, 0, guc_def(bxt, 62, 0, 0)) \
- fw_def(SKYLAKE, 0, guc_def(skl, 62, 0, 0))
+ fw_def(ALDERLAKE_P, 0, guc_def(adlp, 69, 0, 3)) \
+ fw_def(ALDERLAKE_S, 0, guc_def(tgl, 69, 0, 3)) \
+ fw_def(DG1, 0, guc_def(dg1, 69, 0, 3)) \
+ fw_def(ROCKETLAKE, 0, guc_def(tgl, 69, 0, 3)) \
+ fw_def(TIGERLAKE, 0, guc_def(tgl, 69, 0, 3)) \
+ fw_def(JASPERLAKE, 0, guc_def(ehl, 69, 0, 3)) \
+ fw_def(ELKHARTLAKE, 0, guc_def(ehl, 69, 0, 3)) \
+ fw_def(ICELAKE, 0, guc_def(icl, 69, 0, 3)) \
+ fw_def(COMETLAKE, 5, guc_def(cml, 69, 0, 3)) \
+ fw_def(COMETLAKE, 0, guc_def(kbl, 69, 0, 3)) \
+ fw_def(COFFEELAKE, 0, guc_def(kbl, 69, 0, 3)) \
+ fw_def(GEMINILAKE, 0, guc_def(glk, 69, 0, 3)) \
+ fw_def(KABYLAKE, 0, guc_def(kbl, 69, 0, 3)) \
+ fw_def(BROXTON, 0, guc_def(bxt, 69, 0, 3)) \
+ fw_def(SKYLAKE, 0, guc_def(skl, 69, 0, 3))
#define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_def) \
fw_def(ALDERLAKE_P, 0, huc_def(tgl, 7, 9, 3)) \
@@ -451,20 +451,19 @@ static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw)
{
struct drm_i915_gem_object *obj = uc_fw->obj;
struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt;
- struct i915_vma *dummy = &uc_fw->dummy;
+ struct i915_vma_resource *dummy = &uc_fw->dummy;
u32 pte_flags = 0;
- dummy->node.start = uc_fw_ggtt_offset(uc_fw);
- dummy->node.size = obj->base.size;
- dummy->pages = obj->mm.pages;
- dummy->vm = &ggtt->vm;
+ dummy->start = uc_fw_ggtt_offset(uc_fw);
+ dummy->node_size = obj->base.size;
+ dummy->bi.pages = obj->mm.pages;
GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
- GEM_BUG_ON(dummy->node.size > ggtt->uc_fw.size);
+ GEM_BUG_ON(dummy->node_size > ggtt->uc_fw.size);
/* uc_fw->obj cache domains were not controlled across suspend */
if (i915_gem_object_has_struct_page(obj))
- drm_clflush_sg(dummy->pages);
+ drm_clflush_sg(dummy->bi.pages);
if (i915_gem_object_is_lmem(obj))
pte_flags |= PTE_LM;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
index d9d1dc0b4cbb..3229018877d3 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
@@ -85,7 +85,7 @@ struct intel_uc_fw {
* threaded as it is done during driver load (inherently single threaded)
* or during a GT reset (mutex guarantees single threaded).
*/
- struct i915_vma dummy;
+ struct i915_vma_resource dummy;
struct i915_vma *rsa_data;
/*
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
index d3327b802b76..a115894d5896 100644
--- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
@@ -157,7 +157,7 @@ static int intel_guc_steal_guc_ids(void *arg)
wakeref = intel_runtime_pm_get(gt->uncore->rpm);
engine = intel_selftest_find_any_engine(gt);
sv = guc->submission_state.num_guc_ids;
- guc->submission_state.num_guc_ids = 4096;
+ guc->submission_state.num_guc_ids = 512;
/* Create spinner to block requests in below loop */
ce[context_index] = intel_context_create(engine);
diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c
index 6b3dedd321bb..557f3314291a 100644
--- a/drivers/gpu/drm/i915/gvt/aperture_gm.c
+++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c
@@ -64,7 +64,7 @@ static int alloc_gm(struct intel_vgpu *vgpu, bool high_gm)
mutex_lock(&gt->ggtt->vm.mutex);
mmio_hw_access_pre(gt);
- ret = i915_gem_gtt_insert(&gt->ggtt->vm, node,
+ ret = i915_gem_gtt_insert(&gt->ggtt->vm, NULL, node,
size, I915_GTT_PAGE_SIZE,
I915_COLOR_UNEVICTABLE,
start, end, flags);
diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c
index 12b7c36d8e8f..c95c25d2addb 100644
--- a/drivers/gpu/drm/i915/gvt/dmabuf.c
+++ b/drivers/gpu/drm/i915/gvt/dmabuf.c
@@ -90,7 +90,7 @@ static int vgpu_gem_get_pages(
kfree(st);
return ret;
}
- gtt_entries = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm +
+ gtt_entries = (gen8_pte_t __iomem *)to_gt(dev_priv)->ggtt->gsm +
(fb_info->start >> PAGE_SHIFT);
for_each_sg(st->sgl, sg, page_num, i) {
dma_addr_t dma_addr =
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 54c66d4b6887..946bbe57bfe5 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -183,7 +183,8 @@ i915_debugfs_describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
seq_printf(m, " (%s offset: %08llx, size: %08llx, pages: %s",
stringify_vma_type(vma),
vma->node.start, vma->node.size,
- stringify_page_sizes(vma->page_sizes.gtt, NULL, 0));
+ stringify_page_sizes(vma->resource->page_sizes_gtt,
+ NULL, 0));
if (i915_vma_is_ggtt(vma) || i915_vma_is_dpt(vma)) {
switch (vma->ggtt_view.type) {
case I915_GGTT_VIEW_NORMAL:
@@ -403,9 +404,9 @@ static int i915_swizzle_info(struct seq_file *m, void *data)
intel_wakeref_t wakeref;
seq_printf(m, "bit6 swizzle for X-tiling = %s\n",
- swizzle_string(dev_priv->ggtt.bit_6_swizzle_x));
+ swizzle_string(to_gt(dev_priv)->ggtt->bit_6_swizzle_x));
seq_printf(m, "bit6 swizzle for Y-tiling = %s\n",
- swizzle_string(dev_priv->ggtt.bit_6_swizzle_y));
+ swizzle_string(to_gt(dev_priv)->ggtt->bit_6_swizzle_y));
if (dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
seq_puts(m, "L-shaped memory detected\n");
diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
index a892a1b546a8..1c67ff735f18 100644
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -577,6 +577,10 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv)
i915_perf_init(dev_priv);
+ ret = intel_gt_assign_ggtt(to_gt(dev_priv));
+ if (ret)
+ goto err_perf;
+
ret = i915_ggtt_probe_hw(dev_priv);
if (ret)
goto err_perf;
@@ -593,8 +597,6 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv)
if (ret)
goto err_ggtt;
- intel_gt_init_hw_early(to_gt(dev_priv), &dev_priv->ggtt);
-
ret = intel_gt_probe_lmem(to_gt(dev_priv));
if (ret)
goto err_mem_regions;
@@ -1152,7 +1154,7 @@ static int i915_drm_suspend(struct drm_device *dev)
/* Must be called before GGTT is suspended. */
intel_dpt_suspend(dev_priv);
- i915_ggtt_suspend(&dev_priv->ggtt);
+ i915_ggtt_suspend(to_gt(dev_priv)->ggtt);
i915_save_display(dev_priv);
@@ -1276,7 +1278,7 @@ static int i915_drm_resume(struct drm_device *dev)
if (ret)
drm_err(&dev_priv->drm, "failed to re-enable GGTT\n");
- i915_ggtt_resume(&dev_priv->ggtt);
+ i915_ggtt_resume(to_gt(dev_priv)->ggtt);
/* Must be called after GGTT is resumed. */
intel_dpt_resume(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ffde71b6b3f1..f600d1cb01b3 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -641,8 +641,6 @@ struct drm_i915_private {
struct drm_atomic_state *modeset_restore_state;
struct drm_modeset_acquire_ctx reset_ctx;
- struct i915_ggtt ggtt; /* VM representing the global address space */
-
struct i915_gem_mm mm;
/* Kernel Modesetting */
@@ -1089,6 +1087,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
IS_SUBPLATFORM(dev_priv, INTEL_DG2, INTEL_SUBPLATFORM_G10)
#define IS_DG2_G11(dev_priv) \
IS_SUBPLATFORM(dev_priv, INTEL_DG2, INTEL_SUBPLATFORM_G11)
+#define IS_DG2_G12(dev_priv) \
+ IS_SUBPLATFORM(dev_priv, INTEL_DG2, INTEL_SUBPLATFORM_G12)
#define IS_ADLS_RPLS(dev_priv) \
IS_SUBPLATFORM(dev_priv, INTEL_ALDERLAKE_S, INTEL_SUBPLATFORM_RPL_S)
#define IS_ADLP_N(dev_priv) \
@@ -1205,16 +1205,17 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
(IS_XEHPSDV(__i915) && IS_GRAPHICS_STEP(__i915, since, until))
/*
- * DG2 hardware steppings are a bit unusual. The hardware design was forked
- * to create two variants (G10 and G11) which have distinct workaround sets.
- * The G11 fork of the DG2 design resets the GT stepping back to "A0" for its
- * first iteration, even though it's more similar to a G10 B0 stepping in terms
- * of functionality and workarounds. However the display stepping does not
- * reset in the same manner --- a specific stepping like "B0" has a consistent
- * meaning regardless of whether it belongs to a G10 or G11 DG2.
+ * DG2 hardware steppings are a bit unusual. The hardware design was forked to
+ * create three variants (G10, G11, and G12) which each have distinct
+ * workaround sets. The G11 and G12 forks of the DG2 design reset the GT
+ * stepping back to "A0" for their first iterations, even though they're more
+ * similar to a G10 B0 stepping and G10 C0 stepping respectively in terms of
+ * functionality and workarounds. However the display stepping does not reset
+ * in the same manner --- a specific stepping like "B0" has a consistent
+ * meaning regardless of whether it belongs to a G10, G11, or G12 DG2.
*
* TLDR: All GT workarounds and stepping-specific logic must be applied in
- * relation to a specific subplatform (G10 or G11), whereas display workarounds
+ * relation to a specific subplatform (G10/G11/G12), whereas display workarounds
* and stepping-specific logic will be applied with a general DG2-wide stepping
* number.
*/
@@ -1384,6 +1385,9 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
#define INTEL_DISPLAY_ENABLED(dev_priv) \
(drm_WARN_ON(&(dev_priv)->drm, !HAS_DISPLAY(dev_priv)), !(dev_priv)->params.disable_display)
+#define HAS_GUC_DEPRIVILEGE(dev_priv) \
+ (INTEL_INFO(dev_priv)->has_guc_deprivilege)
+
static inline bool run_as_guest(void)
{
return !hypervisor_is_type(X86_HYPER_NATIVE);
@@ -1480,6 +1484,7 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
#define I915_GEM_OBJECT_UNBIND_BARRIER BIT(1)
#define I915_GEM_OBJECT_UNBIND_TEST BIT(2)
#define I915_GEM_OBJECT_UNBIND_VM_TRYLOCK BIT(3)
+#define I915_GEM_OBJECT_UNBIND_ASYNC BIT(4)
void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv);
@@ -1498,7 +1503,7 @@ static inline bool i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_objec
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
- return i915->ggtt.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
+ return to_gt(i915)->ggtt->bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
i915_gem_object_is_tiled(obj);
}
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index db897fb11f9b..2e10187cd0a0 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -92,7 +92,8 @@ int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
- struct i915_ggtt *ggtt = &to_i915(dev)->ggtt;
+ struct drm_i915_private *i915 = to_i915(dev);
+ struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
struct drm_i915_gem_get_aperture *args = data;
struct i915_vma *vma;
u64 pinned;
@@ -122,6 +123,8 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
struct i915_vma *vma;
int ret;
+ assert_object_held(obj);
+
if (list_empty(&obj->vma.list))
return 0;
@@ -159,10 +162,16 @@ try_again:
spin_unlock(&obj->vma.lock);
if (vma) {
+ bool vm_trylock = !!(flags & I915_GEM_OBJECT_UNBIND_VM_TRYLOCK);
ret = -EBUSY;
- if (flags & I915_GEM_OBJECT_UNBIND_ACTIVE ||
- !i915_vma_is_active(vma)) {
- if (flags & I915_GEM_OBJECT_UNBIND_VM_TRYLOCK) {
+ if (flags & I915_GEM_OBJECT_UNBIND_ASYNC) {
+ assert_object_held(vma->obj);
+ ret = i915_vma_unbind_async(vma, vm_trylock);
+ }
+
+ if (ret == -EBUSY && (flags & I915_GEM_OBJECT_UNBIND_ACTIVE ||
+ !i915_vma_is_active(vma))) {
+ if (vm_trylock) {
if (mutex_trylock(&vma->vm->mutex)) {
ret = __i915_vma_unbind(vma);
mutex_unlock(&vma->vm->mutex);
@@ -293,7 +302,7 @@ static struct i915_vma *i915_gem_gtt_prepare(struct drm_i915_gem_object *obj,
bool write)
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
- struct i915_ggtt *ggtt = &i915->ggtt;
+ struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
struct i915_vma *vma;
struct i915_gem_ww_ctx ww;
int ret;
@@ -354,7 +363,7 @@ static void i915_gem_gtt_cleanup(struct drm_i915_gem_object *obj,
struct i915_vma *vma)
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
- struct i915_ggtt *ggtt = &i915->ggtt;
+ struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
i915_gem_object_unpin_pages(obj);
if (drm_mm_node_allocated(node)) {
@@ -370,7 +379,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
const struct drm_i915_gem_pread *args)
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
- struct i915_ggtt *ggtt = &i915->ggtt;
+ struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
intel_wakeref_t wakeref;
struct drm_mm_node node;
void __user *user_data;
@@ -526,7 +535,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
const struct drm_i915_gem_pwrite *args)
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
- struct i915_ggtt *ggtt = &i915->ggtt;
+ struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
struct intel_runtime_pm *rpm = &i915->runtime_pm;
intel_wakeref_t wakeref;
struct drm_mm_node node;
@@ -827,7 +836,7 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915)
*/
list_for_each_entry_safe(obj, on,
- &i915->ggtt.userfault_list, userfault_link)
+ &to_gt(i915)->ggtt->userfault_list, userfault_link)
__i915_gem_object_release_mmap_gtt(obj);
/*
@@ -835,8 +844,8 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915)
* in use by hardware (i.e. they are pinned), we should not be powering
* down! All other fences will be reacquired by the user upon waking.
*/
- for (i = 0; i < i915->ggtt.num_fences; i++) {
- struct i915_fence_reg *reg = &i915->ggtt.fence_regs[i];
+ for (i = 0; i < to_gt(i915)->ggtt->num_fences; i++) {
+ struct i915_fence_reg *reg = &to_gt(i915)->ggtt->fence_regs[i];
/*
* Ideally we want to assert that the fence register is not
@@ -877,7 +886,7 @@ i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj,
u64 size, u64 alignment, u64 flags)
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
- struct i915_ggtt *ggtt = &i915->ggtt;
+ struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
struct i915_vma *vma;
int ret;
@@ -1127,7 +1136,7 @@ err_unlock:
/* Minimal basic recovery for KMS */
ret = i915_ggtt_enable_hw(dev_priv);
- i915_ggtt_resume(&dev_priv->ggtt);
+ i915_ggtt_resume(to_gt(dev_priv)->ggtt);
intel_init_clock_gating(dev_priv);
}
@@ -1150,7 +1159,7 @@ void i915_gem_driver_unregister(struct drm_i915_private *i915)
void i915_gem_driver_remove(struct drm_i915_private *dev_priv)
{
- intel_wakeref_auto_fini(&dev_priv->ggtt.userfault_wakeref);
+ intel_wakeref_auto_fini(&to_gt(dev_priv)->ggtt->userfault_wakeref);
i915_gem_suspend_late(dev_priv);
intel_gt_driver_remove(to_gt(dev_priv));
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 24eee0c2055f..f025ee4fa526 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -38,6 +38,11 @@ I915_SELFTEST_DECLARE(static struct igt_evict_ctl {
bool fail_if_busy:1;
} igt_evict_ctl;)
+static bool dying_vma(struct i915_vma *vma)
+{ /* Returns true when the refcount of the object backing @vma reads as zero. */
+ return !kref_read(&vma->obj->base.refcount);
+}
+
static int ggtt_flush(struct intel_gt *gt)
{
/*
@@ -50,8 +55,37 @@ static int ggtt_flush(struct intel_gt *gt)
return intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT);
}
+static bool grab_vma(struct i915_vma *vma, struct i915_gem_ww_ctx *ww)
+{ /* Reference and trylock the object behind @vma; returns false only if the trylock fails. */
+ /*
+ * We add the extra refcount so the object doesn't drop to zero until
+ * after ungrab_vma(), this way trylock is always paired with unlock.
+ */
+ if (i915_gem_object_get_rcu(vma->obj)) {
+ if (!i915_gem_object_trylock(vma->obj, ww)) {
+ i915_gem_object_put(vma->obj); /* give back the reference taken just above */
+ return false;
+ }
+ } else {
+ /* Dead objects don't need pins */
+ atomic_and(~I915_VMA_PIN_MASK, &vma->flags); /* clear every pin bit in the vma flags */
+ }
+
+ return true; /* object is referenced and locked, or no reference could be taken */
+}
+
+static void ungrab_vma(struct i915_vma *vma)
+{ /* Counterpart of grab_vma(): unlock the object and drop the extra reference. */
+ if (dying_vma(vma))
+ return; /* grab_vma() took neither lock nor reference for a dead object */
+
+ i915_gem_object_unlock(vma->obj);
+ i915_gem_object_put(vma->obj);
+}
+
static bool
mark_free(struct drm_mm_scan *scan,
+ struct i915_gem_ww_ctx *ww,
struct i915_vma *vma,
unsigned int flags,
struct list_head *unwind)
@@ -59,6 +93,9 @@ mark_free(struct drm_mm_scan *scan,
if (i915_vma_is_pinned(vma))
return false;
+ if (!grab_vma(vma, ww))
+ return false;
+
list_add(&vma->evict_link, unwind);
return drm_mm_scan_add_block(scan, &vma->node);
}
@@ -77,6 +114,7 @@ static bool defer_evict(struct i915_vma *vma)
/**
* i915_gem_evict_something - Evict vmas to make room for binding a new one
* @vm: address space to evict from
+ * @ww: An optional struct i915_gem_ww_ctx.
* @min_size: size of the desired free space
* @alignment: alignment constraint of the desired free space
* @color: color for the desired space
@@ -99,6 +137,7 @@ static bool defer_evict(struct i915_vma *vma)
*/
int
i915_gem_evict_something(struct i915_address_space *vm,
+ struct i915_gem_ww_ctx *ww,
u64 min_size, u64 alignment,
unsigned long color,
u64 start, u64 end,
@@ -171,7 +210,7 @@ search_again:
continue;
}
- if (mark_free(&scan, vma, flags, &eviction_list))
+ if (mark_free(&scan, ww, vma, flags, &eviction_list))
goto found;
}
@@ -179,6 +218,7 @@ search_again:
list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
ret = drm_mm_scan_remove_block(&scan, &vma->node);
BUG_ON(ret);
+ ungrab_vma(vma);
}
/*
@@ -223,10 +263,12 @@ found:
* of any of our objects, thus corrupting the list).
*/
list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
- if (drm_mm_scan_remove_block(&scan, &vma->node))
+ if (drm_mm_scan_remove_block(&scan, &vma->node)) {
__i915_vma_pin(vma);
- else
+ } else {
list_del(&vma->evict_link);
+ ungrab_vma(vma);
+ }
}
/* Unbinding will emit any required flushes */
@@ -235,16 +277,20 @@ found:
__i915_vma_unpin(vma);
if (ret == 0)
ret = __i915_vma_unbind(vma);
+ ungrab_vma(vma);
}
while (ret == 0 && (node = drm_mm_scan_color_evict(&scan))) {
vma = container_of(node, struct i915_vma, node);
/* If we find any non-objects (!vma), we cannot evict them */
- if (vma->node.color != I915_COLOR_UNEVICTABLE)
+ if (vma->node.color != I915_COLOR_UNEVICTABLE &&
+ grab_vma(vma, ww)) {
ret = __i915_vma_unbind(vma);
- else
- ret = -ENOSPC; /* XXX search failed, try again? */
+ ungrab_vma(vma);
+ } else {
+ ret = -ENOSPC;
+ }
}
return ret;
@@ -253,6 +299,7 @@ found:
/**
* i915_gem_evict_for_node - Evict vmas to make room for binding a new one
* @vm: address space to evict from
+ * @ww: An optional struct i915_gem_ww_ctx.
* @target: range (and color) to evict for
* @flags: additional flags to control the eviction algorithm
*
@@ -262,6 +309,7 @@ found:
* memory in e.g. the shrinker.
*/
int i915_gem_evict_for_node(struct i915_address_space *vm,
+ struct i915_gem_ww_ctx *ww,
struct drm_mm_node *target,
unsigned int flags)
{
@@ -334,6 +382,11 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
break;
}
+ if (!grab_vma(vma, ww)) {
+ ret = -ENOSPC;
+ break;
+ }
+
/*
* Never show fear in the face of dragons!
*
@@ -351,6 +404,8 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
__i915_vma_unpin(vma);
if (ret == 0)
ret = __i915_vma_unbind(vma);
+
+ ungrab_vma(vma);
}
return ret;
@@ -359,6 +414,8 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
/**
* i915_gem_evict_vm - Evict all idle vmas from a vm
* @vm: Address space to cleanse
+ * @ww: An optional struct i915_gem_ww_ctx. If not NULL, i915_gem_evict_vm
+ * will be able to evict vma's locked by the ww as well.
*
* This function evicts all vmas from a vm.
*
@@ -368,7 +425,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
* To clarify: This is for freeing up virtual address space, not for freeing
* memory in e.g. the shrinker.
*/
-int i915_gem_evict_vm(struct i915_address_space *vm)
+int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww)
{
int ret = 0;
@@ -389,24 +446,52 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
do {
struct i915_vma *vma, *vn;
LIST_HEAD(eviction_list);
+ LIST_HEAD(locked_eviction_list);
list_for_each_entry(vma, &vm->bound_list, vm_link) {
if (i915_vma_is_pinned(vma))
continue;
+ /*
+ * If we already own the lock, trylock fails. In case
+ * the resv is shared among multiple objects, we still
+ * need the object ref.
+ */
+ if (dying_vma(vma) ||
+ (ww && (dma_resv_locking_ctx(vma->obj->base.resv) == &ww->ctx))) {
+ __i915_vma_pin(vma);
+ list_add(&vma->evict_link, &locked_eviction_list);
+ continue;
+ }
+
+ if (!i915_gem_object_trylock(vma->obj, ww))
+ continue;
+
__i915_vma_pin(vma);
list_add(&vma->evict_link, &eviction_list);
}
- if (list_empty(&eviction_list))
+ if (list_empty(&eviction_list) && list_empty(&locked_eviction_list))
break;
ret = 0;
+ /* Unbind locked objects first, before unlocking the eviction_list */
+ list_for_each_entry_safe(vma, vn, &locked_eviction_list, evict_link) {
+ __i915_vma_unpin(vma);
+
+ if (ret == 0)
+ ret = __i915_vma_unbind(vma);
+ if (ret != -EINTR) /* "Get me out of here!" */
+ ret = 0;
+ }
+
list_for_each_entry_safe(vma, vn, &eviction_list, evict_link) {
__i915_vma_unpin(vma);
if (ret == 0)
ret = __i915_vma_unbind(vma);
if (ret != -EINTR) /* "Get me out of here!" */
ret = 0;
+
+ i915_gem_object_unlock(vma->obj);
}
} while (ret == 0);
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.h b/drivers/gpu/drm/i915/i915_gem_evict.h
index d4478b6ad11b..e593c530f9bd 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.h
+++ b/drivers/gpu/drm/i915/i915_gem_evict.h
@@ -10,15 +10,19 @@
struct drm_mm_node;
struct i915_address_space;
+struct i915_gem_ww_ctx;
int __must_check i915_gem_evict_something(struct i915_address_space *vm,
+ struct i915_gem_ww_ctx *ww,
u64 min_size, u64 alignment,
unsigned long color,
u64 start, u64 end,
unsigned flags);
int __must_check i915_gem_evict_for_node(struct i915_address_space *vm,
+ struct i915_gem_ww_ctx *ww,
struct drm_mm_node *node,
unsigned int flags);
-int i915_gem_evict_vm(struct i915_address_space *vm);
+int i915_gem_evict_vm(struct i915_address_space *vm,
+ struct i915_gem_ww_ctx *ww);
#endif /* __I915_GEM_EVICT_H__ */
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 8a7f0d92b56f..329ff75b80b9 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -57,7 +57,7 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
struct sg_table *pages)
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
- struct i915_ggtt *ggtt = &i915->ggtt;
+ struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
/* XXX This does not prevent more requests being submitted! */
if (unlikely(ggtt->do_idle_maps))
@@ -71,6 +71,7 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
/**
* i915_gem_gtt_reserve - reserve a node in an address_space (GTT)
* @vm: the &struct i915_address_space
+ * @ww: An optional struct i915_gem_ww_ctx.
* @node: the &struct drm_mm_node (typically i915_vma.mode)
* @size: how much space to allocate inside the GTT,
* must be #I915_GTT_PAGE_SIZE aligned
@@ -94,6 +95,7 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
* asked to wait for eviction and interrupted.
*/
int i915_gem_gtt_reserve(struct i915_address_space *vm,
+ struct i915_gem_ww_ctx *ww,
struct drm_mm_node *node,
u64 size, u64 offset, unsigned long color,
unsigned int flags)
@@ -104,7 +106,7 @@ int i915_gem_gtt_reserve(struct i915_address_space *vm,
GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
GEM_BUG_ON(!IS_ALIGNED(offset, I915_GTT_MIN_ALIGNMENT));
GEM_BUG_ON(range_overflows(offset, size, vm->total));
- GEM_BUG_ON(vm == &vm->i915->ggtt.alias->vm);
+ GEM_BUG_ON(vm == &to_gt(vm->i915)->ggtt->alias->vm);
GEM_BUG_ON(drm_mm_node_allocated(node));
node->size = size;
@@ -118,7 +120,7 @@ int i915_gem_gtt_reserve(struct i915_address_space *vm,
if (flags & PIN_NOEVICT)
return -ENOSPC;
- err = i915_gem_evict_for_node(vm, node, flags);
+ err = i915_gem_evict_for_node(vm, ww, node, flags);
if (err == 0)
err = drm_mm_reserve_node(&vm->mm, node);
@@ -153,6 +155,7 @@ static u64 random_offset(u64 start, u64 end, u64 len, u64 align)
/**
* i915_gem_gtt_insert - insert a node into an address_space (GTT)
* @vm: the &struct i915_address_space
+ * @ww: An optional struct i915_gem_ww_ctx.
* @node: the &struct drm_mm_node (typically i915_vma.node)
* @size: how much space to allocate inside the GTT,
* must be #I915_GTT_PAGE_SIZE aligned
@@ -185,6 +188,7 @@ static u64 random_offset(u64 start, u64 end, u64 len, u64 align)
* asked to wait for eviction and interrupted.
*/
int i915_gem_gtt_insert(struct i915_address_space *vm,
+ struct i915_gem_ww_ctx *ww,
struct drm_mm_node *node,
u64 size, u64 alignment, unsigned long color,
u64 start, u64 end, unsigned int flags)
@@ -202,7 +206,7 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
GEM_BUG_ON(start >= end);
GEM_BUG_ON(start > 0 && !IS_ALIGNED(start, I915_GTT_PAGE_SIZE));
GEM_BUG_ON(end < U64_MAX && !IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
- GEM_BUG_ON(vm == &vm->i915->ggtt.alias->vm);
+ GEM_BUG_ON(vm == &to_gt(vm->i915)->ggtt->alias->vm);
GEM_BUG_ON(drm_mm_node_allocated(node));
if (unlikely(range_overflows(start, size, end)))
@@ -270,7 +274,7 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
*/
offset = random_offset(start, end,
size, alignment ?: I915_GTT_MIN_ALIGNMENT);
- err = i915_gem_gtt_reserve(vm, node, size, offset, color, flags);
+ err = i915_gem_gtt_reserve(vm, ww, node, size, offset, color, flags);
if (err != -ENOSPC)
return err;
@@ -278,7 +282,7 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
return -ENOSPC;
/* Randomly selected placement is pinned, do a search */
- err = i915_gem_evict_something(vm, size, alignment, color,
+ err = i915_gem_evict_something(vm, ww, size, alignment, color,
start, end, flags);
if (err)
return err;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index c9b0ee5e1d23..8c2f57eb5dda 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -16,6 +16,7 @@
struct drm_i915_gem_object;
struct i915_address_space;
+struct i915_gem_ww_ctx;
int __must_check i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
struct sg_table *pages);
@@ -23,11 +24,13 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
struct sg_table *pages);
int i915_gem_gtt_reserve(struct i915_address_space *vm,
+ struct i915_gem_ww_ctx *ww,
struct drm_mm_node *node,
u64 size, u64 offset, unsigned long color,
unsigned int flags);
int i915_gem_gtt_insert(struct i915_address_space *vm,
+ struct i915_gem_ww_ctx *ww,
struct drm_mm_node *node,
u64 size, u64 alignment, unsigned long color,
u64 start, u64 end, unsigned int flags);
@@ -41,6 +44,7 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
#define PIN_HIGH BIT_ULL(5)
#define PIN_OFFSET_BIAS BIT_ULL(6)
#define PIN_OFFSET_FIXED BIT_ULL(7)
+#define PIN_VALIDATE BIT_ULL(8) /* validate placement only, no need to call unpin() */
#define PIN_GLOBAL BIT_ULL(10) /* I915_VMA_GLOBAL_BIND */
#define PIN_USER BIT_ULL(11) /* I915_VMA_LOCAL_BIND */
diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
index dbe49fd87283..c12a0adefda5 100644
--- a/drivers/gpu/drm/i915/i915_getparam.c
+++ b/drivers/gpu/drm/i915/i915_getparam.c
@@ -33,7 +33,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
value = pdev->revision;
break;
case I915_PARAM_NUM_FENCES_AVAIL:
- value = i915->ggtt.num_fences;
+ value = to_gt(i915)->ggtt->num_fences;
break;
case I915_PARAM_HAS_OVERLAY:
value = !!i915->overlay;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 79ef0e21d71a..1d042551619e 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -52,7 +52,6 @@
#include "i915_gpu_error.h"
#include "i915_memcpy.h"
#include "i915_scatterlist.h"
-#include "i915_vma_snapshot.h"
#define ALLOW_FAIL (__GFP_KSWAPD_RECLAIM | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
#define ATOMIC_MAYFAIL (GFP_ATOMIC | __GFP_NOWARN)
@@ -1017,8 +1016,10 @@ void __i915_gpu_coredump_free(struct kref *error_ref)
static struct i915_vma_coredump *
i915_vma_coredump_create(const struct intel_gt *gt,
- const struct i915_vma_snapshot *vsnap,
- struct i915_vma_compress *compress)
+ const struct i915_vma_resource *vma_res,
+ struct i915_vma_compress *compress,
+ const char *name)
+
{
struct i915_ggtt *ggtt = gt->ggtt;
const u64 slot = ggtt->error_capture.start;
@@ -1028,7 +1029,7 @@ i915_vma_coredump_create(const struct intel_gt *gt,
might_sleep();
- if (!vsnap || !vsnap->pages || !compress)
+ if (!vma_res || !vma_res->bi.pages || !compress)
return NULL;
dst = kmalloc(sizeof(*dst), ALLOW_FAIL);
@@ -1041,12 +1042,12 @@ i915_vma_coredump_create(const struct intel_gt *gt,
}
INIT_LIST_HEAD(&dst->page_list);
- strcpy(dst->name, vsnap->name);
+ strcpy(dst->name, name);
dst->next = NULL;
- dst->gtt_offset = vsnap->gtt_offset;
- dst->gtt_size = vsnap->gtt_size;
- dst->gtt_page_sizes = vsnap->page_sizes;
+ dst->gtt_offset = vma_res->start;
+ dst->gtt_size = vma_res->node_size;
+ dst->gtt_page_sizes = vma_res->page_sizes_gtt;
dst->unused = 0;
ret = -EINVAL;
@@ -1054,7 +1055,7 @@ i915_vma_coredump_create(const struct intel_gt *gt,
void __iomem *s;
dma_addr_t dma;
- for_each_sgt_daddr(dma, iter, vsnap->pages) {
+ for_each_sgt_daddr(dma, iter, vma_res->bi.pages) {
mutex_lock(&ggtt->error_mutex);
ggtt->vm.insert_page(&ggtt->vm, dma, slot,
I915_CACHE_NONE, 0);
@@ -1072,11 +1073,11 @@ i915_vma_coredump_create(const struct intel_gt *gt,
if (ret)
break;
}
- } else if (vsnap->mr && vsnap->mr->type != INTEL_MEMORY_SYSTEM) {
- struct intel_memory_region *mem = vsnap->mr;
+ } else if (vma_res->bi.lmem) {
+ struct intel_memory_region *mem = vma_res->mr;
dma_addr_t dma;
- for_each_sgt_daddr(dma, iter, vsnap->pages) {
+ for_each_sgt_daddr(dma, iter, vma_res->bi.pages) {
void __iomem *s;
s = io_mapping_map_wc(&mem->iomap,
@@ -1092,7 +1093,7 @@ i915_vma_coredump_create(const struct intel_gt *gt,
} else {
struct page *page;
- for_each_sgt_page(page, iter, vsnap->pages) {
+ for_each_sgt_page(page, iter, vma_res->bi.pages) {
void *s;
drm_clflush_pages(&page, 1);
@@ -1328,33 +1329,32 @@ static bool record_context(struct i915_gem_context_coredump *e,
struct intel_engine_capture_vma {
struct intel_engine_capture_vma *next;
- struct i915_vma_snapshot *vsnap;
+ struct i915_vma_resource *vma_res;
char name[16];
bool lockdep_cookie;
};
static struct intel_engine_capture_vma *
capture_vma_snapshot(struct intel_engine_capture_vma *next,
- struct i915_vma_snapshot *vsnap,
- gfp_t gfp)
+ struct i915_vma_resource *vma_res,
+ gfp_t gfp, const char *name)
{
struct intel_engine_capture_vma *c;
- if (!i915_vma_snapshot_present(vsnap))
+ if (!vma_res)
return next;
c = kmalloc(sizeof(*c), gfp);
if (!c)
return next;
- if (!i915_vma_snapshot_resource_pin(vsnap, &c->lockdep_cookie)) {
+ if (!i915_vma_resource_hold(vma_res, &c->lockdep_cookie)) {
kfree(c);
return next;
}
- strcpy(c->name, vsnap->name);
- c->vsnap = vsnap;
- i915_vma_snapshot_get(vsnap);
+ strcpy(c->name, name);
+ c->vma_res = i915_vma_resource_get(vma_res);
c->next = next;
return c;
@@ -1366,8 +1366,6 @@ capture_vma(struct intel_engine_capture_vma *next,
const char *name,
gfp_t gfp)
{
- struct i915_vma_snapshot *vsnap;
-
if (!vma)
return next;
@@ -1376,19 +1374,10 @@ capture_vma(struct intel_engine_capture_vma *next,
* to a struct i915_vma_snapshot at command submission time.
* Not here.
*/
- GEM_WARN_ON(!i915_vma_is_pinned(vma));
- if (!i915_vma_is_pinned(vma))
- return next;
-
- vsnap = i915_vma_snapshot_alloc(gfp);
- if (!vsnap)
+ if (GEM_WARN_ON(!i915_vma_is_pinned(vma)))
return next;
- i915_vma_snapshot_init(vsnap, vma, name);
- next = capture_vma_snapshot(next, vsnap, gfp);
-
- /* FIXME: Replace on async unbind. */
- i915_vma_snapshot_put(vsnap);
+ next = capture_vma_snapshot(next, vma->resource, gfp, name);
return next;
}
@@ -1401,7 +1390,8 @@ capture_user(struct intel_engine_capture_vma *capture,
struct i915_capture_list *c;
for (c = rq->capture_list; c; c = c->next)
- capture = capture_vma_snapshot(capture, c->vma_snapshot, gfp);
+ capture = capture_vma_snapshot(capture, c->vma_res, gfp,
+ "user");
return capture;
}
@@ -1419,16 +1409,19 @@ static struct i915_vma_coredump *
create_vma_coredump(const struct intel_gt *gt, struct i915_vma *vma,
const char *name, struct i915_vma_compress *compress)
{
- struct i915_vma_coredump *ret;
- struct i915_vma_snapshot tmp;
+ struct i915_vma_coredump *ret = NULL;
+ struct i915_vma_resource *vma_res;
+ bool lockdep_cookie;
if (!vma)
return NULL;
- GEM_WARN_ON(!i915_vma_is_pinned(vma));
- i915_vma_snapshot_init_onstack(&tmp, vma, name);
- ret = i915_vma_coredump_create(gt, &tmp, compress);
- i915_vma_snapshot_put_onstack(&tmp);
+ vma_res = vma->resource;
+
+ if (i915_vma_resource_hold(vma_res, &lockdep_cookie)) {
+ ret = i915_vma_coredump_create(gt, vma_res, compress, name);
+ i915_vma_resource_unhold(vma_res, lockdep_cookie);
+ }
return ret;
}
@@ -1475,7 +1468,7 @@ intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
* as the simplest method to avoid being overwritten
* by userspace.
*/
- vma = capture_vma_snapshot(vma, &rq->batch_snapshot, gfp);
+ vma = capture_vma_snapshot(vma, rq->batch_res, gfp, "batch");
vma = capture_user(vma, rq, gfp);
vma = capture_vma(vma, rq->ring->vma, "ring", gfp);
vma = capture_vma(vma, rq->context->state, "HW context", gfp);
@@ -1496,14 +1489,14 @@ intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
while (capture) {
struct intel_engine_capture_vma *this = capture;
- struct i915_vma_snapshot *vsnap = this->vsnap;
+ struct i915_vma_resource *vma_res = this->vma_res;
add_vma(ee,
- i915_vma_coredump_create(engine->gt,
- vsnap, compress));
+ i915_vma_coredump_create(engine->gt, vma_res,
+ compress, this->name));
- i915_vma_snapshot_resource_unpin(vsnap, this->lockdep_cookie);
- i915_vma_snapshot_put(vsnap);
+ i915_vma_resource_unhold(vma_res, this->lockdep_cookie);
+ i915_vma_resource_put(vma_res);
capture = this->next;
kfree(this);
diff --git a/drivers/gpu/drm/i915/i915_module.c b/drivers/gpu/drm/i915/i915_module.c
index f276005b8070..65acd7bf75d0 100644
--- a/drivers/gpu/drm/i915/i915_module.c
+++ b/drivers/gpu/drm/i915/i915_module.c
@@ -17,6 +17,7 @@
#include "i915_scheduler.h"
#include "i915_selftest.h"
#include "i915_vma.h"
+#include "i915_vma_resource.h"
static int i915_check_nomodeset(void)
{
@@ -62,6 +63,8 @@ static const struct {
.exit = i915_scheduler_module_exit },
{ .init = i915_vma_module_init,
.exit = i915_vma_module_exit },
+ { .init = i915_vma_resource_module_init,
+ .exit = i915_vma_resource_module_exit },
{ .init = i915_mock_selftests },
{ .init = i915_pmu_init,
.exit = i915_pmu_exit },
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 76e590fcb903..8246cbe9b01d 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -1047,6 +1047,7 @@ static const struct intel_device_info dg2_info = {
.graphics.rel = 55,
.media.rel = 55,
PLATFORM(INTEL_DG2),
+ .has_guc_deprivilege = 1,
.has_64k_pages = 1,
.platform_engine_mask =
BIT(RCS0) | BIT(BCS0) |
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 3ecb89e96fb6..0a9c3fcc09b1 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1636,8 +1636,8 @@ static int alloc_noa_wait(struct i915_perf_stream *stream)
struct drm_i915_gem_object *bo;
struct i915_vma *vma;
const u64 delay_ticks = 0xffffffffffffffff -
- intel_gt_ns_to_clock_interval(stream->perf->i915->ggtt.vm.gt,
- atomic64_read(&stream->perf->noa_programming_delay));
+ intel_gt_ns_to_clock_interval(to_gt(stream->perf->i915),
+ atomic64_read(&stream->perf->noa_programming_delay));
const u32 base = stream->engine->mmio_base;
#define CS_GPR(x) GEN8_RING_CS_GPR(base, x)
u32 *batch, *ts0, *cs, *jump;
@@ -2120,7 +2120,7 @@ gen8_update_reg_state_unlocked(const struct intel_context *ce,
u32 ctx_oactxctrl = stream->perf->ctx_oactxctrl_offset;
u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
/* The MMIO offsets for Flex EU registers aren't contiguous */
- i915_reg_t flex_regs[] = {
+ static const i915_reg_t flex_regs[] = {
EU_PERF_CNTL0,
EU_PERF_CNTL1,
EU_PERF_CNTL2,
@@ -3548,7 +3548,7 @@ err:
static u64 oa_exponent_to_ns(struct i915_perf *perf, int exponent)
{
- return intel_gt_clock_interval_to_ns(perf->i915->ggtt.vm.gt,
+ return intel_gt_clock_interval_to_ns(to_gt(perf->i915),
2ULL << exponent);
}
@@ -4374,6 +4374,10 @@ void i915_perf_init(struct drm_i915_private *i915)
/* XXX const struct i915_perf_ops! */
+ /* i915_perf is not enabled for DG2 yet */
+ if (IS_DG2(i915))
+ return;
+
perf->oa_formats = oa_formats;
if (IS_HASWELL(i915)) {
perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index de403ee18bc7..e2e9f543fb83 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -9732,4 +9732,8 @@ enum skl_power_gate {
#define CLKGATE_DIS_MISC _MMIO(0x46534)
#define CLKGATE_DIS_MISC_DMASC_GATING_DIS REG_BIT(21)
+#define GEN12_CULLBIT1 _MMIO(0x6100)
+#define GEN12_CULLBIT2 _MMIO(0x7030)
+#define GEN12_STATE_ACK_DEBUG _MMIO(0x20BC)
+
#endif /* _I915_REG_H_ */
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 505276373cc2..582770360ad1 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -118,8 +118,10 @@ static void i915_fence_release(struct dma_fence *fence)
rq->guc_prio != GUC_PRIO_FINI);
i915_request_free_capture_list(fetch_and_zero(&rq->capture_list));
- if (i915_vma_snapshot_present(&rq->batch_snapshot))
- i915_vma_snapshot_put_onstack(&rq->batch_snapshot);
+ if (rq->batch_res) {
+ i915_vma_resource_put(rq->batch_res);
+ rq->batch_res = NULL;
+ }
/*
* The request is put onto a RCU freelist (i.e. the address
@@ -310,7 +312,7 @@ void i915_request_free_capture_list(struct i915_capture_list *capture)
while (capture) {
struct i915_capture_list *next = capture->next;
- i915_vma_snapshot_put(capture->vma_snapshot);
+ i915_vma_resource_put(capture->vma_res);
kfree(capture);
capture = next;
}
@@ -856,7 +858,7 @@ static void __i915_request_ctor(void *arg)
i915_sw_fence_init(&rq->semaphore, semaphore_notify);
clear_capture_list(rq);
- rq->batch_snapshot.present = false;
+ rq->batch_res = NULL;
init_llist_head(&rq->execute_cb);
}
@@ -962,7 +964,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
__rq_init_watchdog(rq);
assert_capture_list_is_null(rq);
GEM_BUG_ON(!llist_empty(&rq->execute_cb));
- GEM_BUG_ON(i915_vma_snapshot_present(&rq->batch_snapshot));
+ GEM_BUG_ON(rq->batch_res);
/*
* Reserve space in the ring buffer for all the commands required to
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 170ee78c2858..28b1f9db5487 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -40,7 +40,7 @@
#include "i915_scheduler.h"
#include "i915_selftest.h"
#include "i915_sw_fence.h"
-#include "i915_vma_snapshot.h"
+#include "i915_vma_resource.h"
#include <uapi/drm/i915_drm.h>
@@ -52,7 +52,7 @@ struct i915_request;
#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
struct i915_capture_list {
- struct i915_vma_snapshot *vma_snapshot;
+ struct i915_vma_resource *vma_res;
struct i915_capture_list *next;
};
@@ -300,7 +300,7 @@ struct i915_request {
/** Batch buffer pointer for selftest internal use. */
I915_SELFTEST_DECLARE(struct i915_vma *batch);
- struct i915_vma_snapshot batch_snapshot;
+ struct i915_vma_resource *batch_res;
#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
/**
diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c
index 31a105bc1792..c97323973f9b 100644
--- a/drivers/gpu/drm/i915/i915_vgpu.c
+++ b/drivers/gpu/drm/i915/i915_vgpu.c
@@ -197,7 +197,7 @@ static int vgt_balloon_space(struct i915_ggtt *ggtt,
drm_info(&dev_priv->drm,
"balloon space: range [ 0x%lx - 0x%lx ] %lu KiB.\n",
start, end, size / 1024);
- ret = i915_gem_gtt_reserve(&ggtt->vm, node,
+ ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, node,
size, start, I915_COLOR_UNEVICTABLE,
0);
if (!ret)
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 68cf1d392250..845cd88f8313 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -38,6 +38,18 @@
#include "i915_sw_fence_work.h"
#include "i915_trace.h"
#include "i915_vma.h"
+#include "i915_vma_resource.h"
+
+static inline void assert_vma_held_evict(const struct i915_vma *vma)
+{
+ /*
+ * We may be forced to unbind when the vm is dead, to clean it up.
+ * This is the only exception to the requirement of the object lock
+ * being held.
+ */
+ if (atomic_read(&vma->vm->open))
+ assert_object_held_shared(vma->obj);
+}
static struct kmem_cache *slab_vmas;
@@ -285,7 +297,7 @@ struct i915_vma_work {
struct dma_fence_work base;
struct i915_address_space *vm;
struct i915_vm_pt_stash stash;
- struct i915_vma *vma;
+ struct i915_vma_resource *vma_res;
struct drm_i915_gem_object *pinned;
struct i915_sw_dma_fence_cb cb;
enum i915_cache_level cache_level;
@@ -295,23 +307,24 @@ struct i915_vma_work {
static void __vma_bind(struct dma_fence_work *work)
{
struct i915_vma_work *vw = container_of(work, typeof(*vw), base);
- struct i915_vma *vma = vw->vma;
+ struct i915_vma_resource *vma_res = vw->vma_res;
+
+ vma_res->ops->bind_vma(vma_res->vm, &vw->stash,
+ vma_res, vw->cache_level, vw->flags);
- vma->ops->bind_vma(vw->vm, &vw->stash,
- vma, vw->cache_level, vw->flags);
}
static void __vma_release(struct dma_fence_work *work)
{
struct i915_vma_work *vw = container_of(work, typeof(*vw), base);
- if (vw->pinned) {
- __i915_gem_object_unpin_pages(vw->pinned);
+ if (vw->pinned)
i915_gem_object_put(vw->pinned);
- }
i915_vm_free_pt_stash(vw->vm, &vw->stash);
i915_vm_put(vw->vm);
+ if (vw->vma_res)
+ i915_vma_resource_put(vw->vma_res);
}
static const struct dma_fence_work_ops bind_ops = {
@@ -375,12 +388,27 @@ static int i915_vma_verify_bind_complete(struct i915_vma *vma)
#define i915_vma_verify_bind_complete(_vma) 0
#endif
+I915_SELFTEST_EXPORT void
+i915_vma_resource_init_from_vma(struct i915_vma_resource *vma_res,
+ struct i915_vma *vma)
+{
+ struct drm_i915_gem_object *obj = vma->obj;
+
+ i915_vma_resource_init(vma_res, vma->vm, vma->pages, &vma->page_sizes,
+ obj->mm.rsgt, i915_gem_object_is_readonly(obj),
+ i915_gem_object_is_lmem(obj), obj->mm.region,
+ vma->ops, vma->private, vma->node.start,
+ vma->node.size, vma->size);
+}
+
/**
* i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space.
* @vma: VMA to map
* @cache_level: mapping cache level
* @flags: flags like global or local mapping
* @work: preallocated worker for allocating and binding the PTE
+ * @vma_res: pointer to a preallocated vma resource. The resource is either
+ * consumed or freed.
*
* DMA addresses are taken from the scatter-gather table of this object (or of
* this VMA in case of non-default GGTT views) and PTE entries set up.
@@ -389,10 +417,12 @@ static int i915_vma_verify_bind_complete(struct i915_vma *vma)
int i915_vma_bind(struct i915_vma *vma,
enum i915_cache_level cache_level,
u32 flags,
- struct i915_vma_work *work)
+ struct i915_vma_work *work,
+ struct i915_vma_resource *vma_res)
{
u32 bind_flags;
u32 vma_flags;
+ int ret;
lockdep_assert_held(&vma->vm->mutex);
GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
@@ -400,11 +430,15 @@ int i915_vma_bind(struct i915_vma *vma,
if (GEM_DEBUG_WARN_ON(range_overflows(vma->node.start,
vma->node.size,
- vma->vm->total)))
+ vma->vm->total))) {
+ i915_vma_resource_free(vma_res);
return -ENODEV;
+ }
- if (GEM_DEBUG_WARN_ON(!flags))
+ if (GEM_DEBUG_WARN_ON(!flags)) {
+ i915_vma_resource_free(vma_res);
return -EINVAL;
+ }
bind_flags = flags;
bind_flags &= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
@@ -413,16 +447,44 @@ int i915_vma_bind(struct i915_vma *vma,
vma_flags &= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
bind_flags &= ~vma_flags;
- if (bind_flags == 0)
+ if (bind_flags == 0) {
+ i915_vma_resource_free(vma_res);
return 0;
+ }
GEM_BUG_ON(!atomic_read(&vma->pages_count));
+ /* Wait for or await async unbinds touching our range */
+ if (work && bind_flags & vma->vm->bind_async_flags)
+ ret = i915_vma_resource_bind_dep_await(vma->vm,
+ &work->base.chain,
+ vma->node.start,
+ vma->node.size,
+ true,
+ GFP_NOWAIT |
+ __GFP_RETRY_MAYFAIL |
+ __GFP_NOWARN);
+ else
+ ret = i915_vma_resource_bind_dep_sync(vma->vm, vma->node.start,
+ vma->node.size, true);
+ if (ret) {
+ i915_vma_resource_free(vma_res);
+ return ret;
+ }
+
+ if (vma->resource || !vma_res) {
+ /* Rebinding with an additional I915_VMA_*_BIND */
+ GEM_WARN_ON(!vma_flags);
+ i915_vma_resource_free(vma_res);
+ } else {
+ i915_vma_resource_init_from_vma(vma_res, vma);
+ vma->resource = vma_res;
+ }
trace_i915_vma_bind(vma, bind_flags);
if (work && bind_flags & vma->vm->bind_async_flags) {
struct dma_fence *prev;
- work->vma = vma;
+ work->vma_res = i915_vma_resource_get(vma->resource);
work->cache_level = cache_level;
work->flags = bind_flags;
@@ -445,17 +507,25 @@ int i915_vma_bind(struct i915_vma *vma,
work->base.dma.error = 0; /* enable the queue_work() */
- __i915_gem_object_pin_pages(vma->obj);
- work->pinned = i915_gem_object_get(vma->obj);
+ /*
+ * If we don't have the refcounted pages list, keep a reference
+ * on the object to avoid waiting for the async bind to
+ * complete in the object destruction path.
+ */
+ if (!work->vma_res->bi.pages_rsgt)
+ work->pinned = i915_gem_object_get(vma->obj);
} else {
if (vma->obj) {
- int ret;
-
ret = i915_gem_object_wait_moving_fence(vma->obj, true);
- if (ret)
+ if (ret) {
+ i915_vma_resource_free(vma->resource);
+ vma->resource = NULL;
+
return ret;
+ }
}
- vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags);
+ vma->ops->bind_vma(vma->vm, NULL, vma->resource, cache_level,
+ bind_flags);
}
if (vma->obj)
@@ -655,7 +725,8 @@ bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color)
* 0 on success, negative error code otherwise.
*/
static int
-i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
+i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
+ u64 size, u64 alignment, u64 flags)
{
unsigned long color;
u64 start, end;
@@ -707,7 +778,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
range_overflows(offset, size, end))
return -EINVAL;
- ret = i915_gem_gtt_reserve(vma->vm, &vma->node,
+ ret = i915_gem_gtt_reserve(vma->vm, ww, &vma->node,
size, offset, color,
flags);
if (ret)
@@ -746,7 +817,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
size = round_up(size, I915_GTT_PAGE_SIZE_2M);
}
- ret = i915_gem_gtt_insert(vma->vm, &vma->node,
+ ret = i915_gem_gtt_insert(vma->vm, ww, &vma->node,
size, alignment, color,
start, end, flags);
if (ret)
@@ -780,9 +851,17 @@ i915_vma_detach(struct i915_vma *vma)
static bool try_qad_pin(struct i915_vma *vma, unsigned int flags)
{
unsigned int bound;
- bool pinned = true;
bound = atomic_read(&vma->flags);
+
+ if (flags & PIN_VALIDATE) {
+ flags &= I915_VMA_BIND_MASK;
+
+ return (flags & bound) == flags;
+ }
+
+ /* with the lock mandatory for unbind, we don't race here */
+ flags &= I915_VMA_BIND_MASK;
do {
if (unlikely(flags & ~bound))
return false;
@@ -790,34 +869,10 @@ static bool try_qad_pin(struct i915_vma *vma, unsigned int flags)
if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR)))
return false;
- if (!(bound & I915_VMA_PIN_MASK))
- goto unpinned;
-
GEM_BUG_ON(((bound + 1) & I915_VMA_PIN_MASK) == 0);
} while (!atomic_try_cmpxchg(&vma->flags, &bound, bound + 1));
return true;
-
-unpinned:
- /*
- * If pin_count==0, but we are bound, check under the lock to avoid
- * racing with a concurrent i915_vma_unbind().
- */
- mutex_lock(&vma->vm->mutex);
- do {
- if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR))) {
- pinned = false;
- break;
- }
-
- if (unlikely(flags & ~bound)) {
- pinned = false;
- break;
- }
- } while (!atomic_try_cmpxchg(&vma->flags, &bound, bound + 1));
- mutex_unlock(&vma->vm->mutex);
-
- return pinned;
}
static struct scatterlist *
@@ -913,30 +968,39 @@ err_st_alloc:
}
static struct scatterlist *
-remap_pages(struct drm_i915_gem_object *obj,
- unsigned int offset, unsigned int alignment_pad,
- unsigned int width, unsigned int height,
- unsigned int src_stride, unsigned int dst_stride,
- struct sg_table *st, struct scatterlist *sg)
+add_padding_pages(unsigned int count,
+ struct sg_table *st, struct scatterlist *sg)
+{
+ st->nents++;
+
+ /*
+ * The DE ignores the PTEs for the padding tiles, the sg entry
+ * here is just a convenience to indicate how many padding PTEs
+ * to insert at this spot.
+ */
+ sg_set_page(sg, NULL, count * I915_GTT_PAGE_SIZE, 0);
+ sg_dma_address(sg) = 0;
+ sg_dma_len(sg) = count * I915_GTT_PAGE_SIZE;
+ sg = sg_next(sg);
+
+ return sg;
+}
+
+static struct scatterlist *
+remap_tiled_color_plane_pages(struct drm_i915_gem_object *obj,
+ unsigned int offset, unsigned int alignment_pad,
+ unsigned int width, unsigned int height,
+ unsigned int src_stride, unsigned int dst_stride,
+ struct sg_table *st, struct scatterlist *sg,
+ unsigned int *gtt_offset)
{
unsigned int row;
if (!width || !height)
return sg;
- if (alignment_pad) {
- st->nents++;
-
- /*
- * The DE ignores the PTEs for the padding tiles, the sg entry
- * here is just a convenience to indicate how many padding PTEs
- * to insert at this spot.
- */
- sg_set_page(sg, NULL, alignment_pad * 4096, 0);
- sg_dma_address(sg) = 0;
- sg_dma_len(sg) = alignment_pad * 4096;
- sg = sg_next(sg);
- }
+ if (alignment_pad)
+ sg = add_padding_pages(alignment_pad, st, sg);
for (row = 0; row < height; row++) {
unsigned int left = width * I915_GTT_PAGE_SIZE;
@@ -973,18 +1037,98 @@ remap_pages(struct drm_i915_gem_object *obj,
if (!left)
continue;
+ sg = add_padding_pages(left >> PAGE_SHIFT, st, sg);
+ }
+
+ *gtt_offset += alignment_pad + dst_stride * height;
+
+ return sg;
+}
+
+static struct scatterlist *
+remap_contiguous_pages(struct drm_i915_gem_object *obj,
+ unsigned int obj_offset,
+ unsigned int count,
+ struct sg_table *st, struct scatterlist *sg)
+{
+ struct scatterlist *iter;
+ unsigned int offset;
+
+ iter = i915_gem_object_get_sg_dma(obj, obj_offset, &offset);
+ GEM_BUG_ON(!iter);
+
+ do {
+ unsigned int len;
+
+ len = min(sg_dma_len(iter) - (offset << PAGE_SHIFT),
+ count << PAGE_SHIFT);
+ sg_set_page(sg, NULL, len, 0);
+ sg_dma_address(sg) =
+ sg_dma_address(iter) + (offset << PAGE_SHIFT);
+ sg_dma_len(sg) = len;
+
st->nents++;
+ count -= len >> PAGE_SHIFT;
+ if (count == 0)
+ return sg;
- /*
- * The DE ignores the PTEs for the padding tiles, the sg entry
- * here is just a conenience to indicate how many padding PTEs
- * to insert at this spot.
- */
- sg_set_page(sg, NULL, left, 0);
- sg_dma_address(sg) = 0;
- sg_dma_len(sg) = left;
- sg = sg_next(sg);
- }
+ sg = __sg_next(sg);
+ iter = __sg_next(iter);
+ offset = 0;
+ } while (1);
+}
+
+static struct scatterlist *
+remap_linear_color_plane_pages(struct drm_i915_gem_object *obj,
+ unsigned int obj_offset, unsigned int alignment_pad,
+ unsigned int size,
+ struct sg_table *st, struct scatterlist *sg,
+ unsigned int *gtt_offset)
+{
+ if (!size)
+ return sg;
+
+ if (alignment_pad)
+ sg = add_padding_pages(alignment_pad, st, sg);
+
+ sg = remap_contiguous_pages(obj, obj_offset, size, st, sg);
+ sg = sg_next(sg);
+
+ *gtt_offset += alignment_pad + size;
+
+ return sg;
+}
+
+static struct scatterlist *
+remap_color_plane_pages(const struct intel_remapped_info *rem_info,
+ struct drm_i915_gem_object *obj,
+ int color_plane,
+ struct sg_table *st, struct scatterlist *sg,
+ unsigned int *gtt_offset)
+{
+ unsigned int alignment_pad = 0;
+
+ if (rem_info->plane_alignment)
+ alignment_pad = ALIGN(*gtt_offset, rem_info->plane_alignment) - *gtt_offset;
+
+ if (rem_info->plane[color_plane].linear)
+ sg = remap_linear_color_plane_pages(obj,
+ rem_info->plane[color_plane].offset,
+ alignment_pad,
+ rem_info->plane[color_plane].size,
+ st, sg,
+ gtt_offset);
+
+ else
+ sg = remap_tiled_color_plane_pages(obj,
+ rem_info->plane[color_plane].offset,
+ alignment_pad,
+ rem_info->plane[color_plane].width,
+ rem_info->plane[color_plane].height,
+ rem_info->plane[color_plane].src_stride,
+ rem_info->plane[color_plane].dst_stride,
+ st, sg,
+ gtt_offset);
return sg;
}
@@ -1013,21 +1157,8 @@ intel_remap_pages(struct intel_remapped_info *rem_info,
st->nents = 0;
sg = st->sgl;
- for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) {
- unsigned int alignment_pad = 0;
-
- if (rem_info->plane_alignment)
- alignment_pad = ALIGN(gtt_offset, rem_info->plane_alignment) - gtt_offset;
-
- sg = remap_pages(obj,
- rem_info->plane[i].offset, alignment_pad,
- rem_info->plane[i].width, rem_info->plane[i].height,
- rem_info->plane[i].src_stride, rem_info->plane[i].dst_stride,
- st, sg);
-
- gtt_offset += alignment_pad +
- rem_info->plane[i].dst_stride * rem_info->plane[i].height;
- }
+ for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++)
+ sg = remap_color_plane_pages(rem_info, obj, i, st, sg, &gtt_offset);
i915_sg_trim(st);
@@ -1049,9 +1180,8 @@ intel_partial_pages(const struct i915_ggtt_view *view,
struct drm_i915_gem_object *obj)
{
struct sg_table *st;
- struct scatterlist *sg, *iter;
+ struct scatterlist *sg;
unsigned int count = view->partial.size;
- unsigned int offset;
int ret = -ENOMEM;
st = kmalloc(sizeof(*st), GFP_KERNEL);
@@ -1062,34 +1192,14 @@ intel_partial_pages(const struct i915_ggtt_view *view,
if (ret)
goto err_sg_alloc;
- iter = i915_gem_object_get_sg_dma(obj, view->partial.offset, &offset);
- GEM_BUG_ON(!iter);
-
- sg = st->sgl;
st->nents = 0;
- do {
- unsigned int len;
-
- len = min(sg_dma_len(iter) - (offset << PAGE_SHIFT),
- count << PAGE_SHIFT);
- sg_set_page(sg, NULL, len, 0);
- sg_dma_address(sg) =
- sg_dma_address(iter) + (offset << PAGE_SHIFT);
- sg_dma_len(sg) = len;
- st->nents++;
- count -= len >> PAGE_SHIFT;
- if (count == 0) {
- sg_mark_end(sg);
- i915_sg_trim(st); /* Drop any unused tail entries. */
+ sg = remap_contiguous_pages(obj, view->partial.offset, count, st, st->sgl);
- return st;
- }
+ sg_mark_end(sg);
+ i915_sg_trim(st); /* Drop any unused tail entries. */
- sg = __sg_next(sg);
- iter = __sg_next(iter);
- offset = 0;
- } while (1);
+ return st;
err_sg_alloc:
kfree(st);
@@ -1101,7 +1211,6 @@ static int
__i915_vma_get_pages(struct i915_vma *vma)
{
struct sg_table *pages;
- int ret;
/*
* The vma->pages are only valid within the lifespan of the borrowed
@@ -1134,18 +1243,16 @@ __i915_vma_get_pages(struct i915_vma *vma)
break;
}
- ret = 0;
if (IS_ERR(pages)) {
- ret = PTR_ERR(pages);
- pages = NULL;
drm_err(&vma->vm->i915->drm,
- "Failed to get pages for VMA view type %u (%d)!\n",
- vma->ggtt_view.type, ret);
+ "Failed to get pages for VMA view type %u (%ld)!\n",
+ vma->ggtt_view.type, PTR_ERR(pages));
+ return PTR_ERR(pages);
}
vma->pages = pages;
- return ret;
+ return 0;
}
I915_SELFTEST_EXPORT int i915_vma_get_pages(struct i915_vma *vma)
@@ -1177,25 +1284,14 @@ err_unpin:
static void __vma_put_pages(struct i915_vma *vma, unsigned int count)
{
/* We allocate under vma_get_pages, so beware the shrinker */
- struct sg_table *pages = READ_ONCE(vma->pages);
-
GEM_BUG_ON(atomic_read(&vma->pages_count) < count);
if (atomic_sub_return(count, &vma->pages_count) == 0) {
- /*
- * The atomic_sub_return is a read barrier for the READ_ONCE of
- * vma->pages above.
- *
- * READ_ONCE is safe because this is either called from the same
- * function (i915_vma_pin_ww), or guarded by vma->vm->mutex.
- *
- * TODO: We're leaving vma->pages dangling, until vma->obj->resv
- * lock is required.
- */
- if (pages != vma->obj->mm.pages) {
- sg_free_table(pages);
- kfree(pages);
+ if (vma->pages != vma->obj->mm.pages) {
+ sg_free_table(vma->pages);
+ kfree(vma->pages);
}
+ vma->pages = NULL;
i915_gem_object_unpin_pages(vma->obj);
}
@@ -1228,6 +1324,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
{
struct i915_vma_work *work = NULL;
struct dma_fence *moving = NULL;
+ struct i915_vma_resource *vma_res = NULL;
intel_wakeref_t wakeref = 0;
unsigned int bound;
int err;
@@ -1241,7 +1338,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
GEM_BUG_ON(!(flags & (PIN_USER | PIN_GLOBAL)));
/* First try and grab the pin without rebinding the vma */
- if (try_qad_pin(vma, flags & I915_VMA_BIND_MASK))
+ if (try_qad_pin(vma, flags))
return 0;
err = i915_vma_get_pages(vma);
@@ -1282,6 +1379,12 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
}
}
+ vma_res = i915_vma_resource_alloc();
+ if (IS_ERR(vma_res)) {
+ err = PTR_ERR(vma_res);
+ goto err_fence;
+ }
+
/*
* Differentiate between user/kernel vma inside the aliasing-ppgtt.
*
@@ -1302,7 +1405,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
err = mutex_lock_interruptible_nested(&vma->vm->mutex,
!(flags & PIN_GLOBAL));
if (err)
- goto err_fence;
+ goto err_vma_res;
/* No more allocations allowed now we hold vm->mutex */
@@ -1323,7 +1426,8 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
}
if (unlikely(!(flags & ~bound & I915_VMA_BIND_MASK))) {
- __i915_vma_pin(vma);
+ if (!(flags & PIN_VALIDATE))
+ __i915_vma_pin(vma);
goto err_unlock;
}
@@ -1332,7 +1436,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
goto err_unlock;
if (!(bound & I915_VMA_BIND_MASK)) {
- err = i915_vma_insert(vma, size, alignment, flags);
+ err = i915_vma_insert(vma, ww, size, alignment, flags);
if (err)
goto err_active;
@@ -1343,7 +1447,8 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
GEM_BUG_ON(!vma->pages);
err = i915_vma_bind(vma,
vma->obj->cache_level,
- flags, work);
+ flags, work, vma_res);
+ vma_res = NULL;
if (err)
goto err_remove;
@@ -1352,8 +1457,10 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
atomic_add(I915_VMA_PAGES_ACTIVE, &vma->pages_count);
list_move_tail(&vma->vm_link, &vma->vm->bound_list);
- __i915_vma_pin(vma);
- GEM_BUG_ON(!i915_vma_is_pinned(vma));
+ if (!(flags & PIN_VALIDATE)) {
+ __i915_vma_pin(vma);
+ GEM_BUG_ON(!i915_vma_is_pinned(vma));
+ }
GEM_BUG_ON(!i915_vma_is_bound(vma, flags));
GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
@@ -1366,6 +1473,8 @@ err_active:
i915_active_release(&vma->active);
err_unlock:
mutex_unlock(&vma->vm->mutex);
+err_vma_res:
+ i915_vma_resource_free(vma_res);
err_fence:
if (work)
dma_fence_work_commit_imm(&work->base);
@@ -1412,7 +1521,12 @@ static int __i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
/* Unlike i915_vma_pin, we don't take no for an answer! */
flush_idle_contexts(vm->gt);
if (mutex_lock_interruptible(&vm->mutex) == 0) {
- i915_gem_evict_vm(vm);
+ /*
+ * We pass NULL ww here, as we don't want to unbind
+ * locked objects when called from execbuf when pinning
+ * is removed. This would probably regress badly.
+ */
+ i915_gem_evict_vm(vm, NULL);
mutex_unlock(&vm->mutex);
}
} while (1);
@@ -1516,6 +1630,7 @@ void i915_vma_release(struct kref *ref)
i915_vm_put(vma->vm);
i915_active_fini(&vma->active);
+ GEM_WARN_ON(vma->resource);
i915_vma_free(vma);
}
@@ -1548,8 +1663,16 @@ void i915_vma_parked(struct intel_gt *gt)
struct drm_i915_gem_object *obj = vma->obj;
struct i915_address_space *vm = vma->vm;
- INIT_LIST_HEAD(&vma->closed_link);
- __i915_vma_put(vma);
+ if (i915_gem_object_trylock(obj, NULL)) {
+ INIT_LIST_HEAD(&vma->closed_link);
+ __i915_vma_put(vma);
+ i915_gem_object_unlock(obj);
+ } else {
+ /* back you go.. */
+ spin_lock_irq(&gt->closed_lock);
+ list_add(&vma->closed_link, &gt->closed_vma);
+ spin_unlock_irq(&gt->closed_lock);
+ }
i915_gem_object_put(obj);
i915_vm_close(vm);
@@ -1600,8 +1723,6 @@ static int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *
{
int err;
- GEM_BUG_ON(!i915_vma_is_pinned(vma));
-
/* Wait for the vma to be bound before we start! */
err = __i915_request_await_bind(rq, vma);
if (err)
@@ -1620,6 +1741,8 @@ int _i915_vma_move_to_active(struct i915_vma *vma,
assert_object_held(obj);
+ GEM_BUG_ON(!vma->pages);
+
err = __i915_vma_move_to_active(vma, rq);
if (unlikely(err))
return err;
@@ -1662,9 +1785,13 @@ int _i915_vma_move_to_active(struct i915_vma *vma,
return 0;
}
-void __i915_vma_evict(struct i915_vma *vma)
+struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async)
{
+ struct i915_vma_resource *vma_res = vma->resource;
+ struct dma_fence *unbind_fence;
+
GEM_BUG_ON(i915_vma_is_pinned(vma));
+ assert_vma_held_evict(vma);
if (i915_vma_is_map_and_fenceable(vma)) {
/* Force a pagefault for domain tracking on next user access */
@@ -1694,15 +1821,36 @@ void __i915_vma_evict(struct i915_vma *vma)
GEM_BUG_ON(vma->fence);
GEM_BUG_ON(i915_vma_has_userfault(vma));
- if (likely(atomic_read(&vma->vm->open))) {
- trace_i915_vma_unbind(vma);
- vma->ops->unbind_vma(vma->vm, vma);
- }
+ /* Object backend must be async capable. */
+ GEM_WARN_ON(async && !vma->resource->bi.pages_rsgt);
+
+ /* If vm is not open, unbind is a nop. */
+ vma_res->needs_wakeref = i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND) &&
+ atomic_read(&vma->vm->open);
+ trace_i915_vma_unbind(vma);
+
+ unbind_fence = i915_vma_resource_unbind(vma_res);
+ vma->resource = NULL;
+
atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_ERROR | I915_VMA_GGTT_WRITE),
&vma->flags);
i915_vma_detach(vma);
+
+ if (!async && unbind_fence) {
+ dma_fence_wait(unbind_fence, false);
+ dma_fence_put(unbind_fence);
+ unbind_fence = NULL;
+ }
+
+ /*
+ * Binding itself may not have completed until the unbind fence signals,
+ * so don't drop the pages until that happens, unless the resource is
+ * async_capable.
+ */
+
vma_unbind_pages(vma);
+ return unbind_fence;
}
int __i915_vma_unbind(struct i915_vma *vma)
@@ -1710,6 +1858,7 @@ int __i915_vma_unbind(struct i915_vma *vma)
int ret;
lockdep_assert_held(&vma->vm->mutex);
+ assert_vma_held_evict(vma);
if (!drm_mm_node_allocated(&vma->node))
return 0;
@@ -1729,18 +1878,55 @@ int __i915_vma_unbind(struct i915_vma *vma)
return ret;
GEM_BUG_ON(i915_vma_is_active(vma));
- __i915_vma_evict(vma);
+ __i915_vma_evict(vma, false);
drm_mm_remove_node(&vma->node); /* pairs with i915_vma_release() */
return 0;
}
+static struct dma_fence *__i915_vma_unbind_async(struct i915_vma *vma)
+{
+ struct dma_fence *fence;
+
+ lockdep_assert_held(&vma->vm->mutex);
+
+ if (!drm_mm_node_allocated(&vma->node))
+ return NULL;
+
+ if (i915_vma_is_pinned(vma) ||
+ &vma->obj->mm.rsgt->table != vma->resource->bi.pages)
+ return ERR_PTR(-EAGAIN);
+
+ /*
+ * We probably need to replace this with awaiting the fences of the
+ * object's dma_resv when the vma active goes away. When doing that
+ * we need to be careful to not add the vma_resource unbind fence
+ * immediately to the object's dma_resv, because then unbinding
+ * the next vma from the object, in case there are many, will
+ * actually await the unbinding of the previous vmas, which is
+ * undesirable.
+ */
+ if (i915_sw_fence_await_active(&vma->resource->chain, &vma->active,
+ I915_ACTIVE_AWAIT_EXCL |
+ I915_ACTIVE_AWAIT_ACTIVE) < 0) {
+ return ERR_PTR(-EBUSY);
+ }
+
+ fence = __i915_vma_evict(vma, true);
+
+ drm_mm_remove_node(&vma->node); /* pairs with i915_vma_release() */
+
+ return fence;
+}
+
int i915_vma_unbind(struct i915_vma *vma)
{
struct i915_address_space *vm = vma->vm;
intel_wakeref_t wakeref = 0;
int err;
+ assert_object_held_shared(vma->obj);
+
/* Optimistic wait before taking the mutex */
err = i915_vma_sync(vma);
if (err)
@@ -1771,6 +1957,79 @@ out_rpm:
return err;
}
+int i915_vma_unbind_async(struct i915_vma *vma, bool trylock_vm)
+{
+ struct drm_i915_gem_object *obj = vma->obj;
+ struct i915_address_space *vm = vma->vm;
+ intel_wakeref_t wakeref = 0;
+ struct dma_fence *fence;
+ int err;
+
+ /*
+ * We need the dma-resv lock since we add the
+ * unbind fence to the dma-resv object.
+ */
+ assert_object_held(obj);
+
+ if (!drm_mm_node_allocated(&vma->node))
+ return 0;
+
+ if (i915_vma_is_pinned(vma)) {
+ vma_print_allocator(vma, "is pinned");
+ return -EAGAIN;
+ }
+
+ if (!obj->mm.rsgt)
+ return -EBUSY;
+
+ err = dma_resv_reserve_shared(obj->base.resv, 1);
+ if (err)
+ return -EBUSY;
+
+ /*
+ * It would be great if we could grab this wakeref from the
+ * async unbind work if needed, but we can't because it uses
+ * kmalloc and it's in the dma-fence signalling critical path.
+ */
+ if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
+ wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm);
+
+ if (trylock_vm && !mutex_trylock(&vm->mutex)) {
+ err = -EBUSY;
+ goto out_rpm;
+ } else if (!trylock_vm) {
+ err = mutex_lock_interruptible_nested(&vm->mutex, !wakeref);
+ if (err)
+ goto out_rpm;
+ }
+
+ fence = __i915_vma_unbind_async(vma);
+ mutex_unlock(&vm->mutex);
+ if (IS_ERR_OR_NULL(fence)) {
+ err = PTR_ERR_OR_ZERO(fence);
+ goto out_rpm;
+ }
+
+ dma_resv_add_shared_fence(obj->base.resv, fence);
+ dma_fence_put(fence);
+
+out_rpm:
+ if (wakeref)
+ intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref);
+ return err;
+}
+
+int i915_vma_unbind_unlocked(struct i915_vma *vma)
+{
+ int err;
+
+ i915_gem_object_lock(vma->obj, NULL);
+ err = i915_vma_unbind(vma);
+ i915_gem_object_unlock(vma->obj);
+
+ return err;
+}
+
struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma)
{
i915_gem_object_make_unshrinkable(vma->obj);
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 32719431b3df..011af044ad4f 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -37,6 +37,7 @@
#include "i915_active.h"
#include "i915_request.h"
+#include "i915_vma_resource.h"
#include "i915_vma_types.h"
struct i915_vma *
@@ -204,16 +205,19 @@ struct i915_vma_work *i915_vma_work(void);
int i915_vma_bind(struct i915_vma *vma,
enum i915_cache_level cache_level,
u32 flags,
- struct i915_vma_work *work);
+ struct i915_vma_work *work,
+ struct i915_vma_resource *vma_res);
bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color);
bool i915_vma_misplaced(const struct i915_vma *vma,
u64 size, u64 alignment, u64 flags);
void __i915_vma_set_map_and_fenceable(struct i915_vma *vma);
void i915_vma_revoke_mmap(struct i915_vma *vma);
-void __i915_vma_evict(struct i915_vma *vma);
+struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async);
int __i915_vma_unbind(struct i915_vma *vma);
int __must_check i915_vma_unbind(struct i915_vma *vma);
+int __must_check i915_vma_unbind_async(struct i915_vma *vma, bool trylock_vm);
+int __must_check i915_vma_unbind_unlocked(struct i915_vma *vma);
void i915_vma_unlink_ctx(struct i915_vma *vma);
void i915_vma_close(struct i915_vma *vma);
void i915_vma_reopen(struct i915_vma *vma);
@@ -337,12 +341,6 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma);
*/
void i915_vma_unpin_iomap(struct i915_vma *vma);
-static inline struct page *i915_vma_first_page(struct i915_vma *vma)
-{
- GEM_BUG_ON(!vma->pages);
- return sg_page(vma->pages->sgl);
-}
-
/**
* i915_vma_pin_fence - pin fencing state
* @vma: vma to pin fencing for
@@ -428,6 +426,26 @@ static inline int i915_vma_sync(struct i915_vma *vma)
return i915_active_wait(&vma->active);
}
+/**
+ * i915_vma_get_current_resource - Get the current resource of the vma
+ * @vma: The vma to get the current resource from.
+ *
+ * It's illegal to call this function if the vma is not bound.
+ *
+ * Return: A refcounted pointer to the current vma resource
+ * of the vma, assuming the vma is bound.
+ */
+static inline struct i915_vma_resource *
+i915_vma_get_current_resource(struct i915_vma *vma)
+{
+ return i915_vma_resource_get(vma->resource);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+void i915_vma_resource_init_from_vma(struct i915_vma_resource *vma_res,
+ struct i915_vma *vma);
+#endif
+
void i915_vma_module_exit(void);
int i915_vma_module_init(void);
diff --git a/drivers/gpu/drm/i915/i915_vma_resource.c b/drivers/gpu/drm/i915/i915_vma_resource.c
new file mode 100644
index 000000000000..57ae92ba8af1
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_vma_resource.c
@@ -0,0 +1,418 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include <linux/interval_tree_generic.h>
+#include <linux/sched/mm.h>
+
+#include "i915_sw_fence.h"
+#include "i915_vma_resource.h"
+#include "i915_drv.h"
+#include "intel_memory_region.h"
+
+#include "gt/intel_gtt.h"
+
+static struct kmem_cache *slab_vma_resources;
+
+/**
+ * DOC:
+ * We use a per-vm interval tree to keep track of vma_resources
+ * scheduled for unbind but not yet unbound. The tree is protected by
+ * the vm mutex, and nodes are removed just after the unbind fence signals.
+ * The removal takes the vm mutex from a kernel thread which we need to
+ * keep in mind so that we don't grab the mutex and try to wait for all
+ * pending unbinds to complete, because that will temporarily block many
+ * of the workqueue threads, and people will get angry.
+ *
+ * We should consider using a single ordered fence per VM instead but that
+ * requires ordering the unbinds and might introduce unnecessary waiting
+ * for unrelated unbinds. Amount of code will probably be roughly the same
+ * due to the simplicity of using the interval tree interface.
+ *
+ * Another drawback of this interval tree is that the complexity of insertion
+ * and removal of fences increases as O(ln(pending_unbinds)) instead of
+ * O(1) for a single fence without interval tree.
+ */
+#define VMA_RES_START(_node) ((_node)->start)
+#define VMA_RES_LAST(_node) ((_node)->start + (_node)->node_size - 1)
+INTERVAL_TREE_DEFINE(struct i915_vma_resource, rb,
+ u64, __subtree_last,
+ VMA_RES_START, VMA_RES_LAST, static, vma_res_itree);
+
+/* Callbacks for the unbind dma-fence. */
+
+/**
+ * i915_vma_resource_alloc - Allocate a vma resource
+ *
+ * Return: A pointer to a cleared struct i915_vma_resource or
+ * a -ENOMEM error pointer if allocation fails.
+ */
+struct i915_vma_resource *i915_vma_resource_alloc(void)
+{
+ struct i915_vma_resource *vma_res =
+ kmem_cache_zalloc(slab_vma_resources, GFP_KERNEL);
+
+ return vma_res ? vma_res : ERR_PTR(-ENOMEM);
+}
+
+/**
+ * i915_vma_resource_free - Free a vma resource
+ * @vma_res: The vma resource to free.
+ */
+void i915_vma_resource_free(struct i915_vma_resource *vma_res)
+{
+ if (vma_res)
+ kmem_cache_free(slab_vma_resources, vma_res);
+}
+
+static const char *get_driver_name(struct dma_fence *fence)
+{
+ return "vma unbind fence";
+}
+
+static const char *get_timeline_name(struct dma_fence *fence)
+{
+ return "unbound";
+}
+
+static void unbind_fence_free_rcu(struct rcu_head *head)
+{
+ struct i915_vma_resource *vma_res =
+ container_of(head, typeof(*vma_res), unbind_fence.rcu);
+
+ i915_vma_resource_free(vma_res);
+}
+
+static void unbind_fence_release(struct dma_fence *fence)
+{
+ struct i915_vma_resource *vma_res =
+ container_of(fence, typeof(*vma_res), unbind_fence);
+
+ i915_sw_fence_fini(&vma_res->chain);
+
+ call_rcu(&fence->rcu, unbind_fence_free_rcu);
+}
+
+static struct dma_fence_ops unbind_fence_ops = {
+ .get_driver_name = get_driver_name,
+ .get_timeline_name = get_timeline_name,
+ .release = unbind_fence_release,
+};
+
+static void __i915_vma_resource_unhold(struct i915_vma_resource *vma_res)
+{
+ struct i915_address_space *vm;
+
+ if (!refcount_dec_and_test(&vma_res->hold_count))
+ return;
+
+ dma_fence_signal(&vma_res->unbind_fence);
+
+ vm = vma_res->vm;
+ if (vma_res->wakeref)
+ intel_runtime_pm_put(&vm->i915->runtime_pm, vma_res->wakeref);
+
+ vma_res->vm = NULL;
+ if (!RB_EMPTY_NODE(&vma_res->rb)) {
+ mutex_lock(&vm->mutex);
+ vma_res_itree_remove(vma_res, &vm->pending_unbind);
+ mutex_unlock(&vm->mutex);
+ }
+
+ if (vma_res->bi.pages_rsgt)
+ i915_refct_sgt_put(vma_res->bi.pages_rsgt);
+}
+
+/**
+ * i915_vma_resource_unhold - Unhold the signaling of the vma resource unbind
+ * fence.
+ * @vma_res: The vma resource.
+ * @lockdep_cookie: The lockdep cookie returned from i915_vma_resource_hold.
+ *
+ * The function may leave a dma_fence critical section.
+ */
+void i915_vma_resource_unhold(struct i915_vma_resource *vma_res,
+ bool lockdep_cookie)
+{
+ dma_fence_end_signalling(lockdep_cookie);
+
+ if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
+ unsigned long irq_flags;
+
+ /* Inefficient open-coded might_lock_irqsave() */
+ spin_lock_irqsave(&vma_res->lock, irq_flags);
+ spin_unlock_irqrestore(&vma_res->lock, irq_flags);
+ }
+
+ __i915_vma_resource_unhold(vma_res);
+}
+
+/**
+ * i915_vma_resource_hold - Hold the signaling of the vma resource unbind fence.
+ * @vma_res: The vma resource.
+ * @lockdep_cookie: Pointer to a bool serving as a lockdep cookie that should
+ * be given as an argument to the pairing i915_vma_resource_unhold.
+ *
+ * If returning true, the function enters a dma_fence signalling critical
+ * section if not in one already.
+ *
+ * Return: true if holding successful, false if not.
+ */
+bool i915_vma_resource_hold(struct i915_vma_resource *vma_res,
+ bool *lockdep_cookie)
+{
+ bool held = refcount_inc_not_zero(&vma_res->hold_count);
+
+ if (held)
+ *lockdep_cookie = dma_fence_begin_signalling();
+
+ return held;
+}
+
+static void i915_vma_resource_unbind_work(struct work_struct *work)
+{
+ struct i915_vma_resource *vma_res =
+ container_of(work, typeof(*vma_res), work);
+ struct i915_address_space *vm = vma_res->vm;
+ bool lockdep_cookie;
+
+ lockdep_cookie = dma_fence_begin_signalling();
+ if (likely(atomic_read(&vm->open)))
+ vma_res->ops->unbind_vma(vm, vma_res);
+
+ dma_fence_end_signalling(lockdep_cookie);
+ __i915_vma_resource_unhold(vma_res);
+ i915_vma_resource_put(vma_res);
+}
+
+static int
+i915_vma_resource_fence_notify(struct i915_sw_fence *fence,
+ enum i915_sw_fence_notify state)
+{
+ struct i915_vma_resource *vma_res =
+ container_of(fence, typeof(*vma_res), chain);
+ struct dma_fence *unbind_fence =
+ &vma_res->unbind_fence;
+
+ switch (state) {
+ case FENCE_COMPLETE:
+ dma_fence_get(unbind_fence);
+ if (vma_res->immediate_unbind) {
+ i915_vma_resource_unbind_work(&vma_res->work);
+ } else {
+ INIT_WORK(&vma_res->work, i915_vma_resource_unbind_work);
+ queue_work(system_unbound_wq, &vma_res->work);
+ }
+ break;
+ case FENCE_FREE:
+ i915_vma_resource_put(vma_res);
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
+/**
+ * i915_vma_resource_unbind - Unbind a vma resource
+ * @vma_res: The vma resource to unbind.
+ *
+ * At this point this function does little more than publish a fence that
+ * signals immediately unless signaling is held back.
+ *
+ * Return: A refcounted pointer to a dma-fence that signals when unbinding is
+ * complete.
+ */
+struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res)
+{
+ struct i915_address_space *vm = vma_res->vm;
+
+ /* Reference for the sw fence */
+ i915_vma_resource_get(vma_res);
+
+ /* Caller must already have a wakeref in this case. */
+ if (vma_res->needs_wakeref)
+ vma_res->wakeref = intel_runtime_pm_get_if_in_use(&vm->i915->runtime_pm);
+
+ if (atomic_read(&vma_res->chain.pending) <= 1) {
+ RB_CLEAR_NODE(&vma_res->rb);
+ vma_res->immediate_unbind = 1;
+ } else {
+ vma_res_itree_insert(vma_res, &vma_res->vm->pending_unbind);
+ }
+
+ i915_sw_fence_commit(&vma_res->chain);
+
+ return &vma_res->unbind_fence;
+}
+
+/**
+ * __i915_vma_resource_init - Initialize a vma resource.
+ * @vma_res: The vma resource to initialize
+ *
+ * Initializes the private members of a vma resource.
+ */
+void __i915_vma_resource_init(struct i915_vma_resource *vma_res)
+{
+ spin_lock_init(&vma_res->lock);
+ dma_fence_init(&vma_res->unbind_fence, &unbind_fence_ops,
+ &vma_res->lock, 0, 0);
+ refcount_set(&vma_res->hold_count, 1);
+ i915_sw_fence_init(&vma_res->chain, i915_vma_resource_fence_notify);
+}
+
+static void
+i915_vma_resource_color_adjust_range(struct i915_address_space *vm,
+ u64 *start,
+ u64 *end)
+{
+ if (i915_vm_has_cache_coloring(vm)) {
+ if (*start)
+ *start -= I915_GTT_PAGE_SIZE;
+ *end += I915_GTT_PAGE_SIZE;
+ }
+}
+
+/**
+ * i915_vma_resource_bind_dep_sync - Wait for / sync all unbinds touching a
+ * certain vm range.
+ * @vm: The vm to look at.
+ * @offset: The range start.
+ * @size: The range size.
+ * @intr: Whether to wait interruptible.
+ *
+ * The function needs to be called with the vm lock held.
+ *
+ * Return: Zero on success, -ERESTARTSYS if interrupted and @intr==true
+ */
+int i915_vma_resource_bind_dep_sync(struct i915_address_space *vm,
+ u64 offset,
+ u64 size,
+ bool intr)
+{
+ struct i915_vma_resource *node;
+ u64 last = offset + size - 1;
+
+ lockdep_assert_held(&vm->mutex);
+ might_sleep();
+
+ i915_vma_resource_color_adjust_range(vm, &offset, &last);
+ node = vma_res_itree_iter_first(&vm->pending_unbind, offset, last);
+ while (node) {
+ int ret = dma_fence_wait(&node->unbind_fence, intr);
+
+ if (ret)
+ return ret;
+
+ node = vma_res_itree_iter_next(node, offset, last);
+ }
+
+ return 0;
+}
+
+/**
+ * i915_vma_resource_bind_dep_sync_all - Wait for / sync all unbinds of a vm,
+ * releasing the vm lock while waiting.
+ * @vm: The vm to look at.
+ *
+ * The function must not be called with the vm lock held.
+ * Typically this is called at vm destruction to finish any pending
+ * unbind operations. The vm mutex is released while waiting to avoid
+ * stalling kernel workqueues trying to grab the mutex.
+ */
+void i915_vma_resource_bind_dep_sync_all(struct i915_address_space *vm)
+{
+ struct i915_vma_resource *node;
+ struct dma_fence *fence;
+
+ do {
+ fence = NULL;
+ mutex_lock(&vm->mutex);
+ node = vma_res_itree_iter_first(&vm->pending_unbind, 0,
+ U64_MAX);
+ if (node)
+ fence = dma_fence_get_rcu(&node->unbind_fence);
+ mutex_unlock(&vm->mutex);
+
+ if (fence) {
+ /*
+ * The wait makes sure the node eventually removes
+ * itself from the tree.
+ */
+ dma_fence_wait(fence, false);
+ dma_fence_put(fence);
+ }
+ } while (node);
+}
+
+/**
+ * i915_vma_resource_bind_dep_await - Have a struct i915_sw_fence await all
+ * pending unbinds in a certain range of a vm.
+ * @vm: The vm to look at.
+ * @sw_fence: The struct i915_sw_fence that will be awaiting the unbinds.
+ * @offset: The range start.
+ * @size: The range size.
+ * @intr: Whether to wait interruptible.
+ * @gfp: Allocation mode for memory allocations.
+ *
+ * The function makes @sw_fence await all pending unbinds in a certain
+ * vm range before calling the complete notifier. To be able to await
+ * each individual unbind, the function needs to allocate memory using
+ * the @gfp allocation mode. If that fails, the function will instead
+ * wait for the unbind fence to signal, using @intr to judge whether to
+ * wait interruptible or not. Note that @gfp should ideally be selected so
+ * as to avoid any expensive memory allocation stalls and rather fail and
+ * synchronize itself. For now the vm mutex is required when calling this
+ * function which means that @gfp can't call into direct reclaim. In reality
+ * this means that during heavy memory pressure, we will sync in this
+ * function.
+ *
+ * Return: Zero on success, -ERESTARTSYS if interrupted and @intr==true
+ */
+int i915_vma_resource_bind_dep_await(struct i915_address_space *vm,
+ struct i915_sw_fence *sw_fence,
+ u64 offset,
+ u64 size,
+ bool intr,
+ gfp_t gfp)
+{
+ struct i915_vma_resource *node;
+ u64 last = offset + size - 1;
+
+ lockdep_assert_held(&vm->mutex);
+ might_alloc(gfp);
+ might_sleep();
+
+ i915_vma_resource_color_adjust_range(vm, &offset, &last);
+ node = vma_res_itree_iter_first(&vm->pending_unbind, offset, last);
+ while (node) {
+ int ret;
+
+ ret = i915_sw_fence_await_dma_fence(sw_fence,
+ &node->unbind_fence,
+ 0, gfp);
+ if (ret < 0) {
+ ret = dma_fence_wait(&node->unbind_fence, intr);
+ if (ret)
+ return ret;
+ }
+
+ node = vma_res_itree_iter_next(node, offset, last);
+ }
+
+ return 0;
+}
+
+void i915_vma_resource_module_exit(void)
+{
+ kmem_cache_destroy(slab_vma_resources);
+}
+
+int __init i915_vma_resource_module_init(void)
+{
+ slab_vma_resources = KMEM_CACHE(i915_vma_resource, SLAB_HWCACHE_ALIGN);
+ if (!slab_vma_resources)
+ return -ENOMEM;
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/i915/i915_vma_resource.h b/drivers/gpu/drm/i915/i915_vma_resource.h
new file mode 100644
index 000000000000..25913913baa6
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_vma_resource.h
@@ -0,0 +1,234 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef __I915_VMA_RESOURCE_H__
+#define __I915_VMA_RESOURCE_H__
+
+#include <linux/dma-fence.h>
+#include <linux/refcount.h>
+
+#include "i915_gem.h"
+#include "i915_scatterlist.h"
+#include "i915_sw_fence.h"
+#include "intel_runtime_pm.h"
+
+struct intel_memory_region;
+
+struct i915_page_sizes {
+ /**
+ * The sg mask of the pages sg_table. i.e. the mask of
+ * the lengths for each sg entry.
+ */
+ unsigned int phys;
+
+ /**
+ * The gtt page sizes we are allowed to use given the
+ * sg mask and the supported page sizes. This will
+ * express the smallest unit we can use for the whole
+ * object, as well as the larger sizes we may be able
+ * to use opportunistically.
+ */
+ unsigned int sg;
+};
+
+/**
+ * struct i915_vma_resource - Snapshotted unbind information.
+ * @unbind_fence: Fence to mark unbinding complete. Note that this fence
+ * is not considered published until unbind is scheduled, and as such it
+ * is illegal to access this fence before scheduled unbind other than
+ * for refcounting.
+ * @lock: The @unbind_fence lock.
+ * @hold_count: Number of holders blocking the fence from finishing.
+ * The vma itself is keeping a hold, which is released when unbind
+ * is scheduled.
+ * @work: Work struct for deferred unbind work.
+ * @chain: Embedded struct i915_sw_fence used to await dependencies.
+ * @rb: Rb node for the vm's pending unbind interval tree.
+ * @__subtree_last: Interval tree private member.
+ * @vm: non-refcounted pointer to the vm. This is for internal use only and
+ * this member is cleared after vma_resource unbind.
+ * @mr: The memory region of the object pointed to by the vma.
+ * @ops: Pointer to the backend i915_vma_ops.
+ * @private: Bind backend private info.
+ * @start: Offset into the address space of bind range start.
+ * @node_size: Size of the allocated range manager node.
+ * @vma_size: Bind size.
+ * @page_sizes_gtt: Resulting page sizes from the bind operation.
+ * @bound_flags: Flags indicating binding status.
+ * @allocated: Backend private data. TODO: Should move into @private.
+ * @immediate_unbind: Unbind can be done immediately and doesn't need to be
+ * deferred to a work item awaiting unsignaled fences. This is a hack.
+ * (dma_fence_work uses a fence flag for this, but this seems slightly
+ * cleaner).
+ *
+ * The lifetime of a struct i915_vma_resource spans from a binding request
+ * until the actual, possibly asynchronous, unbind has completed.
+ */
+struct i915_vma_resource {
+ struct dma_fence unbind_fence;
+ /* See above for description of the lock. */
+ spinlock_t lock;
+ refcount_t hold_count;
+ struct work_struct work;
+ struct i915_sw_fence chain;
+ struct rb_node rb;
+ u64 __subtree_last;
+ struct i915_address_space *vm;
+ intel_wakeref_t wakeref;
+
+ /**
+ * struct i915_vma_bindinfo - Information needed for async bind
+ * only but that can be dropped after the bind has taken place.
+ * Consider making this a separate argument to the bind_vma
+ * op, coalescing with other arguments like vm, stash, cache_level
+ * and flags
+ * @pages: The pages sg-table.
+ * @page_sizes: Page sizes of the pages.
+ * @pages_rsgt: Refcounted sg-table when delayed object destruction
+ * is supported. May be NULL.
+ * @readonly: Whether the vma should be bound read-only.
+ * @lmem: Whether the vma points to lmem.
+ */
+ struct i915_vma_bindinfo {
+ struct sg_table *pages;
+ struct i915_page_sizes page_sizes;
+ struct i915_refct_sgt *pages_rsgt;
+ bool readonly:1;
+ bool lmem:1;
+ } bi;
+
+#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
+ struct intel_memory_region *mr;
+#endif
+ const struct i915_vma_ops *ops;
+ void *private;
+ u64 start;
+ u64 node_size;
+ u64 vma_size;
+ u32 page_sizes_gtt;
+
+ u32 bound_flags;
+ bool allocated:1;
+ bool immediate_unbind:1;
+ bool needs_wakeref:1;
+};
+
+bool i915_vma_resource_hold(struct i915_vma_resource *vma_res,
+ bool *lockdep_cookie);
+
+void i915_vma_resource_unhold(struct i915_vma_resource *vma_res,
+ bool lockdep_cookie);
+
+struct i915_vma_resource *i915_vma_resource_alloc(void);
+
+void i915_vma_resource_free(struct i915_vma_resource *vma_res);
+
+struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res);
+
+void __i915_vma_resource_init(struct i915_vma_resource *vma_res);
+
+/**
+ * i915_vma_resource_get - Take a reference on a vma resource
+ * @vma_res: The vma resource on which to take a reference.
+ *
+ * Return: The @vma_res pointer
+ */
+static inline struct i915_vma_resource
+*i915_vma_resource_get(struct i915_vma_resource *vma_res)
+{
+ dma_fence_get(&vma_res->unbind_fence);
+ return vma_res;
+}
+
+/**
+ * i915_vma_resource_put - Release a reference to a struct i915_vma_resource
+ * @vma_res: The resource
+ */
+static inline void i915_vma_resource_put(struct i915_vma_resource *vma_res)
+{
+ dma_fence_put(&vma_res->unbind_fence);
+}
+
+/**
+ * i915_vma_resource_init - Initialize a vma resource.
+ * @vma_res: The vma resource to initialize
+ * @vm: Pointer to the vm.
+ * @pages: The pages sg-table.
+ * @page_sizes: Page sizes of the pages.
+ * @pages_rsgt: Pointer to a struct i915_refct_sgt of an object with
+ * delayed destruction.
+ * @readonly: Whether the vma should be bound read-only.
+ * @lmem: Whether the vma points to lmem.
+ * @mr: The memory region of the object the vma points to.
+ * @ops: The backend ops.
+ * @private: Bind backend private info.
+ * @start: Offset into the address space of bind range start.
+ * @node_size: Size of the allocated range manager node.
+ * @size: Bind size.
+ *
+ * Initializes a vma resource allocated using i915_vma_resource_alloc().
+ * The reason for having separate allocate and initialize function is that
+ * initialization may need to be performed from under a lock where
+ * allocation is not allowed.
+ */
+static inline void i915_vma_resource_init(struct i915_vma_resource *vma_res,
+ struct i915_address_space *vm,
+ struct sg_table *pages,
+ const struct i915_page_sizes *page_sizes,
+ struct i915_refct_sgt *pages_rsgt,
+ bool readonly,
+ bool lmem,
+ struct intel_memory_region *mr,
+ const struct i915_vma_ops *ops,
+ void *private,
+ u64 start,
+ u64 node_size,
+ u64 size)
+{
+ __i915_vma_resource_init(vma_res);
+ vma_res->vm = vm;
+ vma_res->bi.pages = pages;
+ vma_res->bi.page_sizes = *page_sizes;
+ if (pages_rsgt)
+ vma_res->bi.pages_rsgt = i915_refct_sgt_get(pages_rsgt);
+ vma_res->bi.readonly = readonly;
+ vma_res->bi.lmem = lmem;
+#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
+ vma_res->mr = mr;
+#endif
+ vma_res->ops = ops;
+ vma_res->private = private;
+ vma_res->start = start;
+ vma_res->node_size = node_size;
+ vma_res->vma_size = size;
+}
+
+static inline void i915_vma_resource_fini(struct i915_vma_resource *vma_res)
+{
+ GEM_BUG_ON(refcount_read(&vma_res->hold_count) != 1);
+ if (vma_res->bi.pages_rsgt)
+ i915_refct_sgt_put(vma_res->bi.pages_rsgt);
+ i915_sw_fence_fini(&vma_res->chain);
+}
+
+int i915_vma_resource_bind_dep_sync(struct i915_address_space *vm,
+ u64 offset, /* range start */
+ u64 size, /* range size, not the last address */
+ bool intr);
+
+int i915_vma_resource_bind_dep_await(struct i915_address_space *vm,
+ struct i915_sw_fence *sw_fence,
+ u64 offset, /* range start */
+ u64 size, /* range size, not the last address */
+ bool intr,
+ gfp_t gfp);
+
+void i915_vma_resource_bind_dep_sync_all(struct i915_address_space *vm);
+
+void i915_vma_resource_module_exit(void);
+
+int i915_vma_resource_module_init(void);
+
+#endif
diff --git a/drivers/gpu/drm/i915/i915_vma_snapshot.c b/drivers/gpu/drm/i915/i915_vma_snapshot.c
deleted file mode 100644
index 2949ceea9884..000000000000
--- a/drivers/gpu/drm/i915/i915_vma_snapshot.c
+++ /dev/null
@@ -1,134 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2021 Intel Corporation
- */
-
-#include "i915_vma_snapshot.h"
-#include "i915_vma_types.h"
-#include "i915_vma.h"
-
-/**
- * i915_vma_snapshot_init - Initialize a struct i915_vma_snapshot from
- * a struct i915_vma.
- * @vsnap: The i915_vma_snapshot to init.
- * @vma: A struct i915_vma used to initialize @vsnap.
- * @name: Name associated with the snapshot. The character pointer needs to
- * stay alive over the lifitime of the shapsot
- */
-void i915_vma_snapshot_init(struct i915_vma_snapshot *vsnap,
- struct i915_vma *vma,
- const char *name)
-{
- if (!i915_vma_is_pinned(vma))
- assert_object_held(vma->obj);
-
- vsnap->name = name;
- vsnap->size = vma->size;
- vsnap->obj_size = vma->obj->base.size;
- vsnap->gtt_offset = vma->node.start;
- vsnap->gtt_size = vma->node.size;
- vsnap->page_sizes = vma->page_sizes.gtt;
- vsnap->pages = vma->pages;
- vsnap->pages_rsgt = NULL;
- vsnap->mr = NULL;
- if (vma->obj->mm.rsgt)
- vsnap->pages_rsgt = i915_refct_sgt_get(vma->obj->mm.rsgt);
- vsnap->mr = vma->obj->mm.region;
- kref_init(&vsnap->kref);
- vsnap->vma_resource = &vma->active;
- vsnap->onstack = false;
- vsnap->present = true;
-}
-
-/**
- * i915_vma_snapshot_init_onstack - Initialize a struct i915_vma_snapshot from
- * a struct i915_vma, but avoid kfreeing it on last put.
- * @vsnap: The i915_vma_snapshot to init.
- * @vma: A struct i915_vma used to initialize @vsnap.
- * @name: Name associated with the snapshot. The character pointer needs to
- * stay alive over the lifitime of the shapsot
- */
-void i915_vma_snapshot_init_onstack(struct i915_vma_snapshot *vsnap,
- struct i915_vma *vma,
- const char *name)
-{
- i915_vma_snapshot_init(vsnap, vma, name);
- vsnap->onstack = true;
-}
-
-static void vma_snapshot_release(struct kref *ref)
-{
- struct i915_vma_snapshot *vsnap =
- container_of(ref, typeof(*vsnap), kref);
-
- vsnap->present = false;
- if (vsnap->pages_rsgt)
- i915_refct_sgt_put(vsnap->pages_rsgt);
- if (!vsnap->onstack)
- kfree(vsnap);
-}
-
-/**
- * i915_vma_snapshot_put - Put an i915_vma_snapshot pointer reference
- * @vsnap: The pointer reference
- */
-void i915_vma_snapshot_put(struct i915_vma_snapshot *vsnap)
-{
- kref_put(&vsnap->kref, vma_snapshot_release);
-}
-
-/**
- * i915_vma_snapshot_put_onstack - Put an onstcak i915_vma_snapshot pointer
- * reference and varify that the structure is released
- * @vsnap: The pointer reference
- *
- * This function is intended to be paired with a i915_vma_init_onstack()
- * and should be called before exiting the scope that declared or
- * freeing the structure that embedded @vsnap to verify that all references
- * have been released.
- */
-void i915_vma_snapshot_put_onstack(struct i915_vma_snapshot *vsnap)
-{
- if (!kref_put(&vsnap->kref, vma_snapshot_release))
- GEM_BUG_ON(1);
-}
-
-/**
- * i915_vma_snapshot_resource_pin - Temporarily block the memory the
- * vma snapshot is pointing to from being released.
- * @vsnap: The vma snapshot.
- * @lockdep_cookie: Pointer to bool needed for lockdep support. This needs
- * to be passed to the paired i915_vma_snapshot_resource_unpin.
- *
- * This function will temporarily try to hold up a fence or similar structure
- * and will therefore enter a fence signaling critical section.
- *
- * Return: true if we succeeded in blocking the memory from being released,
- * false otherwise.
- */
-bool i915_vma_snapshot_resource_pin(struct i915_vma_snapshot *vsnap,
- bool *lockdep_cookie)
-{
- bool pinned = i915_active_acquire_if_busy(vsnap->vma_resource);
-
- if (pinned)
- *lockdep_cookie = dma_fence_begin_signalling();
-
- return pinned;
-}
-
-/**
- * i915_vma_snapshot_resource_unpin - Unblock vma snapshot memory from
- * being released.
- * @vsnap: The vma snapshot.
- * @lockdep_cookie: Cookie returned from matching i915_vma_resource_pin().
- *
- * Might leave a fence signalling critical section and signal a fence.
- */
-void i915_vma_snapshot_resource_unpin(struct i915_vma_snapshot *vsnap,
- bool lockdep_cookie)
-{
- dma_fence_end_signalling(lockdep_cookie);
-
- return i915_active_release(vsnap->vma_resource);
-}
diff --git a/drivers/gpu/drm/i915/i915_vma_snapshot.h b/drivers/gpu/drm/i915/i915_vma_snapshot.h
deleted file mode 100644
index 940581df4622..000000000000
--- a/drivers/gpu/drm/i915/i915_vma_snapshot.h
+++ /dev/null
@@ -1,112 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2021 Intel Corporation
- */
-#ifndef _I915_VMA_SNAPSHOT_H_
-#define _I915_VMA_SNAPSHOT_H_
-
-#include <linux/kref.h>
-#include <linux/slab.h>
-#include <linux/types.h>
-
-struct i915_active;
-struct i915_refct_sgt;
-struct i915_vma;
-struct intel_memory_region;
-struct sg_table;
-
-/**
- * DOC: Simple utilities for snapshotting GPU vma metadata, later used for
- * error capture. Vi use a separate header for this to avoid issues due to
- * recursive header includes.
- */
-
-/**
- * struct i915_vma_snapshot - Snapshot of vma metadata.
- * @size: The vma size in bytes.
- * @obj_size: The size of the underlying object in bytes.
- * @gtt_offset: The gtt offset the vma is bound to.
- * @gtt_size: The size in bytes allocated for the vma in the GTT.
- * @pages: The struct sg_table pointing to the pages bound.
- * @pages_rsgt: The refcounted sg_table holding the reference for @pages if any.
- * @mr: The memory region pointed for the pages bound.
- * @kref: Reference for this structure.
- * @vma_resource: FIXME: A means to keep the unbind fence from signaling.
- * Temporarily while we have only sync unbinds, and still use the vma
- * active, we use that. With async unbinding we need a signaling refcount
- * for the unbind fence.
- * @page_sizes: The vma GTT page sizes information.
- * @onstack: Whether the structure shouldn't be freed on final put.
- * @present: Whether the structure is present and initialized.
- */
-struct i915_vma_snapshot {
- const char *name;
- size_t size;
- size_t obj_size;
- size_t gtt_offset;
- size_t gtt_size;
- struct sg_table *pages;
- struct i915_refct_sgt *pages_rsgt;
- struct intel_memory_region *mr;
- struct kref kref;
- struct i915_active *vma_resource;
- u32 page_sizes;
- bool onstack:1;
- bool present:1;
-};
-
-void i915_vma_snapshot_init(struct i915_vma_snapshot *vsnap,
- struct i915_vma *vma,
- const char *name);
-
-void i915_vma_snapshot_init_onstack(struct i915_vma_snapshot *vsnap,
- struct i915_vma *vma,
- const char *name);
-
-void i915_vma_snapshot_put(struct i915_vma_snapshot *vsnap);
-
-void i915_vma_snapshot_put_onstack(struct i915_vma_snapshot *vsnap);
-
-bool i915_vma_snapshot_resource_pin(struct i915_vma_snapshot *vsnap,
- bool *lockdep_cookie);
-
-void i915_vma_snapshot_resource_unpin(struct i915_vma_snapshot *vsnap,
- bool lockdep_cookie);
-
-/**
- * i915_vma_snapshot_alloc - Allocate a struct i915_vma_snapshot
- * @gfp: Allocation mode.
- *
- * Return: A pointer to a struct i915_vma_snapshot if successful.
- * NULL otherwise.
- */
-static inline struct i915_vma_snapshot *i915_vma_snapshot_alloc(gfp_t gfp)
-{
- return kmalloc(sizeof(struct i915_vma_snapshot), gfp);
-}
-
-/**
- * i915_vma_snapshot_get - Take a reference on a struct i915_vma_snapshot
- *
- * Return: A pointer to a struct i915_vma_snapshot.
- */
-static inline struct i915_vma_snapshot *
-i915_vma_snapshot_get(struct i915_vma_snapshot *vsnap)
-{
- kref_get(&vsnap->kref);
- return vsnap;
-}
-
-/**
- * i915_vma_snapshot_present - Whether a struct i915_vma_snapshot is
- * present and initialized.
- *
- * Return: true if present and initialized; false otherwise.
- */
-static inline bool
-i915_vma_snapshot_present(const struct i915_vma_snapshot *vsnap)
-{
- return vsnap && vsnap->present;
-}
-
-#endif
diff --git a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h
index ca575e129ced..88370dadca82 100644
--- a/drivers/gpu/drm/i915/i915_vma_types.h
+++ b/drivers/gpu/drm/i915/i915_vma_types.h
@@ -95,6 +95,8 @@ enum i915_cache_level;
*
*/
+struct i915_vma_resource;
+
struct intel_remapped_plane_info {
/* in gtt pages */
u32 offset:31;
@@ -247,22 +249,20 @@ struct i915_vma {
#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)
-#define I915_VMA_ALLOC_BIT 12
-
-#define I915_VMA_ERROR_BIT 13
+#define I915_VMA_ERROR_BIT 12
#define I915_VMA_ERROR ((int)BIT(I915_VMA_ERROR_BIT))
-#define I915_VMA_GGTT_BIT 14
-#define I915_VMA_CAN_FENCE_BIT 15
-#define I915_VMA_USERFAULT_BIT 16
-#define I915_VMA_GGTT_WRITE_BIT 17
+#define I915_VMA_GGTT_BIT 13
+#define I915_VMA_CAN_FENCE_BIT 14
+#define I915_VMA_USERFAULT_BIT 15
+#define I915_VMA_GGTT_WRITE_BIT 16
#define I915_VMA_GGTT ((int)BIT(I915_VMA_GGTT_BIT))
#define I915_VMA_CAN_FENCE ((int)BIT(I915_VMA_CAN_FENCE_BIT))
#define I915_VMA_USERFAULT ((int)BIT(I915_VMA_USERFAULT_BIT))
#define I915_VMA_GGTT_WRITE ((int)BIT(I915_VMA_GGTT_WRITE_BIT))
-#define I915_VMA_SCANOUT_BIT 18
+#define I915_VMA_SCANOUT_BIT 17
#define I915_VMA_SCANOUT ((int)BIT(I915_VMA_SCANOUT_BIT))
struct i915_active active;
@@ -291,6 +291,9 @@ struct i915_vma {
struct list_head evict_link;
struct list_head closed_link;
+
+ /** The async vma resource. Protected by the vm_mutex */
+ struct i915_vma_resource *resource;
};
#endif
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 3699b1c539ea..27dcfe6f2429 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -96,7 +96,7 @@ enum intel_platform {
* it is fine for the same bit to be used on multiple parent platforms.
*/
-#define INTEL_SUBPLATFORM_BITS (2)
+#define INTEL_SUBPLATFORM_BITS (3)
#define INTEL_SUBPLATFORM_MASK (BIT(INTEL_SUBPLATFORM_BITS) - 1)
/* HSW/BDW/SKL/KBL/CFL */
@@ -109,6 +109,7 @@ enum intel_platform {
/* DG2 */
#define INTEL_SUBPLATFORM_G10 0
#define INTEL_SUBPLATFORM_G11 1
+#define INTEL_SUBPLATFORM_G12 2
/* ADL-S */
#define INTEL_SUBPLATFORM_RPL_S 0
@@ -134,6 +135,7 @@ enum intel_ppgtt_type {
func(has_reset_engine); \
func(has_global_mocs); \
func(has_gt_uc); \
+ func(has_guc_deprivilege); \
func(has_l3_dpf); \
func(has_llc); \
func(has_logical_ring_contexts); \
diff --git a/drivers/gpu/drm/i915/intel_step.c b/drivers/gpu/drm/i915/intel_step.c
index a4b16b9e2e55..ac1a796b2808 100644
--- a/drivers/gpu/drm/i915/intel_step.c
+++ b/drivers/gpu/drm/i915/intel_step.c
@@ -122,6 +122,15 @@ static const struct intel_step_info dg2_g11_revid_step_tbl[] = {
[0x5] = { COMMON_GT_MEDIA_STEP(B1), .display_step = STEP_C0 },
};
+/* Stepping table that intel_step_init() selects for IS_DG2_G12() devices. */
+static const struct intel_step_info dg2_g12_revid_step_tbl[] = {
+ [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_C0 },
+};
+
+/*
+ * Stepping table that intel_step_init() selects for IS_ADLS_RPLS() devices;
+ * that check is made before the generic IS_ALDERLAKE_S() one.
+ */
+static const struct intel_step_info adls_rpls_revids[] = {
+ [0x4] = { COMMON_GT_MEDIA_STEP(D0), .display_step = STEP_D0 },
+ [0xC] = { COMMON_GT_MEDIA_STEP(D0), .display_step = STEP_C0 },
+};
+
void intel_step_init(struct drm_i915_private *i915)
{
const struct intel_step_info *revids = NULL;
@@ -135,12 +144,18 @@ void intel_step_init(struct drm_i915_private *i915)
} else if (IS_DG2_G11(i915)) {
revids = dg2_g11_revid_step_tbl;
size = ARRAY_SIZE(dg2_g11_revid_step_tbl);
+ } else if (IS_DG2_G12(i915)) {
+ revids = dg2_g12_revid_step_tbl;
+ size = ARRAY_SIZE(dg2_g12_revid_step_tbl);
} else if (IS_XEHPSDV(i915)) {
revids = xehpsdv_revids;
size = ARRAY_SIZE(xehpsdv_revids);
} else if (IS_ALDERLAKE_P(i915)) {
revids = adlp_revids;
size = ARRAY_SIZE(adlp_revids);
+ } else if (IS_ADLS_RPLS(i915)) {
+ revids = adls_rpls_revids;
+ size = ARRAY_SIZE(adls_rpls_revids);
} else if (IS_ALDERLAKE_S(i915)) {
revids = adls_revids;
size = ARRAY_SIZE(adls_revids);
diff --git a/drivers/gpu/drm/i915/intel_wopcm.c b/drivers/gpu/drm/i915/intel_wopcm.c
index f06d21005106..322fb9eeb880 100644
--- a/drivers/gpu/drm/i915/intel_wopcm.c
+++ b/drivers/gpu/drm/i915/intel_wopcm.c
@@ -43,6 +43,7 @@
/* Default WOPCM size is 2MB from Gen11, 1MB on previous platforms */
#define GEN11_WOPCM_SIZE SZ_2M
#define GEN9_WOPCM_SIZE SZ_1M
+#define MAX_WOPCM_SIZE SZ_8M
/* 16KB WOPCM (RSVD WOPCM) is reserved from HuC firmware top. */
#define WOPCM_RESERVED_SIZE SZ_16K
@@ -207,6 +208,14 @@ static bool __wopcm_regs_locked(struct intel_uncore *uncore,
return true;
}
+/*
+ * Report whether i915 may program the WOPCM registers: always on platforms
+ * without GuC deprivilege, otherwise only while GUC_SHIM_CONTROL2 still has
+ * GUC_IS_PRIVILEGED set.
+ */
+static bool __wopcm_regs_writable(struct intel_uncore *uncore)
+{
+	if (HAS_GUC_DEPRIVILEGE(uncore->i915))
+		return intel_uncore_read(uncore, GUC_SHIM_CONTROL2) &
+		       GUC_IS_PRIVILEGED;
+
+	return true;
+}
+
/**
* intel_wopcm_init() - Initialize the WOPCM structure.
* @wopcm: pointer to intel_wopcm.
@@ -224,18 +233,19 @@ void intel_wopcm_init(struct intel_wopcm *wopcm)
u32 guc_fw_size = intel_uc_fw_get_upload_size(&gt->uc.guc.fw);
u32 huc_fw_size = intel_uc_fw_get_upload_size(&gt->uc.huc.fw);
u32 ctx_rsvd = context_reserved_size(i915);
+ u32 wopcm_size = wopcm->size;
u32 guc_wopcm_base;
u32 guc_wopcm_size;
if (!guc_fw_size)
return;
- GEM_BUG_ON(!wopcm->size);
+ GEM_BUG_ON(!wopcm_size);
GEM_BUG_ON(wopcm->guc.base);
GEM_BUG_ON(wopcm->guc.size);
- GEM_BUG_ON(guc_fw_size >= wopcm->size);
- GEM_BUG_ON(huc_fw_size >= wopcm->size);
- GEM_BUG_ON(ctx_rsvd + WOPCM_RESERVED_SIZE >= wopcm->size);
+ GEM_BUG_ON(guc_fw_size >= wopcm_size);
+ GEM_BUG_ON(huc_fw_size >= wopcm_size);
+ GEM_BUG_ON(ctx_rsvd + WOPCM_RESERVED_SIZE >= wopcm_size);
if (i915_inject_probe_failure(i915))
return;
@@ -243,6 +253,24 @@ void intel_wopcm_init(struct intel_wopcm *wopcm)
if (__wopcm_regs_locked(gt->uncore, &guc_wopcm_base, &guc_wopcm_size)) {
drm_dbg(&i915->drm, "GuC WOPCM is already locked [%uK, %uK)\n",
guc_wopcm_base / SZ_1K, guc_wopcm_size / SZ_1K);
+ /*
+ * Note that to keep things simple (i.e. avoid different
+ * defines per platform) our WOPCM math doesn't always use the
+ * actual WOPCM size, but a value that is less or equal to it.
+ * This is perfectly fine when i915 programs the registers, but
+ * on platforms with GuC deprivilege the registers are not
+ * writable from i915 and are instead pre-programmed by the
+ * bios/IFWI, so there might be a mismatch of sizes.
+ * Instead of handling the size difference, we trust that the
+ * programmed values make sense and disable the relevant check
+ * by using the maximum possible WOPCM size in the verification
+ * math. In the extremely unlikely case that the registers
+ * were pre-programmed with an invalid value, we will still
+ * gracefully fail later during the GuC/HuC dma.
+ */
+ if (!__wopcm_regs_writable(gt->uncore))
+ wopcm_size = MAX_WOPCM_SIZE;
+
goto check;
}
@@ -257,17 +285,17 @@ void intel_wopcm_init(struct intel_wopcm *wopcm)
* Need to clamp guc_wopcm_base now to make sure the following math is
* correct. Formal check of whole WOPCM layout will be done below.
*/
- guc_wopcm_base = min(guc_wopcm_base, wopcm->size - ctx_rsvd);
+ guc_wopcm_base = min(guc_wopcm_base, wopcm_size - ctx_rsvd);
/* Aligned remainings of usable WOPCM space can be assigned to GuC. */
- guc_wopcm_size = wopcm->size - ctx_rsvd - guc_wopcm_base;
+ guc_wopcm_size = wopcm_size - ctx_rsvd - guc_wopcm_base;
guc_wopcm_size &= GUC_WOPCM_SIZE_MASK;
drm_dbg(&i915->drm, "Calculated GuC WOPCM [%uK, %uK)\n",
guc_wopcm_base / SZ_1K, guc_wopcm_size / SZ_1K);
check:
- if (__check_layout(i915, wopcm->size, guc_wopcm_base, guc_wopcm_size,
+ if (__check_layout(i915, wopcm_size, guc_wopcm_base, guc_wopcm_size,
guc_fw_size, huc_fw_size)) {
wopcm->guc.base = guc_wopcm_base;
wopcm->guc.size = guc_wopcm_size;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
index 6a7328d9c361..e5dd82e7e480 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
@@ -42,7 +42,7 @@ static int switch_to_context(struct i915_gem_context *ctx)
static void trash_stolen(struct drm_i915_private *i915)
{
- struct i915_ggtt *ggtt = &i915->ggtt;
+ struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
const u64 slot = ggtt->error_capture.start;
const resource_size_t size = resource_size(&i915->dsm);
unsigned long page;
@@ -100,7 +100,7 @@ static void igt_pm_suspend(struct drm_i915_private *i915)
intel_wakeref_t wakeref;
with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
- i915_ggtt_suspend(&i915->ggtt);
+ i915_ggtt_suspend(to_gt(i915)->ggtt);
i915_gem_suspend_late(i915);
}
}
@@ -110,7 +110,7 @@ static void igt_pm_hibernate(struct drm_i915_private *i915)
intel_wakeref_t wakeref;
with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
- i915_ggtt_suspend(&i915->ggtt);
+ i915_ggtt_suspend(to_gt(i915)->ggtt);
i915_gem_freeze(i915);
i915_gem_freeze_late(i915);
@@ -126,7 +126,7 @@ static void igt_pm_resume(struct drm_i915_private *i915)
* that runtime-pm just works.
*/
with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
- i915_ggtt_resume(&i915->ggtt);
+ i915_ggtt_resume(to_gt(i915)->ggtt);
i915_gem_resume(i915);
}
}
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index 74a1b2ecf48f..8c6517d29b8e 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -118,7 +118,7 @@ static int igt_evict_something(void *arg)
/* Everything is pinned, nothing should happen */
mutex_lock(&ggtt->vm.mutex);
- err = i915_gem_evict_something(&ggtt->vm,
+ err = i915_gem_evict_something(&ggtt->vm, NULL,
I915_GTT_PAGE_SIZE, 0, 0,
0, U64_MAX,
0);
@@ -133,7 +133,7 @@ static int igt_evict_something(void *arg)
/* Everything is unpinned, we should be able to evict something */
mutex_lock(&ggtt->vm.mutex);
- err = i915_gem_evict_something(&ggtt->vm,
+ err = i915_gem_evict_something(&ggtt->vm, NULL,
I915_GTT_PAGE_SIZE, 0, 0,
0, U64_MAX,
0);
@@ -205,7 +205,7 @@ static int igt_evict_for_vma(void *arg)
/* Everything is pinned, nothing should happen */
mutex_lock(&ggtt->vm.mutex);
- err = i915_gem_evict_for_node(&ggtt->vm, &target, 0);
+ err = i915_gem_evict_for_node(&ggtt->vm, NULL, &target, 0);
mutex_unlock(&ggtt->vm.mutex);
if (err != -ENOSPC) {
pr_err("i915_gem_evict_for_node on a full GGTT returned err=%d\n",
@@ -217,7 +217,7 @@ static int igt_evict_for_vma(void *arg)
/* Everything is unpinned, we should be able to evict the node */
mutex_lock(&ggtt->vm.mutex);
- err = i915_gem_evict_for_node(&ggtt->vm, &target, 0);
+ err = i915_gem_evict_for_node(&ggtt->vm, NULL, &target, 0);
mutex_unlock(&ggtt->vm.mutex);
if (err) {
pr_err("i915_gem_evict_for_node returned err=%d\n",
@@ -298,7 +298,7 @@ static int igt_evict_for_cache_color(void *arg)
/* Remove just the second vma */
mutex_lock(&ggtt->vm.mutex);
- err = i915_gem_evict_for_node(&ggtt->vm, &target, 0);
+ err = i915_gem_evict_for_node(&ggtt->vm, NULL, &target, 0);
mutex_unlock(&ggtt->vm.mutex);
if (err) {
pr_err("[0]i915_gem_evict_for_node returned err=%d\n", err);
@@ -311,7 +311,7 @@ static int igt_evict_for_cache_color(void *arg)
target.color = I915_CACHE_L3_LLC;
mutex_lock(&ggtt->vm.mutex);
- err = i915_gem_evict_for_node(&ggtt->vm, &target, 0);
+ err = i915_gem_evict_for_node(&ggtt->vm, NULL, &target, 0);
mutex_unlock(&ggtt->vm.mutex);
if (!err) {
pr_err("[1]i915_gem_evict_for_node returned err=%d\n", err);
@@ -332,6 +332,7 @@ static int igt_evict_vm(void *arg)
{
struct intel_gt *gt = arg;
struct i915_ggtt *ggtt = gt->ggtt;
+ struct i915_gem_ww_ctx ww;
LIST_HEAD(objects);
int err;
@@ -343,7 +344,7 @@ static int igt_evict_vm(void *arg)
/* Everything is pinned, nothing should happen */
mutex_lock(&ggtt->vm.mutex);
- err = i915_gem_evict_vm(&ggtt->vm);
+ err = i915_gem_evict_vm(&ggtt->vm, NULL);
mutex_unlock(&ggtt->vm.mutex);
if (err) {
pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n",
@@ -353,9 +354,12 @@ static int igt_evict_vm(void *arg)
unpin_ggtt(ggtt);
- mutex_lock(&ggtt->vm.mutex);
- err = i915_gem_evict_vm(&ggtt->vm);
- mutex_unlock(&ggtt->vm.mutex);
+ for_i915_gem_ww(&ww, err, false) {
+ mutex_lock(&ggtt->vm.mutex);
+ err = i915_gem_evict_vm(&ggtt->vm, &ww);
+ mutex_unlock(&ggtt->vm.mutex);
+ }
+
if (err) {
pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n",
err);
@@ -403,7 +407,7 @@ static int igt_evict_contexts(void *arg)
/* Reserve a block so that we know we have enough to fit a few rq */
memset(&hole, 0, sizeof(hole));
mutex_lock(&ggtt->vm.mutex);
- err = i915_gem_gtt_insert(&ggtt->vm, &hole,
+ err = i915_gem_gtt_insert(&ggtt->vm, NULL, &hole,
PRETEND_GGTT_SIZE, 0, I915_COLOR_UNEVICTABLE,
0, ggtt->vm.total,
PIN_NOEVICT);
@@ -423,7 +427,7 @@ static int igt_evict_contexts(void *arg)
goto out_locked;
}
- if (i915_gem_gtt_insert(&ggtt->vm, &r->node,
+ if (i915_gem_gtt_insert(&ggtt->vm, NULL, &r->node,
1ul << 20, 0, I915_COLOR_UNEVICTABLE,
0, ggtt->vm.total,
PIN_NOEVICT)) {
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index 1b508c89468c..e7e6c4b2c81d 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -33,6 +33,7 @@
#include "i915_random.h"
#include "i915_selftest.h"
+#include "i915_vma_resource.h"
#include "mock_drm.h"
#include "mock_gem_device.h"
@@ -239,11 +240,11 @@ static int lowlevel_hole(struct i915_address_space *vm,
unsigned long end_time)
{
I915_RND_STATE(seed_prng);
- struct i915_vma *mock_vma;
+ struct i915_vma_resource *mock_vma_res;
unsigned int size;
- mock_vma = kzalloc(sizeof(*mock_vma), GFP_KERNEL);
- if (!mock_vma)
+ mock_vma_res = kzalloc(sizeof(*mock_vma_res), GFP_KERNEL);
+ if (!mock_vma_res)
return -ENOMEM;
/* Keep creating larger objects until one cannot fit into the hole */
@@ -269,7 +270,7 @@ static int lowlevel_hole(struct i915_address_space *vm,
break;
} while (count >>= 1);
if (!count) {
- kfree(mock_vma);
+ kfree(mock_vma_res);
return -ENOMEM;
}
GEM_BUG_ON(!order);
@@ -343,12 +344,12 @@ alloc_vm_end:
break;
}
- mock_vma->pages = obj->mm.pages;
- mock_vma->node.size = BIT_ULL(size);
- mock_vma->node.start = addr;
+ mock_vma_res->bi.pages = obj->mm.pages;
+ mock_vma_res->node_size = BIT_ULL(size);
+ mock_vma_res->start = addr;
with_intel_runtime_pm(vm->gt->uncore->rpm, wakeref)
- vm->insert_entries(vm, mock_vma,
+ vm->insert_entries(vm, mock_vma_res,
I915_CACHE_NONE, 0);
}
count = n;
@@ -371,7 +372,7 @@ alloc_vm_end:
cleanup_freed_objects(vm->i915);
}
- kfree(mock_vma);
+ kfree(mock_vma_res);
return 0;
}
@@ -386,7 +387,7 @@ static void close_object_list(struct list_head *objects,
vma = i915_vma_instance(obj, vm, NULL);
if (!IS_ERR(vma))
- ignored = i915_vma_unbind(vma);
+ ignored = i915_vma_unbind_unlocked(vma);
list_del(&obj->st_link);
i915_gem_object_put(obj);
@@ -497,7 +498,7 @@ static int fill_hole(struct i915_address_space *vm,
goto err;
}
- err = i915_vma_unbind(vma);
+ err = i915_vma_unbind_unlocked(vma);
if (err) {
pr_err("%s(%s) (forward) unbind of vma.node=%llx + %llx failed with err=%d\n",
__func__, p->name, vma->node.start, vma->node.size,
@@ -570,7 +571,7 @@ static int fill_hole(struct i915_address_space *vm,
goto err;
}
- err = i915_vma_unbind(vma);
+ err = i915_vma_unbind_unlocked(vma);
if (err) {
pr_err("%s(%s) (backward) unbind of vma.node=%llx + %llx failed with err=%d\n",
__func__, p->name, vma->node.start, vma->node.size,
@@ -656,7 +657,7 @@ static int walk_hole(struct i915_address_space *vm,
goto err_put;
}
- err = i915_vma_unbind(vma);
+ err = i915_vma_unbind_unlocked(vma);
if (err) {
pr_err("%s unbind failed at %llx + %llx with err=%d\n",
__func__, addr, vma->size, err);
@@ -733,13 +734,13 @@ static int pot_hole(struct i915_address_space *vm,
pr_err("%s incorrect at %llx + %llx\n",
__func__, addr, vma->size);
i915_vma_unpin(vma);
- err = i915_vma_unbind(vma);
+ err = i915_vma_unbind_unlocked(vma);
err = -EINVAL;
goto err_obj;
}
i915_vma_unpin(vma);
- err = i915_vma_unbind(vma);
+ err = i915_vma_unbind_unlocked(vma);
GEM_BUG_ON(err);
}
@@ -833,13 +834,13 @@ static int drunk_hole(struct i915_address_space *vm,
pr_err("%s incorrect at %llx + %llx\n",
__func__, addr, BIT_ULL(size));
i915_vma_unpin(vma);
- err = i915_vma_unbind(vma);
+ err = i915_vma_unbind_unlocked(vma);
err = -EINVAL;
goto err_obj;
}
i915_vma_unpin(vma);
- err = i915_vma_unbind(vma);
+ err = i915_vma_unbind_unlocked(vma);
GEM_BUG_ON(err);
if (igt_timeout(end_time,
@@ -907,7 +908,7 @@ static int __shrink_hole(struct i915_address_space *vm,
pr_err("%s incorrect at %llx + %llx\n",
__func__, addr, size);
i915_vma_unpin(vma);
- err = i915_vma_unbind(vma);
+ err = i915_vma_unbind_unlocked(vma);
err = -EINVAL;
break;
}
@@ -1123,7 +1124,7 @@ static int exercise_ggtt(struct drm_i915_private *i915,
u64 hole_start, u64 hole_end,
unsigned long end_time))
{
- struct i915_ggtt *ggtt = &i915->ggtt;
+ struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
u64 hole_start, hole_end, last = 0;
struct drm_mm_node *node;
IGT_TIMEOUT(end_time);
@@ -1183,7 +1184,7 @@ static int igt_ggtt_page(void *arg)
const unsigned int count = PAGE_SIZE/sizeof(u32);
I915_RND_STATE(prng);
struct drm_i915_private *i915 = arg;
- struct i915_ggtt *ggtt = &i915->ggtt;
+ struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
struct drm_i915_gem_object *obj;
intel_wakeref_t wakeref;
struct drm_mm_node tmp;
@@ -1280,6 +1281,7 @@ static void track_vma_bind(struct i915_vma *vma)
atomic_set(&vma->pages_count, I915_VMA_PAGES_ACTIVE);
__i915_gem_object_pin_pages(obj);
vma->pages = obj->mm.pages;
+ vma->resource->bi.pages = vma->pages;
mutex_lock(&vma->vm->mutex);
list_add_tail(&vma->vm_link, &vma->vm->bound_list);
@@ -1337,6 +1339,33 @@ static int igt_mock_drunk(void *arg)
return exercise_mock(ggtt->vm.i915, drunk_hole);
}
+/*
+ * Reserve @vma's node at @offset in its address space and, on success,
+ * attach a freshly allocated vma resource initialised from @vma.
+ *
+ * Returns 0 on success, otherwise the error from i915_vma_resource_alloc()
+ * or i915_gem_gtt_reserve(). On a reservation failure the resource is freed
+ * and vma->resource is not assigned.
+ */
+static int reserve_gtt_with_resource(struct i915_vma *vma, u64 offset)
+{
+	struct drm_i915_gem_object *bo = vma->obj;
+	struct i915_address_space *space = vma->vm;
+	struct i915_vma_resource *res;
+	int ret;
+
+	res = i915_vma_resource_alloc();
+	if (IS_ERR(res))
+		return PTR_ERR(res);
+
+	/* The reservation and the resource hand-over happen under vm->mutex */
+	mutex_lock(&space->mutex);
+	ret = i915_gem_gtt_reserve(space, NULL, &vma->node, bo->base.size,
+				   offset, bo->cache_level, 0);
+	if (ret) {
+		kfree(res);
+	} else {
+		i915_vma_resource_init_from_vma(res, vma);
+		vma->resource = res;
+	}
+	mutex_unlock(&space->mutex);
+
+	return ret;
+}
+
static int igt_gtt_reserve(void *arg)
{
struct i915_ggtt *ggtt = arg;
@@ -1371,20 +1400,13 @@ static int igt_gtt_reserve(void *arg)
}
list_add(&obj->st_link, &objects);
-
vma = i915_vma_instance(obj, &ggtt->vm, NULL);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
goto out;
}
- mutex_lock(&ggtt->vm.mutex);
- err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node,
- obj->base.size,
- total,
- obj->cache_level,
- 0);
- mutex_unlock(&ggtt->vm.mutex);
+ err = reserve_gtt_with_resource(vma, total);
if (err) {
pr_err("i915_gem_gtt_reserve (pass 1) failed at %llu/%llu with err=%d\n",
total, ggtt->vm.total, err);
@@ -1430,13 +1452,7 @@ static int igt_gtt_reserve(void *arg)
goto out;
}
- mutex_lock(&ggtt->vm.mutex);
- err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node,
- obj->base.size,
- total,
- obj->cache_level,
- 0);
- mutex_unlock(&ggtt->vm.mutex);
+ err = reserve_gtt_with_resource(vma, total);
if (err) {
pr_err("i915_gem_gtt_reserve (pass 2) failed at %llu/%llu with err=%d\n",
total, ggtt->vm.total, err);
@@ -1466,7 +1482,7 @@ static int igt_gtt_reserve(void *arg)
goto out;
}
- err = i915_vma_unbind(vma);
+ err = i915_vma_unbind_unlocked(vma);
if (err) {
pr_err("i915_vma_unbind failed with err=%d!\n", err);
goto out;
@@ -1477,13 +1493,7 @@ static int igt_gtt_reserve(void *arg)
2 * I915_GTT_PAGE_SIZE,
I915_GTT_MIN_ALIGNMENT);
- mutex_lock(&ggtt->vm.mutex);
- err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node,
- obj->base.size,
- offset,
- obj->cache_level,
- 0);
- mutex_unlock(&ggtt->vm.mutex);
+ err = reserve_gtt_with_resource(vma, offset);
if (err) {
pr_err("i915_gem_gtt_reserve (pass 3) failed at %llu/%llu with err=%d\n",
total, ggtt->vm.total, err);
@@ -1510,6 +1520,31 @@ out:
return err;
}
+/*
+ * Insert @vma's node anywhere in [0, vm->total) of its address space and,
+ * on success, attach a freshly allocated vma resource initialised from @vma.
+ *
+ * Returns 0 on success, otherwise the error from i915_vma_resource_alloc()
+ * or i915_gem_gtt_insert(). On an insertion failure the resource is freed
+ * and vma->resource is not assigned.
+ */
+static int insert_gtt_with_resource(struct i915_vma *vma)
+{
+	struct drm_i915_gem_object *bo = vma->obj;
+	struct i915_address_space *space = vma->vm;
+	struct i915_vma_resource *res;
+	int ret;
+
+	res = i915_vma_resource_alloc();
+	if (IS_ERR(res))
+		return PTR_ERR(res);
+
+	/* The insertion and the resource hand-over happen under vm->mutex */
+	mutex_lock(&space->mutex);
+	ret = i915_gem_gtt_insert(space, NULL, &vma->node, bo->base.size, 0,
+				  bo->cache_level, 0, space->total, 0);
+	if (ret) {
+		kfree(res);
+	} else {
+		i915_vma_resource_init_from_vma(res, vma);
+		vma->resource = res;
+	}
+	mutex_unlock(&space->mutex);
+
+	return ret;
+}
+
static int igt_gtt_insert(void *arg)
{
struct i915_ggtt *ggtt = arg;
@@ -1553,7 +1588,7 @@ static int igt_gtt_insert(void *arg)
/* Check a couple of obviously invalid requests */
for (ii = invalid_insert; ii->size; ii++) {
mutex_lock(&ggtt->vm.mutex);
- err = i915_gem_gtt_insert(&ggtt->vm, &tmp,
+ err = i915_gem_gtt_insert(&ggtt->vm, NULL, &tmp,
ii->size, ii->alignment,
I915_COLOR_UNEVICTABLE,
ii->start, ii->end,
@@ -1594,12 +1629,7 @@ static int igt_gtt_insert(void *arg)
goto out;
}
- mutex_lock(&ggtt->vm.mutex);
- err = i915_gem_gtt_insert(&ggtt->vm, &vma->node,
- obj->base.size, 0, obj->cache_level,
- 0, ggtt->vm.total,
- 0);
- mutex_unlock(&ggtt->vm.mutex);
+ err = insert_gtt_with_resource(vma);
if (err == -ENOSPC) {
/* maxed out the GGTT space */
i915_gem_object_put(obj);
@@ -1648,18 +1678,13 @@ static int igt_gtt_insert(void *arg)
GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
offset = vma->node.start;
- err = i915_vma_unbind(vma);
+ err = i915_vma_unbind_unlocked(vma);
if (err) {
pr_err("i915_vma_unbind failed with err=%d!\n", err);
goto out;
}
- mutex_lock(&ggtt->vm.mutex);
- err = i915_gem_gtt_insert(&ggtt->vm, &vma->node,
- obj->base.size, 0, obj->cache_level,
- 0, ggtt->vm.total,
- 0);
- mutex_unlock(&ggtt->vm.mutex);
+ err = insert_gtt_with_resource(vma);
if (err) {
pr_err("i915_gem_gtt_insert (pass 2) failed at %llu/%llu with err=%d\n",
total, ggtt->vm.total, err);
@@ -1703,12 +1728,7 @@ static int igt_gtt_insert(void *arg)
goto out;
}
- mutex_lock(&ggtt->vm.mutex);
- err = i915_gem_gtt_insert(&ggtt->vm, &vma->node,
- obj->base.size, 0, obj->cache_level,
- 0, ggtt->vm.total,
- 0);
- mutex_unlock(&ggtt->vm.mutex);
+ err = insert_gtt_with_resource(vma);
if (err) {
pr_err("i915_gem_gtt_insert (pass 3) failed at %llu/%llu with err=%d\n",
total, ggtt->vm.total, err);
@@ -1738,26 +1758,28 @@ int i915_gem_gtt_mock_selftests(void)
SUBTEST(igt_gtt_insert),
};
struct drm_i915_private *i915;
- struct i915_ggtt *ggtt;
+ struct intel_gt *gt;
int err;
i915 = mock_gem_device();
if (!i915)
return -ENOMEM;
- ggtt = kmalloc(sizeof(*ggtt), GFP_KERNEL);
- if (!ggtt) {
- err = -ENOMEM;
+ /* allocate the ggtt */
+ err = intel_gt_assign_ggtt(to_gt(i915));
+ if (err)
goto out_put;
- }
- mock_init_ggtt(i915, ggtt);
- err = i915_subtests(tests, ggtt);
+ gt = to_gt(i915);
+
+ mock_init_ggtt(gt);
+
+ err = i915_subtests(tests, gt->ggtt);
mock_device_flush(i915);
i915_gem_drain_freed_objects(i915);
- mock_fini_ggtt(ggtt);
- kfree(ggtt);
+ mock_fini_ggtt(gt->ggtt);
+
out_put:
mock_destroy_device(i915);
return err;
@@ -1940,6 +1962,7 @@ static int igt_cs_tlb(void *arg)
struct i915_vm_pt_stash stash = {};
struct i915_request *rq;
struct i915_gem_ww_ctx ww;
+ struct i915_vma_resource *vma_res;
u64 offset;
offset = igt_random_offset(&prng,
@@ -1960,6 +1983,13 @@ static int igt_cs_tlb(void *arg)
if (err)
goto end;
+ vma_res = i915_vma_resource_alloc();
+ if (IS_ERR(vma_res)) {
+ i915_vma_put_pages(vma);
+ err = PTR_ERR(vma_res);
+ goto end;
+ }
+
i915_gem_ww_ctx_init(&ww, false);
retry:
err = i915_vm_lock_objects(vm, &ww);
@@ -1981,33 +2011,41 @@ end_ww:
goto retry;
}
i915_gem_ww_ctx_fini(&ww);
- if (err)
+ if (err) {
+ kfree(vma_res);
goto end;
+ }
+ i915_vma_resource_init_from_vma(vma_res, vma);
/* Prime the TLB with the dummy pages */
for (i = 0; i < count; i++) {
- vma->node.start = offset + i * PAGE_SIZE;
- vm->insert_entries(vm, vma, I915_CACHE_NONE, 0);
+ vma_res->start = offset + i * PAGE_SIZE;
+ vm->insert_entries(vm, vma_res, I915_CACHE_NONE,
+ 0);
- rq = submit_batch(ce, vma->node.start);
+ rq = submit_batch(ce, vma_res->start);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
+ i915_vma_resource_fini(vma_res);
+ kfree(vma_res);
goto end;
}
i915_request_put(rq);
}
-
+ i915_vma_resource_fini(vma_res);
i915_vma_put_pages(vma);
err = context_sync(ce);
if (err) {
pr_err("%s: dummy setup timed out\n",
ce->engine->name);
+ kfree(vma_res);
goto end;
}
vma = i915_vma_instance(act, vm, NULL);
if (IS_ERR(vma)) {
+ kfree(vma_res);
err = PTR_ERR(vma);
goto end;
}
@@ -2015,19 +2053,22 @@ end_ww:
i915_gem_object_lock(act, NULL);
err = i915_vma_get_pages(vma);
i915_gem_object_unlock(act);
- if (err)
+ if (err) {
+ kfree(vma_res);
goto end;
+ }
+ i915_vma_resource_init_from_vma(vma_res, vma);
/* Replace the TLB with target batches */
for (i = 0; i < count; i++) {
struct i915_request *rq;
u32 *cs = batch + i * 64 / sizeof(*cs);
u64 addr;
- vma->node.start = offset + i * PAGE_SIZE;
- vm->insert_entries(vm, vma, I915_CACHE_NONE, 0);
+ vma_res->start = offset + i * PAGE_SIZE;
+ vm->insert_entries(vm, vma_res, I915_CACHE_NONE, 0);
- addr = vma->node.start + i * 64;
+ addr = vma_res->start + i * 64;
cs[4] = MI_NOOP;
cs[6] = lower_32_bits(addr);
cs[7] = upper_32_bits(addr);
@@ -2036,6 +2077,8 @@ end_ww:
rq = submit_batch(ce, addr);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
+ i915_vma_resource_fini(vma_res);
+ kfree(vma_res);
goto end;
}
@@ -2052,6 +2095,8 @@ end_ww:
}
end_spin(batch, count - 1);
+ i915_vma_resource_fini(vma_res);
+ kfree(vma_res);
i915_vma_put_pages(vma);
err = context_sync(ce);
@@ -2115,7 +2160,7 @@ int i915_gem_gtt_live_selftests(struct drm_i915_private *i915)
SUBTEST(igt_cs_tlb),
};
- GEM_BUG_ON(offset_in_page(i915->ggtt.vm.total));
+ GEM_BUG_ON(offset_in_page(to_gt(i915)->ggtt->vm.total));
return i915_subtests(tests, i915);
}
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 3e673fd1ea4f..c56a0c2cd2f7 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -783,6 +783,115 @@ out_spin:
return err;
}
+/*
+ * Test to prove a non-preemptible request can be cancelled and a subsequent
+ * request on the same context can successfully complete after cancellation.
+ *
+ * Testing methodology is to create a non-preemptible request and submit it,
+ * wait for the spinner to start, create a NOP request and submit it, cancel
+ * the spinner, wait for the spinner to complete and verify it failed with an
+ * error, then finally wait for the NOP request to complete and verify it
+ * succeeded without an error. The engine's preemption timeout is reduced for
+ * the duration of the test and restored afterwards so the test runs in a
+ * timely manner.
+ *
+ * Returns 0 on success, or when the test is skipped because
+ * CONFIG_DRM_I915_PREEMPT_TIMEOUT is zero or engine reset is unavailable;
+ * a negative error code otherwise.
+ */
+static int __cancel_reset(struct drm_i915_private *i915,
+			  struct intel_engine_cs *engine)
+{
+	struct intel_context *ce;
+	struct igt_spinner spin;
+	struct i915_request *rq, *nop;
+	unsigned long preempt_timeout_ms;
+	int err = 0;
+
+	if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT ||
+	    !intel_has_reset_engine(engine->gt))
+		return 0;
+
+	/* Shorten the preemption timeout; the old value is restored on exit */
+	preempt_timeout_ms = engine->props.preempt_timeout_ms;
+	engine->props.preempt_timeout_ms = 100;
+
+	if (igt_spinner_init(&spin, engine->gt)) {
+		/* Report the setup failure instead of silently passing */
+		err = -ENOMEM;
+		goto out_restore;
+	}
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce)) {
+		err = PTR_ERR(ce);
+		goto out_spin;
+	}
+
+	rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto out_ce;
+	}
+
+	pr_debug("%s: Cancelling active non-preemptable request\n",
+		 engine->name);
+	i915_request_get(rq);
+	i915_request_add(rq);
+	if (!igt_wait_for_spinner(&spin, rq)) {
+		struct drm_printer p = drm_info_printer(engine->i915->drm.dev);
+
+		pr_err("Failed to start spinner on %s\n", engine->name);
+		intel_engine_dump(engine, &p, "%s\n", engine->name);
+		err = -ETIME;
+		goto out_rq;
+	}
+
+	/* Queue a second request behind the spinner on the same context */
+	nop = intel_context_create_request(ce);
+	if (IS_ERR(nop)) {
+		/* Propagate the failure; previously err stayed 0 here */
+		err = PTR_ERR(nop);
+		goto out_rq;
+	}
+	i915_request_get(nop);
+	i915_request_add(nop);
+
+	i915_request_cancel(rq, -EINTR);
+
+	if (i915_request_wait(rq, 0, HZ) < 0) {
+		struct drm_printer p = drm_info_printer(engine->i915->drm.dev);
+
+		pr_err("%s: Failed to cancel hung request\n", engine->name);
+		intel_engine_dump(engine, &p, "%s\n", engine->name);
+		err = -ETIME;
+		goto out_nop;
+	}
+
+	/* fence.error carries a negative errno, so print it as signed */
+	if (rq->fence.error != -EINTR) {
+		pr_err("%s: fence not cancelled (%d)\n",
+		       engine->name, rq->fence.error);
+		err = -EINVAL;
+		goto out_nop;
+	}
+
+	if (i915_request_wait(nop, 0, HZ) < 0) {
+		struct drm_printer p = drm_info_printer(engine->i915->drm.dev);
+
+		pr_err("%s: Failed to complete nop request\n", engine->name);
+		intel_engine_dump(engine, &p, "%s\n", engine->name);
+		err = -ETIME;
+		goto out_nop;
+	}
+
+	if (nop->fence.error != 0) {
+		pr_err("%s: Nop request errored (%d)\n",
+		       engine->name, nop->fence.error);
+		err = -EINVAL;
+	}
+
+out_nop:
+	i915_request_put(nop);
+out_rq:
+	i915_request_put(rq);
+out_ce:
+	intel_context_put(ce);
+out_spin:
+	igt_spinner_fini(&spin);
+out_restore:
+	engine->props.preempt_timeout_ms = preempt_timeout_ms;
+	if (err)
+		pr_err("%s: %s error %d\n", __func__, engine->name, err);
+	return err;
+}
+
static int live_cancel_request(void *arg)
{
struct drm_i915_private *i915 = arg;
@@ -815,6 +924,14 @@ static int live_cancel_request(void *arg)
return err;
if (err2)
return err2;
+
+ /* Expects reset so call outside of igt_live_test_* */
+ err = __cancel_reset(i915, engine);
+ if (err)
+ return err;
+
+ if (igt_flush_test(i915))
+ return -EIO;
}
return 0;
@@ -844,7 +961,7 @@ static struct i915_vma *empty_batch(struct drm_i915_private *i915)
intel_gt_chipset_flush(to_gt(i915));
- vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
+ vma = i915_vma_instance(obj, &to_gt(i915)->ggtt->vm, NULL);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
goto err;
diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
index cc6f9af679fb..6921ba128015 100644
--- a/drivers/gpu/drm/i915/selftests/i915_vma.c
+++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
@@ -341,7 +341,7 @@ static int igt_vma_pin1(void *arg)
if (!err) {
i915_vma_unpin(vma);
- err = i915_vma_unbind(vma);
+ err = i915_vma_unbind_unlocked(vma);
if (err) {
pr_err("Failed to unbind single page from GGTT, err=%d\n", err);
goto out;
@@ -692,7 +692,7 @@ static int igt_vma_rotate_remap(void *arg)
}
i915_vma_unpin(vma);
- err = i915_vma_unbind(vma);
+ err = i915_vma_unbind_unlocked(vma);
if (err) {
pr_err("Unbinding returned %i\n", err);
goto out_object;
@@ -853,7 +853,7 @@ static int igt_vma_partial(void *arg)
i915_vma_unpin(vma);
nvma++;
- err = i915_vma_unbind(vma);
+ err = i915_vma_unbind_unlocked(vma);
if (err) {
pr_err("Unbinding returned %i\n", err);
goto out_object;
@@ -892,7 +892,7 @@ static int igt_vma_partial(void *arg)
i915_vma_unpin(vma);
- err = i915_vma_unbind(vma);
+ err = i915_vma_unbind_unlocked(vma);
if (err) {
pr_err("Unbinding returned %i\n", err);
goto out_object;
@@ -923,26 +923,28 @@ int i915_vma_mock_selftests(void)
SUBTEST(igt_vma_partial),
};
struct drm_i915_private *i915;
- struct i915_ggtt *ggtt;
+ struct intel_gt *gt;
int err;
i915 = mock_gem_device();
if (!i915)
return -ENOMEM;
- ggtt = kmalloc(sizeof(*ggtt), GFP_KERNEL);
- if (!ggtt) {
- err = -ENOMEM;
+ /* allocate the ggtt */
+ err = intel_gt_assign_ggtt(to_gt(i915));
+ if (err)
goto out_put;
- }
- mock_init_ggtt(i915, ggtt);
- err = i915_subtests(tests, ggtt);
+ gt = to_gt(i915);
+
+ mock_init_ggtt(gt);
+
+ err = i915_subtests(tests, gt->ggtt);
mock_device_flush(i915);
i915_gem_drain_freed_objects(i915);
- mock_fini_ggtt(ggtt);
- kfree(ggtt);
+ mock_fini_ggtt(gt->ggtt);
+
out_put:
mock_destroy_device(i915);
return err;
@@ -983,7 +985,7 @@ static int igt_vma_remapped_gtt(void *arg)
intel_wakeref_t wakeref;
int err = 0;
- if (!i915_ggtt_has_aperture(&i915->ggtt))
+ if (!i915_ggtt_has_aperture(to_gt(i915)->ggtt))
return 0;
obj = i915_gem_object_create_internal(i915, 10 * 10 * PAGE_SIZE);
diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
index b84594601d30..b484e12df417 100644
--- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
@@ -19,7 +19,7 @@ int igt_flush_test(struct drm_i915_private *i915)
cond_resched();
- if (intel_gt_wait_for_idle(gt, HZ) == -ETIME) {
+ if (intel_gt_wait_for_idle(gt, HZ * 3) == -ETIME) {
pr_err("%pS timed out, cancelling all further testing.\n",
__builtin_return_address(0));
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 79e819334b4e..573d9b2e1a4a 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -69,7 +69,7 @@ static void mock_device_release(struct drm_device *dev)
i915_gem_drain_workqueue(i915);
i915_gem_drain_freed_objects(i915);
- mock_fini_ggtt(&i915->ggtt);
+ mock_fini_ggtt(to_gt(i915)->ggtt);
destroy_workqueue(i915->wq);
intel_region_ttm_device_fini(i915);
@@ -196,8 +196,13 @@ struct drm_i915_private *mock_gem_device(void)
mock_init_contexts(i915);
- mock_init_ggtt(i915, &i915->ggtt);
- to_gt(i915)->vm = i915_vm_get(&i915->ggtt.vm);
+ /* allocate the ggtt */
+ ret = intel_gt_assign_ggtt(to_gt(i915));
+ if (ret)
+ goto err_unlock;
+
+ mock_init_ggtt(to_gt(i915));
+ to_gt(i915)->vm = i915_vm_get(&to_gt(i915)->ggtt->vm);
mkwrite_device_info(i915)->platform_engine_mask = BIT(0);
to_gt(i915)->info.engine_mask = BIT(0);
diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c
index 1802baf80a17..568840e7ca66 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c
@@ -33,23 +33,23 @@ static void mock_insert_page(struct i915_address_space *vm,
}
/*
 * Mock insert-entries stub: the body is empty, so none of the arguments
 * are used and nothing is written.
 *
 * The only change recorded here is the type of the second parameter,
 * from struct i915_vma * to struct i915_vma_resource *.
 */
static void mock_insert_entries(struct i915_address_space *vm,
- struct i915_vma *vma,
+ struct i915_vma_resource *vma_res,
enum i915_cache_level level, u32 flags)
{
}
/*
 * Mock ppgtt bind: only records that a bind happened.
 *
 * @vm, @stash and @cache_level are not used by the body.
 * @flags must not contain I915_VMA_GLOBAL_BIND (checked with GEM_BUG_ON).
 *
 * The removed line set I915_VMA_LOCAL_BIND_BIT in the vma's flags
 * regardless of @flags; the added line ORs the caller's @flags into
 * vma_res->bound_flags instead.
 */
static void mock_bind_ppgtt(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
- struct i915_vma *vma,
+ struct i915_vma_resource *vma_res,
enum i915_cache_level cache_level,
u32 flags)
{
/* A global (GGTT) bind request is not expected on this ppgtt path. */
GEM_BUG_ON(flags & I915_VMA_GLOBAL_BIND);
- set_bit(I915_VMA_LOCAL_BIND_BIT, __i915_vma_flags(vma));
+ vma_res->bound_flags |= flags;
}
/*
 * Mock ppgtt unbind stub: the body is empty, so neither argument is used.
 *
 * The only change recorded here is the type of the second parameter,
 * from struct i915_vma * to struct i915_vma_resource *.
 */
static void mock_unbind_ppgtt(struct i915_address_space *vm,
- struct i915_vma *vma)
+ struct i915_vma_resource *vma_res)
{
}
@@ -93,23 +93,23 @@ struct i915_ppgtt *mock_ppgtt(struct drm_i915_private *i915, const char *name)
/*
 * Mock GGTT bind stub: the body is empty, so none of the arguments are
 * used and, unlike mock_bind_ppgtt(), no bound flags are recorded.
 *
 * The only change recorded here is the type of the third parameter,
 * from struct i915_vma * to struct i915_vma_resource *.
 */
static void mock_bind_ggtt(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
- struct i915_vma *vma,
+ struct i915_vma_resource *vma_res,
enum i915_cache_level cache_level,
u32 flags)
{
}
/*
 * Mock GGTT unbind stub: the body is empty, so neither argument is used.
 * mock_init_ggtt() installs this function as ggtt->vm.vma_ops.unbind_vma.
 *
 * The only change recorded here is the type of the second parameter,
 * from struct i915_vma * to struct i915_vma_resource *.
 */
static void mock_unbind_ggtt(struct i915_address_space *vm,
- struct i915_vma *vma)
+ struct i915_vma_resource *vma_res)
{
}
-void mock_init_ggtt(struct drm_i915_private *i915, struct i915_ggtt *ggtt)
+void mock_init_ggtt(struct intel_gt *gt)
{
- memset(ggtt, 0, sizeof(*ggtt));
+ struct i915_ggtt *ggtt = gt->ggtt;
- ggtt->vm.gt = to_gt(i915);
- ggtt->vm.i915 = i915;
+ ggtt->vm.gt = gt;
+ ggtt->vm.i915 = gt->i915;
ggtt->vm.is_ggtt = true;
ggtt->gmadr = (struct resource) DEFINE_RES_MEM(0, 2048 * PAGE_SIZE);
@@ -128,7 +128,6 @@ void mock_init_ggtt(struct drm_i915_private *i915, struct i915_ggtt *ggtt)
ggtt->vm.vma_ops.unbind_vma = mock_unbind_ggtt;
i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);
- to_gt(i915)->ggtt = ggtt;
}
void mock_fini_ggtt(struct i915_ggtt *ggtt)
diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.h b/drivers/gpu/drm/i915/selftests/mock_gtt.h
index e3f224f43beb..d6eb90bd7f3f 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gtt.h
+++ b/drivers/gpu/drm/i915/selftests/mock_gtt.h
@@ -27,8 +27,9 @@
struct drm_i915_private;
struct i915_ggtt;
+struct intel_gt;
-void mock_init_ggtt(struct drm_i915_private *i915, struct i915_ggtt *ggtt);
+void mock_init_ggtt(struct intel_gt *gt);
void mock_fini_ggtt(struct i915_ggtt *ggtt);
struct i915_ppgtt *mock_ppgtt(struct drm_i915_private *i915, const char *name);