5 files changed, 10890 insertions, 0 deletions
diff --git a/rr-cache/262ab4574fb8b4717f8a62b4197ecfb8b015ab7f/preimage b/rr-cache/262ab4574fb8b4717f8a62b4197ecfb8b015ab7f/preimage
new file mode 100644
index 000000000000..875cfe7509b8
--- /dev/null
+++ b/rr-cache/262ab4574fb8b4717f8a62b4197ecfb8b015ab7f/preimage
@@ -0,0 +1,971 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_exec_queue.h"
+
+#include <linux/nospec.h>
+
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+#include <uapi/drm/xe_drm.h>
+
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_hw_engine_class_sysfs.h"
+#include "xe_hw_engine_group.h"
+#include "xe_hw_fence.h"
+#include "xe_lrc.h"
+#include "xe_macros.h"
+#include "xe_migrate.h"
+#include "xe_pm.h"
+#include "xe_ring_ops_types.h"
+#include "xe_trace.h"
+#include "xe_vm.h"
+
+/*
+ * Scheduling properties whose allowed [min, max] range can be queried with
+ * xe_exec_queue_get_prop_minmax().
+ */
+enum xe_exec_queue_sched_prop {
+	XE_EXEC_QUEUE_JOB_TIMEOUT = 0,
+	XE_EXEC_QUEUE_TIMESLICE = 1,
+	XE_EXEC_QUEUE_PREEMPT_TIMEOUT = 2,
+	/* One past the last property above; not a property itself. */
+	XE_EXEC_QUEUE_SCHED_PROP_MAX = 3,
+};
+
+static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q,
+				      u64 extensions, int ext_number);
+
+/*
+ * Release the references an exec queue holds on its VM and on its xe file
+ * (either may be unset) and free the queue memory itself.
+ */
+static void __xe_exec_queue_free(struct xe_exec_queue *q)
+{
+	struct xe_vm *vm = q->vm;
+	struct xe_file *xef = q->xef;
+
+	if (vm)
+		xe_vm_put(vm);
+	if (xef)
+		xe_file_put(xef);
+
+	kfree(q);
+}
+
+/*
+ * Allocate an exec queue with room for @width LRC pointers and fill in every
+ * field that can be derived from @hwe, @flags, @logical_mask and @vm.
+ *
+ * Scheduling properties start out as the engine-class defaults. A reference
+ * is taken on @vm when it is non-NULL. If @extensions is non-zero the user
+ * extension chain is applied on top of the defaults. No LRC is created and
+ * q->ops->init() is not called here; see __xe_exec_queue_init().
+ *
+ * Returns the new queue, or ERR_PTR(-ENOMEM) / ERR_PTR(extension error).
+ */
+static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
+						   struct xe_vm *vm,
+						   u32 logical_mask,
+						   u16 width, struct xe_hw_engine *hwe,
+						   u32 flags, u64 extensions)
+{
+	struct xe_exec_queue *q;
+	struct xe_gt *gt = hwe->gt;
+	int err;
+
+	/* only kernel queues can be permanent */
+	XE_WARN_ON((flags & EXEC_QUEUE_FLAG_PERMANENT) && !(flags & EXEC_QUEUE_FLAG_KERNEL));
+
+	/* lrc[] is a trailing array sized by width; kzalloc leaves all slots NULL */
+	q = kzalloc(struct_size(q, lrc, width), GFP_KERNEL);
+	if (!q)
+		return ERR_PTR(-ENOMEM);
+
+	kref_init(&q->refcount);
+	q->flags = flags;
+	q->hwe = hwe;
+	q->gt = gt;
+	q->class = hwe->class;
+	q->width = width;
+	q->logical_mask = logical_mask;
+	q->fence_irq = &gt->fence_irq[hwe->class];
+	q->ring_ops = gt->ring_ops[hwe->class];
+	q->ops = gt->exec_queue_ops;
+	INIT_LIST_HEAD(&q->lr.link);
+	INIT_LIST_HEAD(&q->multi_gt_link);
+	INIT_LIST_HEAD(&q->hw_engine_group_link);
+
+	/* Engine-class defaults; user extensions below may override them. */
+	q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us;
+	q->sched_props.preempt_timeout_us =
+				hwe->eclass->sched_props.preempt_timeout_us;
+	q->sched_props.job_timeout_ms =
+				hwe->eclass->sched_props.job_timeout_ms;
+	/* Kernel priority requires both the KERNEL and HIGH_PRIORITY flags. */
+	if (q->flags & EXEC_QUEUE_FLAG_KERNEL &&
+	    q->flags & EXEC_QUEUE_FLAG_HIGH_PRIORITY)
+		q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_KERNEL;
+	else
+		q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_NORMAL;
+
+	/* The queue owns this reference; __xe_exec_queue_free() drops it. */
+	if (vm)
+		q->vm = xe_vm_get(vm);
+
+	if (extensions) {
+		/*
+		 * may set q->usm, must come before xe_lrc_create(),
+		 * may overwrite q->sched_props, must come before q->ops->init()
+		 */
+		err = exec_queue_user_extensions(xe, q, extensions, 0);
+		if (err) {
+			__xe_exec_queue_free(q);
+			return ERR_PTR(err);
+		}
+	}
+
+	return q;
+}
+
+/*
+ * Second stage of queue creation: create one LRC per width slot (with the
+ * queue's VM locked, when it has one) and then call the backend's
+ * q->ops->init().
+ *
+ * Returns 0 on success or a negative error code. On failure every LRC created
+ * so far is put again; the queue itself is left for the caller to free.
+ */
+static int __xe_exec_queue_init(struct xe_exec_queue *q)
+{
+	struct xe_vm *vm = q->vm;
+	int i, err;
+
+	if (vm) {
+		err = xe_vm_lock(vm, true);
+		if (err)
+			return err;
+	}
+
+	for (i = 0; i < q->width; ++i) {
+		q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K);
+		if (IS_ERR(q->lrc[i])) {
+			err = PTR_ERR(q->lrc[i]);
+			goto err_unlock;
+		}
+	}
+
+	/* The VM lock is released before the backend init is called. */
+	if (vm)
+		xe_vm_unlock(vm);
+
+	err = q->ops->init(q);
+	if (err)
+		goto err_lrc;
+
+	return 0;
+
+err_unlock:
+	if (vm)
+		xe_vm_unlock(vm);
+err_lrc:
+	/*
+	 * i is the number of LRCs that were created successfully: the failing
+	 * index when coming from err_unlock, q->width when ops->init() failed.
+	 */
+	for (i = i - 1; i >= 0; --i)
+		xe_lrc_put(q->lrc[i]);
+	return err;
+}
+
+/*
+ * Allocate and fully initialise an exec queue.
+ *
+ * Runs __xe_exec_queue_alloc() followed by __xe_exec_queue_init(); if the
+ * second step fails the freshly allocated queue is freed again.
+ *
+ * Returns the queue on success, ERR_PTR on failure.
+ */
+struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm,
+					   u32 logical_mask, u16 width,
+					   struct xe_hw_engine *hwe, u32 flags,
+					   u64 extensions)
+{
+	struct xe_exec_queue *queue;
+	int ret;
+
+	queue = __xe_exec_queue_alloc(xe, vm, logical_mask, width, hwe, flags,
+				      extensions);
+	if (IS_ERR(queue))
+		return queue;
+
+	ret = __xe_exec_queue_init(queue);
+	if (!ret)
+		return queue;
+
+	__xe_exec_queue_free(queue);
+	return ERR_PTR(ret);
+}
+
+/*
+ * Create a width-1 exec queue that may be placed on any non-reserved engine
+ * of @class on @gt.
+ *
+ * The logical mask is the union of the logical instances of all matching
+ * engines; the first matching engine is the one handed to
+ * xe_exec_queue_create(). Returns ERR_PTR(-ENODEV) if no engine matches.
+ */
+struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt,
+						 struct xe_vm *vm,
+						 enum xe_engine_class class,
+						 u32 flags, u64 extensions)
+{
+	struct xe_hw_engine *first_match = NULL;
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	u32 mask = 0;
+
+	for_each_hw_engine(hwe, gt, id) {
+		if (xe_hw_engine_is_reserved(hwe) || hwe->class != class)
+			continue;
+
+		mask |= BIT(hwe->logical_instance);
+		if (!first_match)
+			first_match = hwe;
+	}
+
+	if (mask == 0)
+		return ERR_PTR(-ENODEV);
+
+	return xe_exec_queue_create(xe, vm, mask, 1, first_match, flags, extensions);
+}
+
+/**
+ * xe_exec_queue_create_bind() - Create bind exec queue.
+ * @xe: Xe device.
+ * @tile: tile which bind exec queue belongs to.
+ * @flags: exec queue creation flags
+ * @extensions: exec queue creation extensions
+ *
+ * Normalize bind exec queue creation. Bind exec queue is tied to migration VM
+ * for access to physical memory required for page table programming. On
+ * faulting devices the reserved copy engine instance must be used to avoid
+ * deadlocking (user binds cannot get stuck behind faults as kernel binds which
+ * resolve faults depend on user binds). On non-faulting devices any copy engine
+ * can be used.
+ *
+ * The migrate VM reference obtained here is only held for the duration of the
+ * call; it is dropped on every exit path, including the early error return.
+ *
+ * Returns exec queue on success, ERR_PTR on failure
+ */
+struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe,
+						struct xe_tile *tile,
+						u32 flags, u64 extensions)
+{
+	struct xe_gt *gt = tile->primary_gt;
+	struct xe_exec_queue *q;
+	struct xe_vm *migrate_vm;
+
+	migrate_vm = xe_migrate_get_vm(tile->migrate);
+	if (xe->info.has_usm) {
+		struct xe_hw_engine *hwe = xe_gt_hw_engine(gt,
+							   XE_ENGINE_CLASS_COPY,
+							   gt->usm.reserved_bcs_instance,
+							   false);
+
+		if (!hwe) {
+			/* Do not leak the migrate VM reference taken above. */
+			xe_vm_put(migrate_vm);
+			return ERR_PTR(-EINVAL);
+		}
+
+		q = xe_exec_queue_create(xe, migrate_vm,
+					 BIT(hwe->logical_instance), 1, hwe,
+					 flags, extensions);
+	} else {
+		q = xe_exec_queue_create_class(xe, gt, migrate_vm,
+					       XE_ENGINE_CLASS_COPY, flags,
+					       extensions);
+	}
+	/* A created queue holds its own VM reference; drop the local one. */
+	xe_vm_put(migrate_vm);
+
+	return q;
+}
+
+/*
+ * kref release callback for an exec queue (@ref is q->refcount).
+ *
+ * Drops the queue's last fence, puts every queue linked on multi_gt_list
+ * unless this queue is itself a bind-engine child, and finally hands the
+ * queue to the backend's fini hook.
+ */
+void xe_exec_queue_destroy(struct kref *ref)
+{
+	struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount);
+	struct xe_exec_queue *eq, *next;
+
+	xe_exec_queue_last_fence_put_unlocked(q);
+	if (!(q->flags & EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD)) {
+		/* _safe variant: the put may release eq while iterating */
+		list_for_each_entry_safe(eq, next, &q->multi_gt_list,
+					 multi_gt_link)
+			xe_exec_queue_put(eq);
+	}
+
+	/* NOTE(review): q is presumably freed via ops->fini(); do not touch it afterwards. */
+	q->ops->fini(q);
+}
+
+/*
+ * Put every LRC of the queue (q->width of them) and then free the queue
+ * together with its VM and file references.
+ */
+void xe_exec_queue_fini(struct xe_exec_queue *q)
+{
+	int slot = 0;
+
+	while (slot < q->width) {
+		xe_lrc_put(q->lrc[slot]);
+		++slot;
+	}
+
+	__xe_exec_queue_free(q);
+}
+
+/*
+ * Fill q->name with "<engine class prefix><instance>", e.g. "rcs0".
+ *
+ * @instance is unsigned, so it is formatted with %u. For an unknown class a
+ * warning is emitted and q->name is left untouched.
+ */
+void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance)
+{
+	switch (q->class) {
+	case XE_ENGINE_CLASS_RENDER:
+		snprintf(q->name, sizeof(q->name), "rcs%u", instance);
+		break;
+	case XE_ENGINE_CLASS_VIDEO_DECODE:
+		snprintf(q->name, sizeof(q->name), "vcs%u", instance);
+		break;
+	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
+		snprintf(q->name, sizeof(q->name), "vecs%u", instance);
+		break;
+	case XE_ENGINE_CLASS_COPY:
+		snprintf(q->name, sizeof(q->name), "bcs%u", instance);
+		break;
+	case XE_ENGINE_CLASS_COMPUTE:
+		snprintf(q->name, sizeof(q->name), "ccs%u", instance);
+		break;
+	case XE_ENGINE_CLASS_OTHER:
+		snprintf(q->name, sizeof(q->name), "gsccs%u", instance);
+		break;
+	default:
+		XE_WARN_ON(q->class);
+	}
+}
+
+/*
+ * Look up the exec queue registered under @id in @xef.
+ *
+ * The lookup and the reference grab both happen under xef->exec_queue.lock.
+ * Returns the queue with an extra reference held, or NULL if @id is unknown.
+ */
+struct xe_exec_queue *xe_exec_queue_lookup(struct xe_file *xef, u32 id)
+{
+	struct xe_exec_queue *found;
+
+	mutex_lock(&xef->exec_queue.lock);
+
+	found = xa_load(&xef->exec_queue.xa, id);
+	if (found)
+		xe_exec_queue_get(found);
+
+	mutex_unlock(&xef->exec_queue.lock);
+
+	return found;
+}
+
+/*
+ * Highest priority the calling task may request: HIGH when it has
+ * CAP_SYS_NICE, NORMAL otherwise. @xe is not consulted.
+ */
+enum xe_exec_queue_priority
+xe_exec_queue_device_get_max_priority(struct xe_device *xe)
+{
+	if (capable(CAP_SYS_NICE))
+		return XE_EXEC_QUEUE_PRIORITY_HIGH;
+
+	return XE_EXEC_QUEUE_PRIORITY_NORMAL;
+}
+
+/*
+ * Set-property handler for DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY.
+ *
+ * Returns -EINVAL if @value is above XE_EXEC_QUEUE_PRIORITY_HIGH, -EPERM if it
+ * is above what xe_exec_queue_device_get_max_priority() allows the caller,
+ * otherwise stores @value in q->sched_props.priority and returns 0.
+ */
+static int exec_queue_set_priority(struct xe_device *xe, struct xe_exec_queue *q,
+				   u64 value)
+{
+	/* Range check first, so out-of-range values never reach the capability check. */
+	if (XE_IOCTL_DBG(xe, value > XE_EXEC_QUEUE_PRIORITY_HIGH))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, value > xe_exec_queue_device_get_max_priority(xe)))
+		return -EPERM;
+
+	q->sched_props.priority = value;
+	return 0;
+}
+
+/*
+ * Whether user-supplied scheduling values must be range checked: always when
+ * CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT is enabled, otherwise only for
+ * callers without CAP_SYS_NICE.
+ */
+static bool xe_exec_queue_enforce_schedule_limit(void)
+{
+	/* Short-circuit keeps capable() uncalled when the option is enabled. */
+	return IS_ENABLED(CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT) ||
+	       !capable(CAP_SYS_NICE);
+}
+
+/*
+ * Fetch the allowed range for scheduling property @prop.
+ *
+ * The range normally comes from @eclass. When
+ * CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT is enabled and the caller has
+ * CAP_SYS_NICE, it is replaced by the XE_HW_ENGINE_*_MIN/MAX constants.
+ * For a @prop that is not handled, *min and *max are left untouched.
+ */
+static void
+xe_exec_queue_get_prop_minmax(struct xe_hw_engine_class_intf *eclass,
+			      enum xe_exec_queue_sched_prop prop,
+			      u32 *min, u32 *max)
+{
+	switch (prop) {
+	case XE_EXEC_QUEUE_JOB_TIMEOUT:
+		*min = eclass->sched_props.job_timeout_min;
+		*max = eclass->sched_props.job_timeout_max;
+		break;
+	case XE_EXEC_QUEUE_TIMESLICE:
+		*min = eclass->sched_props.timeslice_min;
+		*max = eclass->sched_props.timeslice_max;
+		break;
+	case XE_EXEC_QUEUE_PREEMPT_TIMEOUT:
+		*min = eclass->sched_props.preempt_timeout_min;
+		*max = eclass->sched_props.preempt_timeout_max;
+		break;
+	default:
+		break;
+	}
+#if IS_ENABLED(CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT)
+	/* Callers with CAP_SYS_NICE get the XE_HW_ENGINE_* limits instead. */
+	if (capable(CAP_SYS_NICE)) {
+		switch (prop) {
+		case XE_EXEC_QUEUE_JOB_TIMEOUT:
+			*min = XE_HW_ENGINE_JOB_TIMEOUT_MIN;
+			*max = XE_HW_ENGINE_JOB_TIMEOUT_MAX;
+			break;
+		case XE_EXEC_QUEUE_TIMESLICE:
+			*min = XE_HW_ENGINE_TIMESLICE_MIN;
+			*max = XE_HW_ENGINE_TIMESLICE_MAX;
+			break;
+		case XE_EXEC_QUEUE_PREEMPT_TIMEOUT:
+			*min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN;
+			*max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX;
+			break;
+		default:
+			break;
+		}
+	}
+#endif
+}
+
+/*
+ * Set-property handler for DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE.
+ *
+ * When limits are enforced (see xe_exec_queue_enforce_schedule_limit()) and
+ * @value is outside the range reported for the queue's engine class, returns
+ * -EINVAL. Otherwise stores @value in q->sched_props.timeslice_us and
+ * returns 0.
+ */
+static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue *q,
+				    u64 value)
+{
+	/* Zero-initialised because get_prop_minmax() may leave them untouched. */
+	u32 min = 0, max = 0;
+
+	xe_exec_queue_get_prop_minmax(q->hwe->eclass,
+				      XE_EXEC_QUEUE_TIMESLICE, &min, &max);
+
+	if (xe_exec_queue_enforce_schedule_limit() &&
+	    !xe_hw_engine_timeout_in_range(value, min, max))
+		return -EINVAL;
+
+	q->sched_props.timeslice_us = value;
+	return 0;
+}
+
+/* Handler signature for one settable exec queue property. */
+typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe,
+					     struct xe_exec_queue *q,
+					     u64 value);
+
+/*
+ * Dispatch table indexed by the DRM_XE_EXEC_QUEUE_SET_PROPERTY_* value taken
+ * from userspace; used by exec_queue_user_ext_set_property().
+ */
+static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = {
+	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority,
+	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice,
+};
+
+/*
+ * Handle a DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY user extension.
+ *
+ * @extension is the raw user address of a struct drm_xe_ext_set_property.
+ * It is copied in with copy_from_user(), which validates the address range
+ * itself; the address comes straight from userspace and is not checked
+ * anywhere else.
+ *
+ * Returns -EFAULT if the copy fails, -EINVAL for a non-zero pad or an
+ * unknown property, otherwise the result of the property handler.
+ */
+static int exec_queue_user_ext_set_property(struct xe_device *xe,
+					    struct xe_exec_queue *q,
+					    u64 extension)
+{
+	u64 __user *address = u64_to_user_ptr(extension);
+	struct drm_xe_ext_set_property ext;
+	int err;
+	u32 idx;
+
+	err = copy_from_user(&ext, address, sizeof(ext));
+	if (XE_IOCTL_DBG(xe, err))
+		return -EFAULT;
+
+	if (XE_IOCTL_DBG(xe, ext.property >=
+			 ARRAY_SIZE(exec_queue_set_property_funcs)) ||
+	    XE_IOCTL_DBG(xe, ext.pad) ||
+	    XE_IOCTL_DBG(xe, ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY &&
+			 ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE))
+		return -EINVAL;
+
+	/* Clamp the index under speculation before using it on the table. */
+	idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs));
+	if (!exec_queue_set_property_funcs[idx])
+		return -EINVAL;
+
+	return exec_queue_set_property_funcs[idx](xe, q, ext.value);
+}
+
+/*
+ * Handler signature for one user extension; @extension is the user address
+ * of the extension structure.
+ */
+typedef int (*xe_exec_queue_user_extension_fn)(struct xe_device *xe,
+					       struct xe_exec_queue *q,
+					       u64 extension);
+
+/* Dispatch table indexed by the extension name taken from userspace. */
+static const xe_exec_queue_user_extension_fn exec_queue_user_extension_funcs[] = {
+	[DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY] = exec_queue_user_ext_set_property,
+};
+
+/* Upper bound on the length of a user extension chain. */
+#define MAX_USER_EXTENSIONS	16
+/*
+ * Walk the chain of user extensions starting at user address @extensions and
+ * apply each one to @q.
+ *
+ * @ext_number is the position of this extension in the chain (0 for the
+ * first). The chain is followed through ext.next_extension by recursion, and
+ * is cut off with -E2BIG once MAX_USER_EXTENSIONS entries were processed.
+ *
+ * The header is copied in with copy_from_user(), which validates the address
+ * range itself; both the initial address and every next_extension link come
+ * straight from userspace.
+ *
+ * Returns 0 on success, -E2BIG, -EFAULT, -EINVAL, or a handler's error.
+ */
+static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q,
+				      u64 extensions, int ext_number)
+{
+	u64 __user *address = u64_to_user_ptr(extensions);
+	struct drm_xe_user_extension ext;
+	int err;
+	u32 idx;
+
+	if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS))
+		return -E2BIG;
+
+	err = copy_from_user(&ext, address, sizeof(ext));
+	if (XE_IOCTL_DBG(xe, err))
+		return -EFAULT;
+
+	if (XE_IOCTL_DBG(xe, ext.pad) ||
+	    XE_IOCTL_DBG(xe, ext.name >=
+			 ARRAY_SIZE(exec_queue_user_extension_funcs)))
+		return -EINVAL;
+
+	/* Clamp the index under speculation before using it on the table. */
+	idx = array_index_nospec(ext.name,
+				 ARRAY_SIZE(exec_queue_user_extension_funcs));
+	err = exec_queue_user_extension_funcs[idx](xe, q, extensions);
+	if (XE_IOCTL_DBG(xe, err))
+		return err;
+
+	if (ext.next_extension)
+		return exec_queue_user_extensions(xe, q, ext.next_extension,
+						  ++ext_number);
+
+	return 0;
+}
+
+/*
+ * Validate the user-supplied engine placements and build the logical mask.
+ *
+ * @eci holds width * num_placements entries and is read as
+ * eci[j * width + i] for placement j and parallel slot i. @gt is not
+ * referenced in the body.
+ *
+ * Returns 0 if validation fails: more than one entry while uC is not enabled,
+ * an unknown or reserved engine, entries that differ in gt_id or
+ * engine_class, or a slot whose instance mask is not the previous slot's mask
+ * shifted left by one. Otherwise returns the mask of engine_instance bits
+ * collected for slot 0.
+ */
+static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt,
+				      struct drm_xe_engine_class_instance *eci,
+				      u16 width, u16 num_placements)
+{
+	int len = width * num_placements;
+	int i, j, n;
+	u16 class;
+	u16 gt_id;
+	u32 return_mask = 0, prev_mask;
+
+	if (XE_IOCTL_DBG(xe, !xe_device_uc_enabled(xe) &&
+			 len > 1))
+		return 0;
+
+	for (i = 0; i < width; ++i) {
+		u32 current_mask = 0;
+
+		for (j = 0; j < num_placements; ++j) {
+			struct xe_hw_engine *hwe;
+
+			n = j * width + i;
+
+			hwe = xe_hw_engine_lookup(xe, eci[n]);
+			if (XE_IOCTL_DBG(xe, !hwe))
+				return 0;
+
+			if (XE_IOCTL_DBG(xe, xe_hw_engine_is_reserved(hwe)))
+				return 0;
+
+			/*
+			 * n == 0 only for the very first entry visited, which
+			 * seeds class and gt_id below; the "n &&" guard keeps
+			 * them from being read before that.
+			 */
+			if (XE_IOCTL_DBG(xe, n && eci[n].gt_id != gt_id) ||
+			    XE_IOCTL_DBG(xe, n && eci[n].engine_class != class))
+				return 0;
+
+			class = eci[n].engine_class;
+			gt_id = eci[n].gt_id;
+
+			/* Only slot 0 contributes to the returned mask. */
+			if (width == 1 || !i)
+				return_mask |= BIT(eci[n].engine_instance);
+			current_mask |= BIT(eci[n].engine_instance);
+		}
+
+		/* Parallel submissions must be logically contiguous */
+		if (i && XE_IOCTL_DBG(xe, current_mask != prev_mask << 1))
+			return 0;
+
+		prev_mask = current_mask;
+	}
+
+	return return_mask;
+}
+
+/**
+ * xe_exec_queue_create_ioctl() - Create an exec queue on behalf of userspace
+ * @dev: DRM device
+ * @data: ioctl payload, a struct drm_xe_exec_queue_create
+ * @file: DRM file the ioctl was issued on
+ *
+ * For the VM_BIND engine class one bind queue is created per tile; the queue
+ * of tile 0 is the one registered, and the queues of the other tiles are
+ * linked to it. For every other class a single queue is created on the
+ * requested placements and attached to the VM named by args->vm_id.
+ *
+ * On success the new id is stored in args->exec_queue_id.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct drm_xe_exec_queue_create *args = data;
+	struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE];
+	struct drm_xe_engine_class_instance __user *user_eci =
+		u64_to_user_ptr(args->instances);
+	struct xe_hw_engine *hwe;
+	struct xe_vm *vm;
+	struct xe_gt *gt;
+	struct xe_tile *tile;
+	struct xe_exec_queue *q = NULL;
+	u32 logical_mask;
+	u32 id;
+	u32 len;
+	int err;
+
+	if (XE_IOCTL_DBG(xe, args->flags) ||
+	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
+		return -EINVAL;
+
+	/* len is bounded here, so the copy below cannot overrun eci[]. */
+	len = args->width * args->num_placements;
+	if (XE_IOCTL_DBG(xe, !len || len > XE_HW_ENGINE_MAX_INSTANCE))
+		return -EINVAL;
+
+	/*
+	 * args->instances is a raw user-supplied address, so use the checked
+	 * copy_from_user() rather than __copy_from_user().
+	 */
+	err = copy_from_user(eci, user_eci,
+			     sizeof(struct drm_xe_engine_class_instance) *
+			     len);
+	if (XE_IOCTL_DBG(xe, err))
+		return -EFAULT;
+
+	if (XE_IOCTL_DBG(xe, eci[0].gt_id >= xe->info.gt_count))
+		return -EINVAL;
+
+	if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) {
+		if (XE_IOCTL_DBG(xe, args->width != 1) ||
+		    XE_IOCTL_DBG(xe, args->num_placements != 1) ||
+		    XE_IOCTL_DBG(xe, eci[0].engine_instance != 0))
+			return -EINVAL;
+
+		for_each_tile(tile, xe, id) {
+			struct xe_exec_queue *new;
+			u32 flags = EXEC_QUEUE_FLAG_VM;
+
+			/* Queues of tiles other than 0 are children of q. */
+			if (id)
+				flags |= EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD;
+
+			new = xe_exec_queue_create_bind(xe, tile, flags,
+							args->extensions);
+			if (IS_ERR(new)) {
+				err = PTR_ERR(new);
+				if (q)
+					goto put_exec_queue;
+				return err;
+			}
+			if (id == 0)
+				q = new;
+			else
+				list_add_tail(&new->multi_gt_list,
+					      &q->multi_gt_link);
+		}
+	} else {
+		gt = xe_device_get_gt(xe, eci[0].gt_id);
+		logical_mask = calc_validate_logical_mask(xe, gt, eci,
+							  args->width,
+							  args->num_placements);
+		if (XE_IOCTL_DBG(xe, !logical_mask))
+			return -EINVAL;
+
+		hwe = xe_hw_engine_lookup(xe, eci[0]);
+		if (XE_IOCTL_DBG(xe, !hwe))
+			return -EINVAL;
+
+		vm = xe_vm_lookup(xef, args->vm_id);
+		if (XE_IOCTL_DBG(xe, !vm))
+			return -ENOENT;
+
+		err = down_read_interruptible(&vm->lock);
+		if (err) {
+			xe_vm_put(vm);
+			return err;
+		}
+
+		if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
+			up_read(&vm->lock);
+			xe_vm_put(vm);
+			return -ENOENT;
+		}
+
+		q = xe_exec_queue_create(xe, vm, logical_mask,
+					 args->width, hwe, 0,
+					 args->extensions);
+		up_read(&vm->lock);
+		/*
+		 * Drop the lookup reference. A successfully created queue
+		 * holds its own reference on vm, which keeps the uses of vm
+		 * below valid.
+		 */
+		xe_vm_put(vm);
+		if (IS_ERR(q))
+			return PTR_ERR(q);
+
+		if (xe_vm_in_preempt_fence_mode(vm)) {
+			q->lr.context = dma_fence_context_alloc(1);
+
+			err = xe_vm_add_compute_exec_queue(vm, q);
+			if (XE_IOCTL_DBG(xe, err))
+				goto put_exec_queue;
+		}
+
+		if (q->vm && q->hwe->hw_engine_group) {
+			err = xe_hw_engine_group_add_exec_queue(q->hwe->hw_engine_group, q);
+			if (err)
+				goto put_exec_queue;
+		}
+	}
+
+	/*
+	 * Take the file reference before the queue is published. Once
+	 * xa_alloc() succeeds, another thread can use the new id to destroy
+	 * the queue, so q must not be written to after that point. If
+	 * xa_alloc() fails, the reference is dropped when the queue is freed.
+	 */
+	q->xef = xe_file_get(xef);
+
+	/* Publishing the id must be the last step that touches q. */
+	mutex_lock(&xef->exec_queue.lock);
+	err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL);
+	mutex_unlock(&xef->exec_queue.lock);
+	if (err)
+		goto kill_exec_queue;
+
+	args->exec_queue_id = id;
+
+	return 0;
+
+kill_exec_queue:
+	xe_exec_queue_kill(q);
+put_exec_queue:
+	xe_exec_queue_put(q);
+	return err;
+}
+
+/*
+ * ioctl: read a property of an exec queue.
+ *
+ * Only DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN is supported; its value is whatever
+ * the backend's reset_status() hook reports. Returns -EINVAL for non-zero
+ * reserved fields or an unknown property, -ENOENT for an unknown queue id.
+ */
+int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data,
+				     struct drm_file *file)
+{
+	struct drm_xe_exec_queue_get_property *args = data;
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct xe_exec_queue *queue;
+	int status;
+
+	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
+		return -EINVAL;
+
+	queue = xe_exec_queue_lookup(xef, args->exec_queue_id);
+	if (XE_IOCTL_DBG(xe, !queue))
+		return -ENOENT;
+
+	if (args->property == DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN) {
+		args->value = queue->ops->reset_status(queue);
+		status = 0;
+	} else {
+		status = -EINVAL;
+	}
+
+	/* Balance the reference taken by the lookup. */
+	xe_exec_queue_put(queue);
+
+	return status;
+}
+
+/**
+ * xe_exec_queue_is_lr() - Whether an exec_queue is long-running
+ * @q: The exec_queue
+ *
+ * A queue counts as long-running when it has a VM, that VM is in LR mode and
+ * the queue does not carry EXEC_QUEUE_FLAG_VM.
+ *
+ * Return: True if the exec_queue is long-running, false otherwise.
+ */
+bool xe_exec_queue_is_lr(struct xe_exec_queue *q)
+{
+	if (!q->vm)
+		return false;
+
+	if (!xe_vm_in_lr_mode(q->vm))
+		return false;
+
+	return !(q->flags & EXEC_QUEUE_FLAG_VM);
+}
+
+/*
+ * Number of jobs in flight on the queue's first LRC: the distance between the
+ * next seqno to be handed out and the LRC's current seqno, minus one.
+ */
+static s32 xe_exec_queue_num_job_inflight(struct xe_exec_queue *q)
+{
+	struct xe_lrc *first = q->lrc[0];
+
+	return first->fence_ctx.next_seqno - xe_lrc_seqno(first) - 1;
+}
+
+/**
+ * xe_exec_queue_ring_full() - Whether an exec_queue's ring is full
+ * @q: The exec_queue
+ *
+ * The ring of the first LRC holds ring.size / MAX_JOB_SIZE_BYTES jobs; the
+ * ring is full once that many jobs are in flight.
+ *
+ * Return: True if the exec_queue's ring is full, false otherwise.
+ */
+bool xe_exec_queue_ring_full(struct xe_exec_queue *q)
+{
+	s32 capacity = q->lrc[0]->ring.size / MAX_JOB_SIZE_BYTES;
+
+	return xe_exec_queue_num_job_inflight(q) >= capacity;
+}
+
+/**
+ * xe_exec_queue_is_idle() - Whether an exec_queue is idle.
+ * @q: The exec_queue
+ *
+ * An LRC is idle when its current seqno is one behind the next seqno to be
+ * handed out. A parallel queue is idle only if all of its LRCs are; for any
+ * other queue only the first LRC is checked.
+ *
+ * FIXME: Need to determine what to use as the short-lived
+ * timeline lock for the exec_queues, so that the return value
+ * of this function becomes more than just an advisory
+ * snapshot in time. The timeline lock must protect the
+ * seqno from racing submissions on the same exec_queue.
+ * Typically vm->resv, but user-created timeline locks use the migrate vm
+ * and never grab the migrate vm->resv, so we have a race there.
+ *
+ * Return: True if the exec_queue is idle, false otherwise.
+ */
+bool xe_exec_queue_is_idle(struct xe_exec_queue *q)
+{
+	int slot;
+
+	if (!xe_exec_queue_is_parallel(q))
+		return xe_lrc_seqno(q->lrc[0]) ==
+			q->lrc[0]->fence_ctx.next_seqno - 1;
+
+	for (slot = 0; slot < q->width; ++slot) {
+		struct xe_lrc *lrc = q->lrc[slot];
+
+		if (xe_lrc_seqno(lrc) != lrc->fence_ctx.next_seqno - 1)
+			return false;
+	}
+
+	return true;
+}
+
+/**
+ * xe_exec_queue_update_run_ticks() - Update run time in ticks for this exec queue
+ * from hw
+ * @q: The exec queue
+ *
+ * Refresh the timestamp saved by HW for this exec queue and add the delta
+ * since the previous update to the owning file's per-class run ticks.
+ */
+void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
+{
+	struct xe_file *owner;
+	u32 prev_ts, cur_ts;
+
+	/*
+	 * Queues used by the driver itself (e.g. during load) have no user xe
+	 * file behind them; kernel work is not accounted as busyness.
+	 */
+	if (!q->vm)
+		return;
+
+	owner = q->vm->xef;
+	if (!owner)
+		return;
+
+	/*
+	 * Sample the first LRC only. With parallel submission all LRCs are
+	 * scheduled together, which is compensated for by scaling the delta
+	 * with the width. That is inexact if they do not exit perfectly
+	 * aligned, but sampling each LRC at a different time would introduce
+	 * errors of its own.
+	 */
+	cur_ts = xe_lrc_update_timestamp(q->lrc[0], &prev_ts);
+	owner->run_ticks[q->class] += (cur_ts - prev_ts) * q->width;
+}
+
+/**
+ * xe_exec_queue_kill - permanently stop all execution from an exec queue
+ * @q: The exec queue
+ *
+ * This function permanently stops all activity on an exec queue. If the queue
+ * is actively executing on the HW, it will be kicked off the engine; any
+ * pending jobs are discarded and all future submissions are rejected.
+ * This function is safe to call multiple times.
+ *
+ * Queues linked on @q's multi_gt_list are killed first, then @q itself. Each
+ * killed queue is also removed from the compute exec queues of @q's VM.
+ */
+void xe_exec_queue_kill(struct xe_exec_queue *q)
+{
+	struct xe_exec_queue *eq = q, *next;
+
+	/*
+	 * NOTE(review): the list head is spelled via the cursor (eq, which
+	 * starts out as q); confirm against the struct layout before
+	 * touching this loop.
+	 */
+	list_for_each_entry_safe(eq, next, &eq->multi_gt_list,
+				 multi_gt_link) {
+		q->ops->kill(eq);
+		xe_vm_remove_compute_exec_queue(q->vm, eq);
+	}
+
+	q->ops->kill(q);
+	xe_vm_remove_compute_exec_queue(q->vm, q);
+}
+
+/*
+ * ioctl: destroy the exec queue named by args->exec_queue_id.
+ *
+ * The id is erased from the file's xarray first, so it can no longer be
+ * looked up. The queue is then removed from its hw engine group (if it has a
+ * VM and a group), killed, and the reference that was stored in the xarray is
+ * dropped.
+ *
+ * Returns 0 on success, -EINVAL for non-zero pad/reserved fields, -ENOENT if
+ * the id is unknown.
+ */
+int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data,
+				struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct drm_xe_exec_queue_destroy *args = data;
+	struct xe_exec_queue *q;
+
+	if (XE_IOCTL_DBG(xe, args->pad) ||
+	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
+		return -EINVAL;
+
+	mutex_lock(&xef->exec_queue.lock);
+	q = xa_erase(&xef->exec_queue.xa, args->exec_queue_id);
+	mutex_unlock(&xef->exec_queue.lock);
+	if (XE_IOCTL_DBG(xe, !q))
+		return -ENOENT;
+
+	/* Mirrors the condition used when the queue was added to the group. */
+	if (q->vm && q->hwe->hw_engine_group)
+		xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q);
+
+	xe_exec_queue_kill(q);
+
+	trace_xe_exec_queue_close(q);
+	xe_exec_queue_put(q);
+
+	return 0;
+}
+
+/*
+ * Assert the locking required to touch q->last_fence: vm->lock for queues
+ * flagged EXEC_QUEUE_FLAG_VM; otherwise the VM must be held and the hw engine
+ * group's mode_sem must be held.
+ */
+static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q,
+						    struct xe_vm *vm)
+{
+	if (!(q->flags & EXEC_QUEUE_FLAG_VM)) {
+		xe_vm_assert_held(vm);
+		lockdep_assert_held(&q->hwe->hw_engine_group->mode_sem);
+		return;
+	}
+
+	lockdep_assert_held(&vm->lock);
+}
+
+/**
+ * xe_exec_queue_last_fence_put() - Drop ref to last fence
+ * @q: The exec queue
+ * @vm: The VM the engine does a bind or exec for
+ *
+ * Locked variant of xe_exec_queue_last_fence_put_unlocked(): asserts the
+ * locking required for @q's last fence and then drops the fence, if any.
+ */
+void xe_exec_queue_last_fence_put(struct xe_exec_queue *q, struct xe_vm *vm)
+{
+	xe_exec_queue_last_fence_lockdep_assert(q, vm);
+
+	xe_exec_queue_last_fence_put_unlocked(q);
+}
+
+/**
+ * xe_exec_queue_last_fence_put_unlocked() - Drop ref to last fence unlocked
+ * @q: The exec queue
+ *
+ * Puts q->last_fence and clears the pointer; a queue without a last fence is
+ * left untouched.
+ *
+ * Only safe to be called from xe_exec_queue_destroy().
+ */
+void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *q)
+{
+	if (!q->last_fence)
+		return;
+
+	dma_fence_put(q->last_fence);
+	q->last_fence = NULL;
+}
+
+/**
+ * xe_exec_queue_last_fence_get() - Get last fence
+ * @q: The exec queue
+ * @vm: The VM the engine does a bind or exec for
+ *
+ * Get last fence, takes a ref. A last fence that has already signaled is
+ * dropped from the queue first.
+ *
+ * Returns: last fence if not signaled, dma fence stub if signaled
+ */
+struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *q,
+					       struct xe_vm *vm)
+{
+	struct dma_fence *result;
+
+	xe_exec_queue_last_fence_lockdep_assert(q, vm);
+
+	if (q->last_fence &&
+	    test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags))
+		xe_exec_queue_last_fence_put(q, vm);
+
+	result = q->last_fence;
+	if (!result)
+		result = dma_fence_get_stub();
+
+	return dma_fence_get(result);
+}
+
+/**
+ * xe_exec_queue_last_fence_get_for_resume() - Get last fence
+ * @q: The exec queue
+ * @vm: The VM the engine does a bind or exec for
+ *
+ * Get last fence, takes a ref. Only safe to be called in the context of
+ * resuming the hw engine group's long-running exec queue, when the group
+ * semaphore is held for write. A last fence that has already signaled is
+ * dropped from the queue first.
+ *
+ * Returns: last fence if not signaled, dma fence stub if signaled
+ */
+struct dma_fence *xe_exec_queue_last_fence_get_for_resume(struct xe_exec_queue *q,
+							  struct xe_vm *vm)
+{
+	struct dma_fence *result;
+
+	lockdep_assert_held_write(&q->hwe->hw_engine_group->mode_sem);
+
+	if (q->last_fence &&
+	    test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags))
+		xe_exec_queue_last_fence_put_unlocked(q);
+
+	result = q->last_fence;
+	if (!result)
+		result = dma_fence_get_stub();
+
+	return dma_fence_get(result);
+}
+
+/**
+ * xe_exec_queue_last_fence_set() - Set last fence
+ * @q: The exec queue
+ * @vm: The VM the engine does a bind or exec for
+ * @fence: The fence
+ *
+ * Set the last fence for the engine. Any previously stored last fence is put
+ * first. Increases reference count for @fence; when closing the engine,
+ * xe_exec_queue_last_fence_put should be called.
+ */
+void xe_exec_queue_last_fence_set(struct xe_exec_queue *q, struct xe_vm *vm,
+				  struct dma_fence *fence)
+{
+	xe_exec_queue_last_fence_lockdep_assert(q, vm);
+
+	/* Release the old fence before taking the reference on the new one. */
+	xe_exec_queue_last_fence_put(q, vm);
+	q->last_fence = dma_fence_get(fence);
+}
+
+/**
+ * xe_exec_queue_last_fence_test_dep - Test last fence dependency of queue
+ * @q: The exec queue
+ * @vm: The VM the engine does a bind or exec for
+ *
+ * Takes a temporary reference on the queue's last fence, checks whether it
+ * has signaled and drops the reference again.
+ *
+ * Returns:
+ * -ETIME if there exists an unsignalled last fence dependency, zero otherwise.
+ */
+int xe_exec_queue_last_fence_test_dep(struct xe_exec_queue *q, struct xe_vm *vm)
+{
+	struct dma_fence *last = xe_exec_queue_last_fence_get(q, vm);
+	int ret;
+
+	if (!last)
+		return 0;
+
+	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &last->flags))
+		ret = 0;
+	else
+		ret = -ETIME;
+
+	dma_fence_put(last);
+
+	return ret;
+}
diff --git a/rr-cache/635fd67462fc46da6940386c9a115a321714c539/preimage b/rr-cache/635fd67462fc46da6940386c9a115a321714c539/preimage
new file mode 100644
index 000000000000..dc287840e526
--- /dev/null
+++ b/rr-cache/635fd67462fc46da6940386c9a115a321714c539/preimage
@@ -0,0 +1,3586 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_vm.h"
+
+#include <linux/dma-fence-array.h>
+#include <linux/nospec.h>
+
+#include <drm/drm_exec.h>
+#include <drm/drm_print.h>
+#include <drm/ttm/ttm_execbuf_util.h>
+#include <drm/ttm/ttm_tt.h>
+#include <drm/xe_drm.h>
+#include <linux/ascii85.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+
+#include <generated/xe_wa_oob.h>
+
+#include "regs/xe_gtt_defs.h"
+#include "xe_assert.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_drm_client.h"
+#include "xe_exec_queue.h"
+#include "xe_gt_pagefault.h"
+#include "xe_gt_tlb_invalidation.h"
+#include "xe_migrate.h"
+#include "xe_pat.h"
+#include "xe_pm.h"
+#include "xe_preempt_fence.h"
+#include "xe_pt.h"
+#include "xe_res_cursor.h"
+#include "xe_sync.h"
+#include "xe_trace_bo.h"
+#include "xe_wa.h"
+#include "xe_hmm.h"
+
+/* Return the GEM object stored in the r_obj field of the vm's gpuvm. */
+static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
+{
+	return vm->gpuvm.r_obj;
+}
+
+/**
+ * xe_vma_userptr_check_repin() - Advisory check for repin needed
+ * @uvma: The userptr vma
+ *
+ * Check if the userptr vma has been invalidated since last successful
+ * repin. The check is advisory only and the function can be called
+ * without the vm->userptr.notifier_lock held. There is no guarantee that the
+ * vma userptr will remain valid after a lockless check, so typically
+ * the call needs to be followed by a proper check under the notifier_lock.
+ *
+ * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
+ */
+int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma)
+{
+	/* Compare the saved notifier sequence against the notifier's state. */
+	if (mmu_interval_check_retry(&uvma->userptr.notifier,
+				     uvma->userptr.notifier_seq))
+		return -EAGAIN;
+
+	return 0;
+}
+
+/**
+ * xe_vma_userptr_pin_pages() - Populate the pages of a userptr vma
+ * @uvma: The userptr vma
+ *
+ * Asserts that the owning VM's lock is held and that the vma really is a
+ * userptr vma, then hands over to xe_hmm_userptr_populate_range().
+ *
+ * Return: the value returned by xe_hmm_userptr_populate_range().
+ */
+int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma)
+{
+	struct xe_vm *vm = xe_vma_vm(&uvma->vma);
+
+	lockdep_assert_held(&vm->lock);
+	xe_assert(vm->xe, xe_vma_is_userptr(&uvma->vma));
+
+	return xe_hmm_userptr_populate_range(uvma, false);
+}
+
+/*
+ * Return true if any exec queue on vm->preempt.exec_queues either has no
+ * preempt fence or has one with DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT set.
+ */
+static bool preempt_fences_waiting(struct xe_vm *vm)
+{
+	struct xe_exec_queue *q;
+
+	lockdep_assert_held(&vm->lock);
+	xe_vm_assert_held(vm);
+
+	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
+		struct dma_fence *pfence = q->lr.pfence;
+
+		if (!pfence)
+			return true;
+
+		if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &pfence->flags))
+			return true;
+	}
+
+	return false;
+}
+
+/* Free every preempt fence whose link is still on @list. */
+static void free_preempt_fences(struct list_head *list)
+{
+	struct list_head *pos, *tmp;
+
+	/* Safe iteration: the next link is fetched before the entry is freed. */
+	list_for_each_safe(pos, tmp, list) {
+		xe_preempt_fence_free(to_preempt_fence_from_link(pos));
+	}
+}
+
+/*
+ * Allocate preempt fences onto @list until *@count reaches the number of
+ * exec queues on the VM. *@count is advanced for every fence added, so a
+ * later call only allocates what is still missing.
+ *
+ * Return: 0 on success, or the error encoded by xe_preempt_fence_alloc().
+ */
+static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
+				unsigned int *count)
+{
+	lockdep_assert_held(&vm->lock);
+	xe_vm_assert_held(vm);
+
+	while (*count < vm->preempt.num_exec_queues) {
+		struct xe_preempt_fence *pf = xe_preempt_fence_alloc();
+
+		if (IS_ERR(pf))
+			return PTR_ERR(pf);
+
+		list_move_tail(xe_preempt_fence_link(pf), list);
+		++(*count);
+	}
+
+	return 0;
+}
+
+/*
+ * Wait for the preempt fence of every exec queue on the VM and then drop
+ * the queue's reference to it.
+ *
+ * Return: 0 on success, -ETIME if dma_fence_wait() reports an error.
+ */
+static int wait_for_existing_preempt_fences(struct xe_vm *vm)
+{
+	struct xe_exec_queue *q;
+
+	xe_vm_assert_held(vm);
+
+	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
+		long ret;
+
+		if (!q->lr.pfence)
+			continue;
+
+		ret = dma_fence_wait(q->lr.pfence, false);
+		if (ret < 0)
+			return -ETIME;
+
+		dma_fence_put(q->lr.pfence);
+		q->lr.pfence = NULL;
+	}
+
+	return 0;
+}
+
+/* Return true if xe_exec_queue_is_idle() holds for every exec queue of the VM. */
+static bool xe_vm_is_idle(struct xe_vm *vm)
+{
+	struct xe_exec_queue *q;
+	bool idle = true;
+
+	xe_vm_assert_held(vm);
+
+	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
+		idle = xe_exec_queue_is_idle(q);
+		if (!idle)
+			break;
+	}
+
+	return idle;
+}
+
+/*
+ * Arm one preallocated preempt fence from @list for every exec queue on
+ * vm->preempt.exec_queues and install it as the queue's lr.pfence, dropping
+ * the reference to the fence it replaces.
+ *
+ * @list must hold at least one entry per exec queue; this is only checked
+ * through xe_assert().
+ */
+static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
+{
+	struct list_head *link;
+	struct xe_exec_queue *q;
+
+	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
+		struct dma_fence *fence;
+
+		/*
+		 * Always take the head of @list.
+		 * NOTE(review): this presumes xe_preempt_fence_arm() unlinks
+		 * the fence from @list - confirm.
+		 */
+		link = list->next;
+		xe_assert(vm->xe, link != list);
+
+		/* Each armed fence gets the queue's next sequence number. */
+		fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
+					     q, q->lr.context,
+					     ++q->lr.seqno);
+		dma_fence_put(q->lr.pfence);
+		q->lr.pfence = fence;
+	}
+}
+
+/*
+ * Add the preempt fence of every exec queue of @vm to the reservation
+ * object of @bo with DMA_RESV_USAGE_BOOKKEEP usage.
+ *
+ * Return: 0 on success (including when the VM has no exec queues), or the
+ * error from dma_resv_reserve_fences().
+ */
+static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
+{
+	struct xe_exec_queue *q;
+	int ret;
+
+	xe_bo_assert_held(bo);
+
+	if (vm->preempt.num_exec_queues == 0)
+		return 0;
+
+	/* Reserve one fence slot per exec queue up front. */
+	ret = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues);
+	if (ret)
+		return ret;
+
+	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
+		if (!q->lr.pfence)
+			continue;
+
+		dma_resv_add_fence(bo->ttm.base.resv,
+				   q->lr.pfence,
+				   DMA_RESV_USAGE_BOOKKEEP);
+	}
+
+	return 0;
+}
+
+/*
+ * For every exec queue of @vm: call the queue's resume op and add its
+ * current preempt fence through drm_gpuvm_resv_add_fence() with
+ * DMA_RESV_USAGE_BOOKKEEP for both usage arguments.
+ *
+ * Requires vm->lock and the VM's reservation lock to be held (asserted).
+ */
+static void resume_and_reinstall_preempt_fences(struct xe_vm *vm,
+						struct drm_exec *exec)
+{
+	struct xe_exec_queue *q;
+
+	lockdep_assert_held(&vm->lock);
+	xe_vm_assert_held(vm);
+
+	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
+		q->ops->resume(q);
+
+		drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence,
+					 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
+	}
+}
+
+/**
+ * xe_vm_add_compute_exec_queue() - Add compute exec queue to VM
+ * @vm: The VM.
+ * @q: The exec_queue
+ *
+ * Creates a preempt fence for @q, links @q onto the VM's list of preempt
+ * exec queues and adds the fence through drm_gpuvm_resv_add_fence() with
+ * DMA_RESV_USAGE_BOOKKEEP usage. The VM must be in preempt fence mode
+ * (asserted).
+ *
+ * Return: 0 on success, -ENOMEM if the preempt fence could not be created,
+ * or the error returned by drm_gpuvm_exec_lock().
+ */
+int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
+{
+	struct drm_gpuvm_exec vm_exec = {
+		.vm = &vm->gpuvm,
+		.flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
+		.num_fences = 1,
+	};
+	struct drm_exec *exec = &vm_exec.exec;
+	struct dma_fence *pfence;
+	int err;
+	bool wait;
+
+	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
+
+	down_write(&vm->lock);
+	err = drm_gpuvm_exec_lock(&vm_exec);
+	if (err)
+		goto out_up_write;
+
+	pfence = xe_preempt_fence_create(q, q->lr.context,
+					 ++q->lr.seqno);
+	if (!pfence) {
+		err = -ENOMEM;
+		goto out_fini;
+	}
+
+	/* Publish the queue on the VM together with its new preempt fence. */
+	list_add(&q->lr.link, &vm->preempt.exec_queues);
+	++vm->preempt.num_exec_queues;
+	q->lr.pfence = pfence;
+
+	/* The repin check below asserts that the notifier lock is read-held. */
+	down_read(&vm->userptr.notifier_lock);
+
+	drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
+				 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
+
+	/*
+	 * Check to see if a preemption on VM is in flight or userptr
+	 * invalidation, if so trigger this preempt fence to sync state with
+	 * other preempt fences on the VM.
+	 */
+	wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
+	if (wait)
+		dma_fence_enable_sw_signaling(pfence);
+
+	up_read(&vm->userptr.notifier_lock);
+
+out_fini:
+	drm_exec_fini(exec);
+out_up_write:
+	up_write(&vm->lock);
+
+	return err;
+}
+
+/**
+ * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
+ * @vm: The VM.
+ * @q: The exec_queue
+ *
+ * Does nothing unless @vm is in preempt fence mode. Otherwise, with vm->lock
+ * held for write, unlinks @q from the VM's list of preempt exec queues,
+ * decrements the queue count and, if @q has a preempt fence, enables
+ * software signaling on it and drops the queue's reference to it.
+ */
+void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
+{
+	if (!xe_vm_in_preempt_fence_mode(vm))
+		return;
+
+	down_write(&vm->lock);
+	list_del(&q->lr.link);
+	--vm->preempt.num_exec_queues;
+	if (q->lr.pfence) {
+		/* Enable signaling before dropping our reference to the fence. */
+		dma_fence_enable_sw_signaling(q->lr.pfence);
+		dma_fence_put(q->lr.pfence);
+		q->lr.pfence = NULL;
+	}
+	up_write(&vm->lock);
+}
+
+/**
+ * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs
+ * that need repinning.
+ * @vm: The VM.
+ *
+ * This function checks whether the VM's userptr repin list or invalidated
+ * list is non-empty. The userptr.notifier_lock must be held for read
+ * (asserted with lockdep).
+ *
+ * NOTE(review): earlier documentation mentioned a release-type barrier on
+ * the userptr.notifier_lock after checking; nothing in this function issues
+ * one, so presumably that refers to the caller dropping the lock - confirm.
+ *
+ * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
+ */
+int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
+{
+	lockdep_assert_held_read(&vm->userptr.notifier_lock);
+
+	if (!list_empty(&vm->userptr.repin_list))
+		return -EAGAIN;
+
+	if (!list_empty(&vm->userptr.invalidated))
+		return -EAGAIN;
+
+	return 0;
+}
+
+/* Length, in ms, of the retry window used by xe_vm_validate_should_retry(). */
+#define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
+
+/*
+ * Mark @vm as banned and call the kill op of every exec queue on
+ * vm->preempt.exec_queues.
+ *
+ * @unlocked: when true, xe_vm_lock()/xe_vm_unlock() are taken around the
+ * operation here; when false they are not touched.
+ *
+ * vm->lock must be held by the caller (asserted with lockdep).
+ */
+static void xe_vm_kill(struct xe_vm *vm, bool unlocked)
+{
+	struct xe_exec_queue *q;
+
+	lockdep_assert_held(&vm->lock);
+
+	if (unlocked)
+		xe_vm_lock(vm, false);
+
+	vm->flags |= XE_VM_FLAG_BANNED;
+	trace_xe_vm_kill(vm);
+
+	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
+		q->ops->kill(q);
+
+	if (unlocked)
+		xe_vm_unlock(vm);
+
+	/* TODO: Inform user the VM is banned */
+}
+
+/**
+ * xe_vm_validate_should_retry() - Whether to retry after a validate error.
+ * @exec: The drm_exec object used for locking before validation. Not
+ * accessed by the current implementation.
+ * @err: The error returned from ttm_bo_validate().
+ * @end: A ktime_t cookie that should be set to 0 before first use and
+ * that should be reused on subsequent calls.
+ *
+ * With multiple active VMs, under memory pressure, it is possible that
+ * ttm_bo_validate() run into -EDEADLK and in such case returns -ENOMEM.
+ * Until ttm properly handles locking in such scenarios, best thing the
+ * driver can do is retry with a timeout. On the first -ENOMEM the deadline
+ * is stored in @end; as long as the deadline has not passed the function
+ * sleeps for 20 ms and recommends a retry.
+ *
+ * Return: true if a retry after drm_exec_init() is recommended;
+ * false otherwise.
+ */
+bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end)
+{
+	ktime_t now;
+
+	/* Only -ENOMEM is considered worth retrying. */
+	if (err != -ENOMEM)
+		return false;
+
+	now = ktime_get();
+
+	/* A zero cookie means this is the first call: start the window. */
+	if (!*end)
+		*end = ktime_add_ms(now, XE_VM_REBIND_RETRY_TIMEOUT_MS);
+
+	if (ktime_before(now, *end)) {
+		msleep(20);
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * drm_gpuvm vm_bo_validate callback: queue every vma mapping @vm_bo on the
+ * VM's rebind list, then validate the backing bo. The evicted flag of
+ * @vm_bo is only cleared when validation succeeds.
+ *
+ * Return: 0 on success, or the error from xe_bo_validate().
+ */
+static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
+{
+	struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
+	struct drm_gpuva *va;
+	int err;
+
+	lockdep_assert_held(&vm->lock);
+
+	drm_gpuvm_bo_for_each_va(va, vm_bo) {
+		struct xe_vma *vma = gpuva_to_vma(va);
+
+		list_move_tail(&vma->combined_links.rebind, &vm->rebind_list);
+	}
+
+	err = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false);
+	if (!err)
+		vm_bo->evicted = false;
+
+	return err;
+}
+
+/**
+ * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
+ * @vm: The vm for which we are rebinding.
+ * @exec: The struct drm_exec with the locked GEM objects.
+ * @num_fences: The number of fences to reserve for the operation, not
+ * including rebinds and validations.
+ *
+ * Validates all evicted gem objects and rebinds their vmas. Note that
+ * rebindings may cause evictions and hence the validation-rebind
+ * sequence is rerun until there are no more objects to validate.
+ * Afterwards @num_fences fence slots are reserved on every object locked
+ * by @exec.
+ *
+ * Return: 0 on success, negative error code on error. In particular,
+ * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
+ * the drm_exec transaction needs to be restarted.
+ */
+int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
+			  unsigned int num_fences)
+{
+	struct drm_gem_object *obj;
+	unsigned long index;
+	int ret;
+
+	/* Loop until the gpuvm evict list is empty. */
+	do {
+		ret = drm_gpuvm_validate(&vm->gpuvm, exec);
+		if (ret)
+			return ret;
+
+		ret = xe_vm_rebind(vm, false);
+		if (ret)
+			return ret;
+	} while (!list_empty(&vm->gpuvm.evict.list));
+
+	/* Reserve the caller's fence slots on each locked object. */
+	drm_exec_for_each_locked_object(exec, index, obj) {
+		ret = dma_resv_reserve_fences(obj->resv, num_fences);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Locking-loop body of the preempt rebind worker.
+ *
+ * Prepares the VM in @exec and decides whether there is any work: *@done is
+ * set to true (and 0 returned) when the VM is idle, in which case
+ * vm->preempt.rebind_deactivated is also set, or when no preempt fence is
+ * waiting. Otherwise the VM's objects are prepared, the existing preempt
+ * fences are waited for, and validation plus rebind is run.
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
+				 bool *done)
+{
+	int err;
+
+	err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
+	if (err)
+		return err;
+
+	if (xe_vm_is_idle(vm)) {
+		vm->preempt.rebind_deactivated = true;
+		*done = true;
+		return 0;
+	}
+
+	if (!preempt_fences_waiting(vm)) {
+		*done = true;
+		return 0;
+	}
+
+	err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
+	if (err)
+		return err;
+
+	err = wait_for_existing_preempt_fences(vm);
+	if (err)
+		return err;
+
+	/*
+	 * Add validation and rebinding to the locking loop since both can
+	 * cause evictions which may require blocking dma_resv locks.
+	 * The fence reservation here is intended for the new preempt fences
+	 * we attach at the end of the rebind work.
+	 */
+	return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
+}
+
+/*
+ * Rebind worker for VMs in preempt fence mode.
+ *
+ * With vm->lock held for write: repins userptrs if the advisory check asks
+ * for it, locks the VM and its objects through drm_exec, validates and
+ * rebinds, waits for the DMA_RESV_USAGE_KERNEL fences on the VM's
+ * reservation object, then arms fresh preempt fences and resumes the exec
+ * queues. Whenever a step leaves err at -EAGAIN the sequence restarts from
+ * the "retry" label; any other error is logged and the VM is killed.
+ */
+static void preempt_rebind_work_func(struct work_struct *w)
+{
+	struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
+	struct drm_exec exec;
+	unsigned int fence_count = 0;
+	LIST_HEAD(preempt_fences);
+	ktime_t end = 0;
+	int err = 0;
+	long wait;
+	int __maybe_unused tries = 0;
+
+	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
+	trace_xe_vm_rebind_worker_enter(vm);
+
+	down_write(&vm->lock);
+
+	/* Nothing to do for a closed or banned VM. */
+	if (xe_vm_is_closed_or_banned(vm)) {
+		up_write(&vm->lock);
+		trace_xe_vm_rebind_worker_exit(vm);
+		return;
+	}
+
+retry:
+	if (xe_vm_userptr_check_repin(vm)) {
+		err = xe_vm_userptr_pin(vm);
+		if (err)
+			goto out_unlock_outer;
+	}
+
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+
+	drm_exec_until_all_locked(&exec) {
+		bool done = false;
+
+		err = xe_preempt_work_begin(&exec, vm, &done);
+		drm_exec_retry_on_contention(&exec);
+		if (err || done) {
+			/*
+			 * exec is finalized before the retry check, which may
+			 * sleep. xe_vm_validate_should_retry() receives the
+			 * pointer but does not access it.
+			 */
+			drm_exec_fini(&exec);
+			if (err && xe_vm_validate_should_retry(&exec, err, &end))
+				err = -EAGAIN;
+
+			goto out_unlock_outer;
+		}
+	}
+
+	/* fence_count persists across retries, so only missing fences are allocated. */
+	err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
+	if (err)
+		goto out_unlock;
+
+	err = xe_vm_rebind(vm, true);
+	if (err)
+		goto out_unlock;
+
+	/* Wait on rebinds and munmap style VM unbinds */
+	wait = dma_resv_wait_timeout(xe_vm_resv(vm),
+				     DMA_RESV_USAGE_KERNEL,
+				     false, MAX_SCHEDULE_TIMEOUT);
+	if (wait <= 0) {
+		err = -ETIME;
+		goto out_unlock;
+	}
+
+/*
+ * With CONFIG_DRM_XE_USERPTR_INVAL_INJECT enabled the first pass (tries == 0)
+ * always requests a retry; otherwise only a pending userptr repin does.
+ */
+#define retry_required(__tries, __vm) \
+	(IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
+	(!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
+	__xe_vm_userptr_needs_repin(__vm))
+
+	down_read(&vm->userptr.notifier_lock);
+	if (retry_required(tries, vm)) {
+		up_read(&vm->userptr.notifier_lock);
+		err = -EAGAIN;
+		goto out_unlock;
+	}
+
+#undef retry_required
+
+	spin_lock(&vm->xe->ttm.lru_lock);
+	ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
+	spin_unlock(&vm->xe->ttm.lru_lock);
+
+	/* Point of no return. */
+	arm_preempt_fences(vm, &preempt_fences);
+	resume_and_reinstall_preempt_fences(vm, &exec);
+	up_read(&vm->userptr.notifier_lock);
+
+out_unlock:
+	drm_exec_fini(&exec);
+out_unlock_outer:
+	/* vm->lock stays held across a retry. */
+	if (err == -EAGAIN) {
+		trace_xe_vm_rebind_worker_retry(vm);
+		goto retry;
+	}
+
+	if (err) {
+		drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
+		xe_vm_kill(vm, true);
+	}
+	up_write(&vm->lock);
+
+	/* Release any preallocated fences still left on the local list. */
+	free_preempt_fences(&preempt_fences);
+
+	trace_xe_vm_rebind_worker_exit(vm);
+}
+
+/*
+ * Invalidate callback installed in vma_userptr_notifier_ops for userptr
+ * vmas.
+ *
+ * Returns false without further action when @range is not blockable.
+ * Otherwise it records @cur_seq on the notifier and, unless the userptr has
+ * not been bound yet, queues the vma on the VM's invalidated list (non-fault
+ * mode only), enables signaling on and waits for the VM's BOOKKEEP fences,
+ * and in fault mode invalidates the vma. Returns true in all those cases.
+ */
+static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
+				   const struct mmu_notifier_range *range,
+				   unsigned long cur_seq)
+{
+	struct xe_userptr *userptr = container_of(mni, typeof(*userptr), notifier);
+	struct xe_userptr_vma *uvma = container_of(userptr, typeof(*uvma), userptr);
+	struct xe_vma *vma = &uvma->vma;
+	struct xe_vm *vm = xe_vma_vm(vma);
+	struct dma_resv_iter cursor;
+	struct dma_fence *fence;
+	long err;
+
+	xe_assert(vm->xe, xe_vma_is_userptr(vma));
+	trace_xe_vma_userptr_invalidate(vma);
+
+	if (!mmu_notifier_range_blockable(range))
+		return false;
+
+	vm_dbg(&xe_vma_vm(vma)->xe->drm,
+	       "NOTIFIER: addr=0x%016llx, range=0x%016llx",
+		xe_vma_start(vma), xe_vma_size(vma));
+
+	down_write(&vm->userptr.notifier_lock);
+	mmu_interval_set_seq(mni, cur_seq);
+
+	/* No need to stop gpu access if the userptr is not yet bound. */
+	if (!userptr->initial_bind) {
+		up_write(&vm->userptr.notifier_lock);
+		return true;
+	}
+
+	/*
+	 * Tell exec and rebind worker they need to repin and rebind this
+	 * userptr.
+	 */
+	if (!xe_vm_in_fault_mode(vm) &&
+	    !(vma->gpuva.flags & XE_VMA_DESTROYED) && vma->tile_present) {
+		spin_lock(&vm->userptr.invalidated_lock);
+		list_move_tail(&userptr->invalidate_link,
+			       &vm->userptr.invalidated);
+		spin_unlock(&vm->userptr.invalidated_lock);
+	}
+
+	up_write(&vm->userptr.notifier_lock);
+
+	/*
+	 * Preempt fences turn into schedule disables, pipeline these.
+	 * Note that even in fault mode, we need to wait for binds and
+	 * unbinds to complete, and those are attached as BOOKKEEP fences
+	 * to the vm.
+	 */
+	dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
+			    DMA_RESV_USAGE_BOOKKEEP);
+	dma_resv_for_each_fence_unlocked(&cursor, fence)
+		dma_fence_enable_sw_signaling(fence);
+	dma_resv_iter_end(&cursor);
+
+	/* A failed or timed-out wait is only warned about, not propagated. */
+	err = dma_resv_wait_timeout(xe_vm_resv(vm),
+				    DMA_RESV_USAGE_BOOKKEEP,
+				    false, MAX_SCHEDULE_TIMEOUT);
+	XE_WARN_ON(err <= 0);
+
+	if (xe_vm_in_fault_mode(vm)) {
+		err = xe_vm_invalidate_vma(vma);
+		XE_WARN_ON(err);
+	}
+
+	trace_xe_vma_userptr_invalidate_complete(vma);
+
+	return true;
+}
+
+/* Interval notifier ops registered for every userptr vma in xe_vma_create(). */
+static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
+	.invalidate = vma_userptr_invalidate,
+};
+
+/**
+ * xe_vm_userptr_pin() - Repin the userptr vmas of a VM that need it
+ * @vm: The VM.
+ *
+ * Moves every entry on vm->userptr.invalidated over to
+ * vm->userptr.repin_list and then calls xe_vma_userptr_pin_pages() on each
+ * entry of the repin list. A vma that was pinned successfully is taken off
+ * the repin list and queued on vm->rebind_list. A vma whose pin fails with
+ * -EFAULT is taken off the repin list and, after waiting for the VM's
+ * BOOKKEEP fences, invalidated through xe_vm_invalidate_vma().
+ *
+ * The VM must not be in fault mode and vm->lock must be held for write
+ * (both asserted).
+ *
+ * Return: 0 on success, the first pin error other than -EFAULT, or the
+ * error returned by xe_vm_invalidate_vma().
+ */
+int xe_vm_userptr_pin(struct xe_vm *vm)
+{
+	struct xe_userptr_vma *uvma, *next;
+	int err = 0;
+
+	xe_assert(vm->xe, !xe_vm_in_fault_mode(vm));
+	lockdep_assert_held_write(&vm->lock);
+
+	/* Collect invalidated userptrs */
+	spin_lock(&vm->userptr.invalidated_lock);
+	list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated,
+				 userptr.invalidate_link) {
+		list_del_init(&uvma->userptr.invalidate_link);
+		list_move_tail(&uvma->userptr.repin_link,
+			       &vm->userptr.repin_list);
+	}
+	spin_unlock(&vm->userptr.invalidated_lock);
+
+	/* Pin and move to the rebind list */
+	list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
+				 userptr.repin_link) {
+		err = xe_vma_userptr_pin_pages(uvma);
+		if (err == -EFAULT) {
+			/* Drop the vma from the repin list and invalidate it instead. */
+			list_del_init(&uvma->userptr.repin_link);
+
+			/* Wait for pending binds */
+			xe_vm_lock(vm, false);
+			dma_resv_wait_timeout(xe_vm_resv(vm),
+					      DMA_RESV_USAGE_BOOKKEEP,
+					      false, MAX_SCHEDULE_TIMEOUT);
+
+			err = xe_vm_invalidate_vma(&uvma->vma);
+			xe_vm_unlock(vm);
+			if (err)
+				return err;
+		} else {
+			/* The vma stays on the repin list for a later attempt. */
+			if (err < 0)
+				return err;
+
+			list_del_init(&uvma->userptr.repin_link);
+			list_move_tail(&uvma->vma.combined_links.rebind,
+				       &vm->rebind_list);
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs
+ * that need repinning.
+ * @vm: The VM.
+ *
+ * This function does an advisory check for whether the VM has userptrs that
+ * need repinning, using list_empty_careful() on the repin and invalidated
+ * lists.
+ *
+ * Return: 0 if there are no indications of userptrs needing repinning,
+ * -EAGAIN if there are.
+ */
+int xe_vm_userptr_check_repin(struct xe_vm *vm)
+{
+	if (!list_empty_careful(&vm->userptr.repin_list))
+		return -EAGAIN;
+
+	if (!list_empty_careful(&vm->userptr.invalidated))
+		return -EAGAIN;
+
+	return 0;
+}
+
+/*
+ * Fill in @op as an immediate DRM_GPUVA_OP_MAP operation that maps @vma
+ * again on the tiles in @tile_mask, copying address, range and gem
+ * information from the vma's gpuva.
+ */
+static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma,
+				  u8 tile_mask)
+{
+	struct drm_gpuva *va = &vma->gpuva;
+
+	INIT_LIST_HEAD(&op->link);
+	op->tile_mask = tile_mask;
+
+	/* Generic gpuva part of the operation. */
+	op->base.op = DRM_GPUVA_OP_MAP;
+	op->base.map.va.addr = va->va.addr;
+	op->base.map.va.range = va->va.range;
+	op->base.map.gem.obj = va->gem.obj;
+	op->base.map.gem.offset = va->gem.offset;
+
+	/* Driver specific part of the operation. */
+	op->map.vma = vma;
+	op->map.immediate = true;
+	op->map.dumpable = va->flags & XE_VMA_DUMPABLE;
+	op->map.is_null = xe_vma_is_null(vma);
+}
+
+/*
+ * Allocate a rebind operation for @vma and append it to @vops->list.
+ *
+ * Return: 0 on success, -ENOMEM if the operation could not be allocated.
+ */
+static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma,
+				u8 tile_mask)
+{
+	struct xe_vma_op *rebind_op = kzalloc(sizeof(*rebind_op), GFP_KERNEL);
+
+	if (!rebind_op)
+		return -ENOMEM;
+
+	xe_vm_populate_rebind(rebind_op, vma, tile_mask);
+	list_add_tail(&rebind_op->link, &vops->list);
+
+	return 0;
+}
+
+static struct dma_fence *ops_execute(struct xe_vm *vm,
+				     struct xe_vma_ops *vops);
+static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
+			    struct xe_exec_queue *q,
+			    struct xe_sync_entry *syncs, u32 num_syncs);
+
+/**
+ * xe_vm_rebind() - Rebind every vma queued on the VM's rebind list
+ * @vm: The VM.
+ * @rebind_worker: true when called from the preempt rebind worker.
+ *
+ * Returns 0 immediately when the VM is in LR mode and the caller is not the
+ * rebind worker, or when the rebind list is empty. Otherwise one rebind
+ * operation per queued vma is built and the whole set is executed through
+ * ops_execute(); on success every vma is removed from the rebind list. The
+ * temporary operations are freed in all cases.
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
+{
+	struct dma_fence *fence;
+	struct xe_vma *vma, *next;
+	struct xe_vma_ops vops;
+	struct xe_vma_op *op, *next_op;
+	int err;
+
+	lockdep_assert_held(&vm->lock);
+	if ((xe_vm_in_lr_mode(vm) && !rebind_worker) ||
+	    list_empty(&vm->rebind_list))
+		return 0;
+
+	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
+
+	xe_vm_assert_held(vm);
+	/*
+	 * The rebind list is known to be non-empty here, so err is always
+	 * assigned by the loop before it is read below.
+	 */
+	list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
+		xe_assert(vm->xe, vma->tile_present);
+
+		if (rebind_worker)
+			trace_xe_vma_rebind_worker(vma);
+		else
+			trace_xe_vma_rebind_exec(vma);
+
+		err = xe_vm_ops_add_rebind(&vops, vma,
+					   vma->tile_present);
+		if (err)
+			goto free_ops;
+	}
+
+	fence = ops_execute(vm, &vops);
+	if (IS_ERR(fence)) {
+		err = PTR_ERR(fence);
+	} else {
+		/* The fence is not needed here; empty the rebind list. */
+		dma_fence_put(fence);
+		list_for_each_entry_safe(vma, next, &vm->rebind_list,
+					 combined_links.rebind)
+			list_del_init(&vma->combined_links.rebind);
+	}
+free_ops:
+	list_for_each_entry_safe(op, next_op, &vops.list, link) {
+		list_del(&op->link);
+		kfree(op);
+	}
+
+	return err;
+}
+
+/**
+ * xe_vma_rebind() - Rebind a single vma on the given tiles
+ * @vm: The VM; must be in fault mode (asserted).
+ * @vma: The vma to rebind.
+ * @tile_mask: Mask of tiles to rebind on.
+ *
+ * Builds one rebind operation for @vma, executes it through ops_execute()
+ * and frees the temporary operation again.
+ *
+ * Return: whatever ops_execute() returned, or ERR_PTR(-ENOMEM) if the
+ * operation could not be allocated.
+ */
+struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask)
+{
+	struct dma_fence *fence = NULL;
+	struct xe_vma_ops vops;
+	struct xe_vma_op *op, *next_op;
+	int err;
+
+	lockdep_assert_held(&vm->lock);
+	xe_vm_assert_held(vm);
+	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
+
+	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
+
+	err = xe_vm_ops_add_rebind(&vops, vma, tile_mask);
+	if (err)
+		return ERR_PTR(err);
+
+	fence = ops_execute(vm, &vops);
+
+	/* Free the operation regardless of how ops_execute() went. */
+	list_for_each_entry_safe(op, next_op, &vops.list, link) {
+		list_del(&op->link);
+		kfree(op);
+	}
+
+	return fence;
+}
+
+/*
+ * Free the memory of @vma. A userptr vma is embedded in a larger
+ * struct xe_userptr_vma, so in that case the containing object is freed.
+ */
+static void xe_vma_free(struct xe_vma *vma)
+{
+	void *mem;
+
+	if (xe_vma_is_userptr(vma))
+		mem = to_userptr_vma(vma);
+	else
+		mem = vma;
+
+	kfree(mem);
+}
+
+/* Bits accepted in the flags argument of xe_vma_create(). */
+#define VMA_CREATE_FLAG_READ_ONLY	BIT(0)
+#define VMA_CREATE_FLAG_IS_NULL		BIT(1)
+#define VMA_CREATE_FLAG_DUMPABLE	BIT(2)
+
+/*
+ * Allocate and initialise a vma covering [@start, @end] (inclusive) of @vm.
+ *
+ * @bo: backing buffer object, or NULL for a userptr or null vma.
+ * @bo_offset_or_userptr: stored as the gpuva gem offset for bo-backed and
+ * userptr vmas.
+ * @pat_index: stored in the vma.
+ * @flags: VMA_CREATE_FLAG_* bits.
+ *
+ * A bo-backed vma takes a reference on the bo's GEM object and is linked to
+ * the bo's drm_gpuvm_bo; a userptr or null vma takes a reference on @vm. A
+ * userptr vma additionally registers an MMU interval notifier on
+ * current->mm.
+ *
+ * Return: the new vma, or an ERR_PTR() on allocation failure or when
+ * drm_gpuvm_bo_obtain() or mmu_interval_notifier_insert() fails.
+ */
+static struct xe_vma *xe_vma_create(struct xe_vm *vm,
+				    struct xe_bo *bo,
+				    u64 bo_offset_or_userptr,
+				    u64 start, u64 end,
+				    u16 pat_index, unsigned int flags)
+{
+	struct xe_vma *vma;
+	struct xe_tile *tile;
+	u8 id;
+	bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY);
+	bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL);
+	bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE);
+
+	xe_assert(vm->xe, start < end);
+	xe_assert(vm->xe, end < vm->size);
+
+	/*
+	 * Allocate and ensure that the xe_vma_is_userptr() return
+	 * matches what was allocated.
+	 */
+	if (!bo && !is_null) {
+		struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL);
+
+		if (!uvma)
+			return ERR_PTR(-ENOMEM);
+
+		vma = &uvma->vma;
+	} else {
+		vma = kzalloc(sizeof(*vma), GFP_KERNEL);
+		if (!vma)
+			return ERR_PTR(-ENOMEM);
+
+		if (is_null)
+			vma->gpuva.flags |= DRM_GPUVA_SPARSE;
+		if (bo)
+			vma->gpuva.gem.obj = &bo->ttm.base;
+	}
+
+	INIT_LIST_HEAD(&vma->combined_links.rebind);
+
+	INIT_LIST_HEAD(&vma->gpuva.gem.entry);
+	vma->gpuva.vm = &vm->gpuvm;
+	vma->gpuva.va.addr = start;
+	/* @end is inclusive, hence the + 1. */
+	vma->gpuva.va.range = end - start + 1;
+	if (read_only)
+		vma->gpuva.flags |= XE_VMA_READ_ONLY;
+	if (dumpable)
+		vma->gpuva.flags |= XE_VMA_DUMPABLE;
+
+	/* Start out with every tile of the device in the tile mask. */
+	for_each_tile(tile, vm->xe, id)
+		vma->tile_mask |= 0x1 << id;
+
+	if (vm->xe->info.has_atomic_enable_pte_bit)
+		vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
+
+	vma->pat_index = pat_index;
+
+	if (bo) {
+		struct drm_gpuvm_bo *vm_bo;
+
+		xe_bo_assert_held(bo);
+
+		vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base);
+		if (IS_ERR(vm_bo)) {
+			xe_vma_free(vma);
+			return ERR_CAST(vm_bo);
+		}
+
+		drm_gpuvm_bo_extobj_add(vm_bo);
+		drm_gem_object_get(&bo->ttm.base);
+		vma->gpuva.gem.offset = bo_offset_or_userptr;
+		drm_gpuva_link(&vma->gpuva, vm_bo);
+		/* Drop the reference obtained by drm_gpuvm_bo_obtain() above. */
+		drm_gpuvm_bo_put(vm_bo);
+	} else /* userptr or null */ {
+		if (!is_null) {
+			struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr;
+			u64 size = end - start + 1;
+			int err;
+
+			INIT_LIST_HEAD(&userptr->invalidate_link);
+			INIT_LIST_HEAD(&userptr->repin_link);
+			vma->gpuva.gem.offset = bo_offset_or_userptr;
+
+			err = mmu_interval_notifier_insert(&userptr->notifier,
+							   current->mm,
+							   xe_vma_userptr(vma), size,
+							   &vma_userptr_notifier_ops);
+			if (err) {
+				xe_vma_free(vma);
+				return ERR_PTR(err);
+			}
+
+			/*
+			 * NOTE(review): presumably LONG_MAX is a sequence value
+			 * that never matches, so the first check asks for a
+			 * repin - confirm.
+			 */
+			userptr->notifier_seq = LONG_MAX;
+		}
+
+		xe_vm_get(vm);
+	}
+
+	return vma;
+}
+
+/*
+ * Final teardown of @vma: drops the user fence if one is attached, releases
+ * what the vma type holds (userptr: sg table and MMU notifier plus a VM
+ * reference; null vma: a VM reference; bo-backed: a bo reference) and frees
+ * the vma memory.
+ */
+static void xe_vma_destroy_late(struct xe_vma *vma)
+{
+	struct xe_vm *vm = xe_vma_vm(vma);
+
+	if (vma->ufence) {
+		xe_sync_ufence_put(vma->ufence);
+		vma->ufence = NULL;
+	}
+
+	if (xe_vma_is_userptr(vma)) {
+		struct xe_userptr_vma *uvma = to_userptr_vma(vma);
+		struct xe_userptr *userptr = &uvma->userptr;
+
+		if (userptr->sg)
+			xe_hmm_userptr_free_sg(uvma);
+
+		/*
+		 * Since userptr pages are not pinned, we can't remove
+		 * the notifier until we're sure the GPU is not accessing
+		 * them anymore
+		 */
+		mmu_interval_notifier_remove(&userptr->notifier);
+		xe_vm_put(vm);
+	} else if (xe_vma_is_null(vma)) {
+		xe_vm_put(vm);
+	} else {
+		xe_bo_put(xe_vma_bo(vma));
+	}
+
+	xe_vma_free(vma);
+}
+
+/* Work item handler: destroy the vma that embeds work item @w. */
+static void vma_destroy_work_func(struct work_struct *w)
+{
+	xe_vma_destroy_late(container_of(w, struct xe_vma, destroy_work));
+}
+
+/*
+ * dma_fence callback registered by xe_vma_destroy(): queues the final vma
+ * teardown on system_unbound_wq rather than running it in the callback.
+ */
+static void vma_destroy_cb(struct dma_fence *fence,
+			   struct dma_fence_cb *cb)
+{
+	struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);
+
+	INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
+	queue_work(system_unbound_wq, &vma->destroy_work);
+}
+
+/*
+ * Detach @vma from its VM bookkeeping and destroy it.
+ *
+ * A userptr vma is removed from the invalidated list; a bo-backed vma is
+ * unlinked from its gpuva bo. When @fence is given, the final teardown is
+ * deferred until the fence signals; if the callback cannot be added, or no
+ * fence is given, the teardown runs immediately.
+ *
+ * Requires vm->lock held for write and the VM's reservation lock held.
+ */
+static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
+{
+	struct xe_vm *vm = xe_vma_vm(vma);
+
+	lockdep_assert_held_write(&vm->lock);
+	xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));
+
+	if (xe_vma_is_userptr(vma)) {
+		xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
+
+		spin_lock(&vm->userptr.invalidated_lock);
+		list_del(&to_userptr_vma(vma)->userptr.invalidate_link);
+		spin_unlock(&vm->userptr.invalidated_lock);
+	} else if (!xe_vma_is_null(vma)) {
+		xe_bo_assert_held(xe_vma_bo(vma));
+
+		drm_gpuva_unlink(&vma->gpuva);
+	}
+
+	xe_vm_assert_held(vm);
+	if (fence) {
+		int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
+						 vma_destroy_cb);
+
+		if (ret) {
+			/* -ENOENT is the only failure expected here. */
+			XE_WARN_ON(ret != -ENOENT);
+			xe_vma_destroy_late(vma);
+		}
+	} else {
+		xe_vma_destroy_late(vma);
+	}
+}
+
+/**
+ * xe_vm_lock_vma() - drm_exec utility to lock a vma
+ * @exec: The drm_exec object we're currently locking for.
+ * @vma: The vma for which we want to lock the vm resv and any attached
+ * object's resv.
+ *
+ * The vm's object is always locked. The vma's bo is locked in addition only
+ * when the vma has a bo and that bo's vm pointer is not set.
+ *
+ * Return: 0 on success, negative error code on error. In particular
+ * may return -EDEADLK on WW transaction contention and -EINTR if
+ * an interruptible wait is terminated by a signal.
+ */
+int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
+{
+	struct xe_vm *vm = xe_vma_vm(vma);
+	struct xe_bo *bo = xe_vma_bo(vma);
+	int ret;
+
+	XE_WARN_ON(!vm);
+
+	ret = drm_exec_lock_obj(exec, xe_vm_obj(vm));
+	if (ret)
+		return ret;
+
+	/* Nothing more to lock without a bo, or when the bo has a vm set. */
+	if (!bo || bo->vm)
+		return 0;
+
+	return drm_exec_lock_obj(exec, &bo->ttm.base);
+}
+
+/*
+ * Destroy @vma when the caller does not yet hold the object locks: lock the
+ * vma through a local drm_exec transaction, destroy it without a fence, and
+ * finalize the transaction.
+ *
+ * A locking error is only warned about; the vma is destroyed regardless.
+ */
+static void xe_vma_destroy_unlocked(struct xe_vma *vma)
+{
+	struct drm_exec exec;
+	int err;
+
+	drm_exec_init(&exec, 0, 0);
+	drm_exec_until_all_locked(&exec) {
+		err = xe_vm_lock_vma(&exec, vma);
+		drm_exec_retry_on_contention(&exec);
+		if (XE_WARN_ON(err))
+			break;
+	}
+
+	xe_vma_destroy(vma, NULL);
+
+	drm_exec_fini(&exec);
+}
+
+/**
+ * xe_vm_find_overlapping_vma() - Look up a vma in an address range
+ * @vm: The VM; vm->lock must be held (asserted with lockdep).
+ * @start: Start address of the range.
+ * @range: Size of the range; @start + @range must not exceed vm->size.
+ *
+ * Return: the vma for the first gpuva returned by drm_gpuva_find_first()
+ * for the range, or NULL if there is none or if the VM is closed or banned.
+ */
+struct xe_vma *
+xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
+{
+	struct drm_gpuva *found;
+
+	lockdep_assert_held(&vm->lock);
+
+	if (xe_vm_is_closed_or_banned(vm))
+		return NULL;
+
+	xe_assert(vm->xe, start + range <= vm->size);
+
+	found = drm_gpuva_find_first(&vm->gpuvm, start, range);
+	if (!found)
+		return NULL;
+
+	return gpuva_to_vma(found);
+}
+
+/*
+ * Insert @vma into the gpuvm of @vm. The insertion itself runs under
+ * vm->snap_mutex; vm->lock must be held by the caller.
+ *
+ * Return: the result of drm_gpuva_insert(); a failure triggers a warning.
+ */
+static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
+{
+	int err;
+
+	xe_assert(vm->xe, xe_vma_vm(vma) == vm);
+	lockdep_assert_held(&vm->lock);
+
+	mutex_lock(&vm->snap_mutex);
+	err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
+	mutex_unlock(&vm->snap_mutex);
+	XE_WARN_ON(err);	/* Shouldn't be possible */
+
+	return err;
+}
+
+/*
+ * Remove @vma from the gpuvm of @vm. The removal itself runs under
+ * vm->snap_mutex; vm->lock must be held by the caller.
+ */
+static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
+{
+	xe_assert(vm->xe, xe_vma_vm(vma) == vm);
+	lockdep_assert_held(&vm->lock);
+
+	mutex_lock(&vm->snap_mutex);
+	drm_gpuva_remove(&vma->gpuva);
+	mutex_unlock(&vm->snap_mutex);
+	/* Do not leave the cached last-fault vma pointing at a removed vma. */
+	if (vm->usm.last_fault_vma == vma)
+		vm->usm.last_fault_vma = NULL;
+}
+
+/*
+ * drm_gpuvm op_alloc callback: allocate a zeroed struct xe_vma_op and
+ * return its embedded drm_gpuva_op, or NULL if the allocation fails.
+ */
+static struct drm_gpuva_op *xe_vm_op_alloc(void)
+{
+	struct xe_vma_op *vma_op = kzalloc(sizeof(*vma_op), GFP_KERNEL);
+
+	if (likely(vma_op))
+		return &vma_op->base;
+
+	return NULL;
+}
+
+static void xe_vm_free(struct drm_gpuvm *gpuvm);
+
+/* drm_gpuvm callbacks passed to drm_gpuvm_init() when a VM is created. */
+static const struct drm_gpuvm_ops gpuvm_ops = {
+	.op_alloc = xe_vm_op_alloc,
+	.vm_bo_validate = xe_gpuvm_validate,
+	.vm_free = xe_vm_free,
+};
+
+/*
+ * Translate bits 0 and 1 of @pat_index into the corresponding PAT bits of a
+ * page-directory entry. Higher bits of @pat_index are ignored here and @xe
+ * is not used.
+ */
+static u64 pde_encode_pat_index(struct xe_device *xe, u16 pat_index)
+{
+	u64 pat_bits = 0;
+
+	pat_bits |= (pat_index & BIT(0)) ? XE_PPGTT_PTE_PAT0 : 0;
+	pat_bits |= (pat_index & BIT(1)) ? XE_PPGTT_PTE_PAT1 : 0;
+
+	return pat_bits;
+}
+
+/*
+ * Translate bits 0-4 of @pat_index into the corresponding PAT bits of a
+ * page-table entry at level @pt_level. @xe is not used.
+ */
+static u64 pte_encode_pat_index(struct xe_device *xe, u16 pat_index,
+				u32 pt_level)
+{
+	u64 pat_bits = 0;
+
+	pat_bits |= (pat_index & BIT(0)) ? XE_PPGTT_PTE_PAT0 : 0;
+	pat_bits |= (pat_index & BIT(1)) ? XE_PPGTT_PTE_PAT1 : 0;
+
+	/* Bit 2 uses a different encoding when pt_level is non-zero. */
+	if (pat_index & BIT(2))
+		pat_bits |= pt_level ? XE_PPGTT_PDE_PDPE_PAT2 :
+				       XE_PPGTT_PTE_PAT2;
+
+	pat_bits |= (pat_index & BIT(3)) ? XELPG_PPGTT_PTE_PAT3 : 0;
+	pat_bits |= (pat_index & BIT(4)) ? XE2_PPGTT_PTE_PAT4 : 0;
+
+	return pat_bits;
+}
+
+/*
+ * Return the page-size bits for an entry at @pt_level: XE_PDE_PS_2M for
+ * level 1, XE_PDPE_PS_1G for level 2 and 0 for any other level. Warns when
+ * @pt_level exceeds MAX_HUGEPTE_LEVEL.
+ */
+static u64 pte_encode_ps(u32 pt_level)
+{
+	XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL);
+
+	switch (pt_level) {
+	case 1:
+		return XE_PDE_PS_2M;
+	case 2:
+		return XE_PDPE_PS_1G;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Build a page-directory entry pointing at offset @bo_offset of @bo: the
+ * bo address, the present and read/write bits, and the PAT bits for
+ * @pat_index.
+ */
+static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset,
+			      const u16 pat_index)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+	u64 entry = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
+
+	entry |= XE_PAGE_PRESENT | XE_PAGE_RW;
+
+	return entry | pde_encode_pat_index(xe, pat_index);
+}
+
+/*
+ * Build a page-table entry at level @pt_level pointing at offset
+ * @bo_offset of @bo: the bo address, the present and read/write bits, the
+ * PAT bits for @pat_index and the page-size bits for @pt_level. The
+ * XE_PPGTT_PTE_DM bit is added when the bo is in VRAM or stolen device
+ * memory.
+ */
+static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
+			      u16 pat_index, u32 pt_level)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+	u64 pte;
+
+	pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
+	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
+	pte |= pte_encode_pat_index(xe, pat_index, pt_level);
+	pte |= pte_encode_ps(pt_level);
+
+	if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
+		pte |= XE_PPGTT_PTE_DM;
+
+	return pte;
+}
+
+/*
+ * Add the vma-dependent bits to @pte: the present bit, the read/write bit
+ * unless @vma is read-only, the PAT bits for @pat_index, the page-size bits
+ * for @pt_level, and XE_PTE_NULL when @vma is a null vma.
+ *
+ * Return: @pte with those bits OR-ed in.
+ */
+static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
+			       u16 pat_index, u32 pt_level)
+{
+	struct xe_device *xe = xe_vma_vm(vma)->xe;
+
+	pte |= XE_PAGE_PRESENT;
+
+	if (likely(!xe_vma_read_only(vma)))
+		pte |= XE_PAGE_RW;
+
+	pte |= pte_encode_pat_index(xe, pat_index, pt_level);
+	pte |= pte_encode_ps(pt_level);
+
+	if (unlikely(xe_vma_is_null(vma)))
+		pte |= XE_PTE_NULL;
+
+	return pte;
+}
+
+/*
+ * Build a page-table entry at level @pt_level for the raw address @addr:
+ * present and read/write bits, PAT bits for @pat_index, page-size bits for
+ * @pt_level, XE_PPGTT_PTE_DM when @devmem is set, plus @flags. Only
+ * XE_PTE_PS64 is allowed in @flags (asserted).
+ */
+static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr,
+				u16 pat_index,
+				u32 pt_level, bool devmem, u64 flags)
+{
+	u64 entry = addr | XE_PAGE_PRESENT | XE_PAGE_RW;
+
+	/* Avoid passing random bits directly as flags */
+	xe_assert(xe, !(flags & ~XE_PTE_PS64));
+
+	entry |= pte_encode_pat_index(xe, pat_index, pt_level);
+	entry |= pte_encode_ps(pt_level);
+
+	if (devmem)
+		entry |= XE_PPGTT_PTE_DM;
+
+	return entry | flags;
+}
+
+/* Page-table entry encoders installed as vm->pt_ops in xe_vm_create(). */
+static const struct xe_pt_ops xelp_pt_ops = {
+	.pte_encode_bo = xelp_pte_encode_bo,
+	.pte_encode_vma = xelp_pte_encode_vma,
+	.pte_encode_addr = xelp_pte_encode_addr,
+	.pde_encode_bo = xelp_pde_encode_bo,
+};
+
+static void vm_destroy_work_func(struct work_struct *w);
+
+/**
+ * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the
+ * given tile and vm.
+ * @xe: xe device.
+ * @tile: tile to set up for.
+ * @vm: vm to set up for.
+ *
+ * Sets up a pagetable tree with one page-table per level and a single
+ * leaf PTE. All pagetable entries point to the single page-table or,
+ * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and
+ * writes become NOPs.
+ *
+ * If creating the page-table for a level fails, the slot for that level is
+ * reset to NULL before returning, so that xe_vm_free_scratch() - which
+ * destroys every non-NULL slot - is never handed an error pointer.
+ * Page-tables already created for lower levels are left in place.
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile,
+				struct xe_vm *vm)
+{
+	u8 id = tile->id;
+	int i;
+
+	for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) {
+		vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i);
+		if (IS_ERR(vm->scratch_pt[id][i])) {
+			int err = PTR_ERR(vm->scratch_pt[id][i]);
+
+			/* Never leave an ERR_PTR behind in the scratch table. */
+			vm->scratch_pt[id][i] = NULL;
+			return err;
+		}
+
+		xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]);
+	}
+
+	return 0;
+}
+
+/*
+ * Destroy the per-tile scratch page tables of @vm, if the VM was created
+ * with scratch pages. Tiles without a root page table are skipped.
+ */
+static void xe_vm_free_scratch(struct xe_vm *vm)
+{
+	struct xe_tile *tile;
+	u8 id;
+
+	if (!xe_vm_has_scratch(vm))
+		return;
+
+	for_each_tile(tile, vm->xe, id) {
+		u32 level;
+
+		if (!vm->pt_root[id])
+			continue;
+
+		for (level = MAX_HUGEPTE_LEVEL;
+		     level < vm->pt_root[id]->level; level++) {
+			if (!vm->scratch_pt[id][level])
+				continue;
+
+			xe_pt_destroy(vm->scratch_pt[id][level], vm->flags,
+				      NULL);
+		}
+	}
+}
+
+/**
+ * xe_vm_create() - Allocate and initialise a VM
+ * @xe: xe device.
+ * @flags: XE_VM_FLAG_* bits describing the VM (LR mode, fault mode,
+ *	   migration, scratch page, ...).
+ *
+ * Allocates the VM, initialises its locks, lists and gpuvm base object,
+ * creates a root page table per tile (only the tile encoded in @flags for
+ * migration VMs), optionally creates scratch page tables, and, for
+ * non-migration VMs, creates one copy-class exec queue per tile against that
+ * tile's migrate VM.
+ *
+ * Once drm_gpuvm_init() has run, error paths tear the VM down through
+ * xe_vm_close_and_put(); before that point cleanup is done by hand.
+ *
+ * Return: the new VM on success, ERR_PTR() on failure.
+ */
+struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
+{
+	struct drm_gem_object *vm_resv_obj;
+	struct xe_vm *vm;
+	int err, number_tiles = 0;
+	struct xe_tile *tile;
+	u8 id;
+
+	vm = kzalloc(sizeof(*vm), GFP_KERNEL);
+	if (!vm)
+		return ERR_PTR(-ENOMEM);
+
+	vm->xe = xe;
+
+	vm->size = 1ull << xe->info.va_bits;
+
+	vm->flags = flags;
+
+	init_rwsem(&vm->lock);
+	mutex_init(&vm->snap_mutex);
+
+	INIT_LIST_HEAD(&vm->rebind_list);
+
+	INIT_LIST_HEAD(&vm->userptr.repin_list);
+	INIT_LIST_HEAD(&vm->userptr.invalidated);
+	init_rwsem(&vm->userptr.notifier_lock);
+	spin_lock_init(&vm->userptr.invalidated_lock);
+
+	INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
+
+	INIT_LIST_HEAD(&vm->preempt.exec_queues);
+	vm->preempt.min_run_period_ms = 10;	/* FIXME: Wire up to uAPI */
+
+	for_each_tile(tile, xe, id)
+		xe_range_fence_tree_init(&vm->rftree[id]);
+
+	vm->pt_ops = &xelp_pt_ops;
+
+	/*
+	 * Long-running workloads are not protected by the scheduler references.
+	 * By design, run_job for long-running workloads returns NULL and the
+	 * scheduler drops all the references of it, hence protecting the VM
+	 * for this case is necessary.
+	 */
+	if (flags & XE_VM_FLAG_LR_MODE)
+		xe_pm_runtime_get_noresume(xe);
+
+	vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
+	if (!vm_resv_obj) {
+		err = -ENOMEM;
+		goto err_no_resv;
+	}
+
+	drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm,
+		       vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops);
+
+	/* Drop the local reference on the resv object after handing it over */
+	drm_gem_object_put(vm_resv_obj);
+
+	err = xe_vm_lock(vm, true);
+	if (err)
+		goto err_close;
+
+	if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
+		vm->flags |= XE_VM_FLAG_64K;
+
+	/* Root page table per tile; migration VMs only use their own tile */
+	for_each_tile(tile, xe, id) {
+		if (flags & XE_VM_FLAG_MIGRATION &&
+		    tile->id != XE_VM_FLAG_TILE_ID(flags))
+			continue;
+
+		vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level);
+		if (IS_ERR(vm->pt_root[id])) {
+			err = PTR_ERR(vm->pt_root[id]);
+			/* Teardown skips NULL roots, so don't leave an ERR_PTR */
+			vm->pt_root[id] = NULL;
+			goto err_unlock_close;
+		}
+	}
+
+	if (xe_vm_has_scratch(vm)) {
+		for_each_tile(tile, xe, id) {
+			if (!vm->pt_root[id])
+				continue;
+
+			err = xe_vm_create_scratch(xe, tile, vm);
+			if (err)
+				goto err_unlock_close;
+		}
+		vm->batch_invalidate_tlb = true;
+	}
+
+	if (vm->flags & XE_VM_FLAG_LR_MODE) {
+		INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
+		vm->batch_invalidate_tlb = false;
+	}
+
+	/* Fill pt_root after allocating scratch tables */
+	for_each_tile(tile, xe, id) {
+		if (!vm->pt_root[id])
+			continue;
+
+		xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
+	}
+	xe_vm_unlock(vm);
+
+	/* Kernel migration VM shouldn't have a circular loop.. */
+	if (!(flags & XE_VM_FLAG_MIGRATION)) {
+		for_each_tile(tile, xe, id) {
+			struct xe_gt *gt = tile->primary_gt;
+			struct xe_vm *migrate_vm;
+			struct xe_exec_queue *q;
+			u32 create_flags = EXEC_QUEUE_FLAG_VM;
+
+			if (!vm->pt_root[id])
+				continue;
+
+			migrate_vm = xe_migrate_get_vm(tile->migrate);
+			q = xe_exec_queue_create_class(xe, gt, migrate_vm,
+						       XE_ENGINE_CLASS_COPY,
+						       create_flags);
+			xe_vm_put(migrate_vm);
+			if (IS_ERR(q)) {
+				err = PTR_ERR(q);
+				goto err_close;
+			}
+			vm->q[id] = q;
+			number_tiles++;
+		}
+	}
+
+	/* A composite fence context is only needed with more than one tile */
+	if (number_tiles > 1)
+		vm->composite_fence_ctx = dma_fence_context_alloc(1);
+
+	/* Account the VM as fault or non-fault mode; migration VMs are not counted */
+	mutex_lock(&xe->usm.lock);
+	if (flags & XE_VM_FLAG_FAULT_MODE)
+		xe->usm.num_vm_in_fault_mode++;
+	else if (!(flags & XE_VM_FLAG_MIGRATION))
+		xe->usm.num_vm_in_non_fault_mode++;
+	mutex_unlock(&xe->usm.lock);
+
+	trace_xe_vm_create(vm);
+
+	return vm;
+
+err_unlock_close:
+	xe_vm_unlock(vm);
+err_close:
+	xe_vm_close_and_put(vm);
+	return ERR_PTR(err);
+
+err_no_resv:
+	/* gpuvm was never initialised: undo the manual setup directly */
+	mutex_destroy(&vm->snap_mutex);
+	for_each_tile(tile, xe, id)
+		xe_range_fence_tree_fini(&vm->rftree[id]);
+	kfree(vm);
+	if (flags & XE_VM_FLAG_LR_MODE)
+		xe_pm_runtime_put(xe);
+	return ERR_PTR(err);
+}
+
+/*
+ * Mark @vm as closed by zeroing its size under the VM lock held for write.
+ * vm_destroy_work_func() asserts that the size is zero, i.e. that the VM was
+ * closed before it is freed.
+ */
+static void xe_vm_close(struct xe_vm *vm)
+{
+	down_write(&vm->lock);
+	vm->size = 0;
+	up_write(&vm->lock);
+}
+
+/**
+ * xe_vm_close_and_put() - Close a VM, tear down its contents and drop a
+ * reference
+ * @vm: the VM to close.
+ *
+ * Closes the VM, flushes the rebind worker for preempt-fence-mode VMs,
+ * releases and kills the per-tile exec queues, removes and destroys all VMAs,
+ * frees the scratch and root page tables, erases the VM's ASID mapping and
+ * finally drops the caller's reference with xe_vm_put().
+ *
+ * NOTE(review): this block still contains unresolved merge-conflict markers
+ * around the acquisition of xe->usm.lock; as it stands it does not compile.
+ */
+void xe_vm_close_and_put(struct xe_vm *vm)
+{
+	LIST_HEAD(contested);
+	struct xe_device *xe = vm->xe;
+	struct xe_tile *tile;
+	struct xe_vma *vma, *next_vma;
+	struct drm_gpuva *gpuva, *next;
+	u8 id;
+
+	xe_assert(xe, !vm->preempt.num_exec_queues);
+
+	xe_vm_close(vm);
+	if (xe_vm_in_preempt_fence_mode(vm))
+		flush_work(&vm->preempt.rebind_work);
+
+	/* Drop the last fence of each per-tile exec queue under the VM lock */
+	down_write(&vm->lock);
+	for_each_tile(tile, xe, id) {
+		if (vm->q[id])
+			xe_exec_queue_last_fence_put(vm->q[id], vm);
+	}
+	up_write(&vm->lock);
+
+	/* Kill and release the exec queues with vm->lock dropped */
+	for_each_tile(tile, xe, id) {
+		if (vm->q[id]) {
+			xe_exec_queue_kill(vm->q[id]);
+			xe_exec_queue_put(vm->q[id]);
+			vm->q[id] = NULL;
+		}
+	}
+
+	down_write(&vm->lock);
+	xe_vm_lock(vm, false);
+	drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) {
+		vma = gpuva_to_vma(gpuva);
+
+		if (xe_vma_has_no_bo(vma)) {
+			down_read(&vm->userptr.notifier_lock);
+			vma->gpuva.flags |= XE_VMA_DESTROYED;
+			up_read(&vm->userptr.notifier_lock);
+		}
+
+		xe_vm_remove_vma(vm, vma);
+
+		/* easy case, remove from VMA? */
+		if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
+			list_del_init(&vma->combined_links.rebind);
+			xe_vma_destroy(vma, NULL);
+			continue;
+		}
+
+		/* Remaining VMAs are destroyed later, after the VM resv is unlocked */
+		list_move_tail(&vma->combined_links.destroy, &contested);
+		vma->gpuva.flags |= XE_VMA_DESTROYED;
+	}
+
+	/*
+	 * All vm operations will add shared fences to resv.
+	 * The only exception is eviction for a shared object,
+	 * but even so, the unbind when evicted would still
+	 * install a fence to resv. Hence it's safe to
+	 * destroy the pagetables immediately.
+	 */
+	xe_vm_free_scratch(vm);
+
+	for_each_tile(tile, xe, id) {
+		if (vm->pt_root[id]) {
+			xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
+			vm->pt_root[id] = NULL;
+		}
+	}
+	xe_vm_unlock(vm);
+
+	/*
+	 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL
+	 * Since we hold a refcount to the bo, we can remove and free
+	 * the members safely without locking.
+	 */
+	list_for_each_entry_safe(vma, next_vma, &contested,
+				 combined_links.destroy) {
+		list_del_init(&vma->combined_links.destroy);
+		xe_vma_destroy_unlocked(vma);
+	}
+
+	up_write(&vm->lock);
+
+<<<<<<<
+	down_write(&xe->usm.lock);
+=======
+	mutex_lock(&xe->usm.lock);
+	if (vm->flags & XE_VM_FLAG_FAULT_MODE)
+		xe->usm.num_vm_in_fault_mode--;
+	else if (!(vm->flags & XE_VM_FLAG_MIGRATION))
+		xe->usm.num_vm_in_non_fault_mode--;
+
+>>>>>>>
+	if (vm->usm.asid) {
+		void *lookup;
+
+		xe_assert(xe, xe->info.has_asid);
+		xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION));
+
+		/* The ASID must still map to this VM when it is erased */
+		lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
+		xe_assert(xe, lookup == vm);
+	}
+	up_write(&xe->usm.lock);
+
+	for_each_tile(tile, xe, id)
+		xe_range_fence_tree_fini(&vm->rftree[id]);
+
+	xe_vm_put(vm);
+}
+
+/*
+ * Worker that performs the final free of a VM. The VM must already have been
+ * closed (size zeroed) and its root page tables released.
+ */
+static void vm_destroy_work_func(struct work_struct *w)
+{
+	struct xe_vm *vm = container_of(w, struct xe_vm, destroy_work);
+	struct xe_device *xe = vm->xe;
+	struct xe_tile *tile;
+	u8 tile_id;
+
+	/* A non-zero size means xe_vm_close_and_put() was never called */
+	xe_assert(xe, !vm->size);
+
+	if (xe_vm_in_preempt_fence_mode(vm)) {
+		flush_work(&vm->preempt.rebind_work);
+	}
+
+	mutex_destroy(&vm->snap_mutex);
+
+	if (vm->flags & XE_VM_FLAG_LR_MODE) {
+		xe_pm_runtime_put(xe);
+	}
+
+	for_each_tile(tile, xe, tile_id) {
+		XE_WARN_ON(vm->pt_root[tile_id]);
+	}
+
+	trace_xe_vm_free(vm);
+
+	if (vm->xef) {
+		xe_file_put(vm->xef);
+	}
+
+	kfree(vm);
+}
+
+/*
+ * Schedule the final destruction of the VM embedding @gpuvm. The actual free
+ * happens in vm_destroy_work_func(), queued on system_unbound_wq.
+ */
+static void xe_vm_free(struct drm_gpuvm *gpuvm)
+{
+	struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);
+
+	/* To destroy the VM we need to be able to sleep */
+	queue_work(system_unbound_wq, &vm->destroy_work);
+}
+
+/**
+ * xe_vm_lookup() - Look up a VM by id in a file's VM table
+ * @xef: xe file whose VM xarray is searched.
+ * @id: VM id to look up.
+ *
+ * The lookup and the reference grab both happen under xef->vm.lock.
+ *
+ * Return: the VM with a reference taken via xe_vm_get() if an entry exists
+ * for @id, otherwise the NULL returned by xa_load().
+ */
+struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
+{
+	struct xe_vm *vm;
+
+	mutex_lock(&xef->vm.lock);
+	vm = xa_load(&xef->vm.xa, id);
+	if (vm)
+		xe_vm_get(vm);
+	mutex_unlock(&xef->vm.lock);
+
+	return vm;
+}
+
+/*
+ * Encode a PDE for the root page-table BO of @vm on @tile, at offset 0 and
+ * using the device's write-back PAT index.
+ */
+u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
+{
+	struct xe_device *xe = tile_to_xe(tile);
+
+	return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0,
+					 xe->pat.idx[XE_CACHE_WB]);
+}
+
+/* Pick the exec queue to wait on: @q when given, else the VM's queue 0. */
+static struct xe_exec_queue *
+to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
+{
+	if (q)
+		return q;
+
+	return vm->q[0];
+}
+
+/*
+ * Unbind @vma from every tile on which it is currently present.
+ *
+ * @q selects the exec queue to use; when NULL the VM's per-tile queue is used.
+ * @syncs/@num_syncs are only forwarded when @first_op is set. @last_op is not
+ * used in this function.
+ *
+ * When the VMA is present on more than one tile, the per-tile fences are
+ * wrapped in a dma_fence_array. When it is present on no tile, the last
+ * fence of the wait exec queue is returned instead.
+ *
+ * Return: a fence on success, ERR_PTR() on failure.
+ */
+static struct dma_fence *
+xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
+		 struct xe_sync_entry *syncs, u32 num_syncs,
+		 bool first_op, bool last_op)
+{
+	struct xe_vm *vm = xe_vma_vm(vma);
+	struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
+	struct xe_tile *tile;
+	struct dma_fence *fence = NULL;
+	struct dma_fence **fences = NULL;
+	struct dma_fence_array *cf = NULL;
+	int cur_fence = 0;
+	int number_tiles = hweight8(vma->tile_present);
+	int err;
+	u8 id;
+
+	trace_xe_vma_unbind(vma);
+
+	/* A fence array is only needed when more than one tile is involved */
+	if (number_tiles > 1) {
+		fences = kmalloc_array(number_tiles, sizeof(*fences),
+				       GFP_KERNEL);
+		if (!fences)
+			return ERR_PTR(-ENOMEM);
+	}
+
+	for_each_tile(tile, vm->xe, id) {
+		if (!(vma->tile_present & BIT(id)))
+			goto next;
+
+		fence = __xe_pt_unbind_vma(tile, vma, q ? q : vm->q[id],
+					   first_op ? syncs : NULL,
+					   first_op ? num_syncs : 0);
+		if (IS_ERR(fence)) {
+			err = PTR_ERR(fence);
+			goto err_fences;
+		}
+
+		if (fences)
+			fences[cur_fence++] = fence;
+
+next:
+		/* Step a user-supplied queue along its multi-GT list */
+		if (q && vm->pt_root[id] && !list_empty(&q->multi_gt_list))
+			q = list_next_entry(q, multi_gt_list);
+	}
+
+	if (fences) {
+		cf = dma_fence_array_create(number_tiles, fences,
+					    vm->composite_fence_ctx,
+					    vm->composite_fence_seqno++,
+					    false);
+		if (!cf) {
+			/* Give back the seqno consumed by the failed create */
+			--vm->composite_fence_seqno;
+			err = -ENOMEM;
+			goto err_fences;
+		}
+	}
+
+	/* Composite fence, else the single tile fence, else the queue's last fence */
+	fence = cf ? &cf->base : !fence ?
+		xe_exec_queue_last_fence_get(wait_exec_queue, vm) : fence;
+
+	return fence;
+
+err_fences:
+	if (fences) {
+		while (cur_fence)
+			dma_fence_put(fences[--cur_fence]);
+		kfree(fences);
+	}
+
+	return ERR_PTR(err);
+}
+
+/*
+ * Bind @vma on every tile selected by @tile_mask.
+ *
+ * @q selects the exec queue to use; when NULL the VM's per-tile queue is used.
+ * @syncs/@num_syncs are only forwarded when @first_op is set. @last_op is not
+ * used in this function. A tile on which the VMA is already present is bound
+ * as a rebind.
+ *
+ * When more than one tile is selected, the per-tile fences are wrapped in a
+ * dma_fence_array. If @tile_mask selects no tile at all, NULL is returned
+ * rather than an indeterminate pointer.
+ *
+ * Return: a fence (or NULL, see above) on success, ERR_PTR() on failure.
+ */
+static struct dma_fence *
+xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
+	       struct xe_sync_entry *syncs, u32 num_syncs,
+	       u8 tile_mask, bool first_op, bool last_op)
+{
+	struct xe_tile *tile;
+	/* Initialised so an empty tile_mask cannot return garbage */
+	struct dma_fence *fence = NULL;
+	struct dma_fence **fences = NULL;
+	struct dma_fence_array *cf = NULL;
+	struct xe_vm *vm = xe_vma_vm(vma);
+	int cur_fence = 0;
+	int number_tiles = hweight8(tile_mask);
+	int err;
+	u8 id;
+
+	trace_xe_vma_bind(vma);
+
+	/* A fence array is only needed when more than one tile is involved */
+	if (number_tiles > 1) {
+		fences = kmalloc_array(number_tiles, sizeof(*fences),
+				       GFP_KERNEL);
+		if (!fences)
+			return ERR_PTR(-ENOMEM);
+	}
+
+	for_each_tile(tile, vm->xe, id) {
+		if (!(tile_mask & BIT(id)))
+			goto next;
+
+		fence = __xe_pt_bind_vma(tile, vma, q ? q : vm->q[id],
+					 first_op ? syncs : NULL,
+					 first_op ? num_syncs : 0,
+					 vma->tile_present & BIT(id));
+		if (IS_ERR(fence)) {
+			err = PTR_ERR(fence);
+			goto err_fences;
+		}
+
+		if (fences)
+			fences[cur_fence++] = fence;
+
+next:
+		/* Step a user-supplied queue along its multi-GT list */
+		if (q && vm->pt_root[id] && !list_empty(&q->multi_gt_list))
+			q = list_next_entry(q, multi_gt_list);
+	}
+
+	if (fences) {
+		cf = dma_fence_array_create(number_tiles, fences,
+					    vm->composite_fence_ctx,
+					    vm->composite_fence_seqno++,
+					    false);
+		if (!cf) {
+			/* Give back the seqno consumed by the failed create */
+			--vm->composite_fence_seqno;
+			err = -ENOMEM;
+			goto err_fences;
+		}
+	}
+
+	return cf ? &cf->base : fence;
+
+err_fences:
+	if (fences) {
+		while (cur_fence)
+			dma_fence_put(fences[--cur_fence]);
+		kfree(fences);
+	}
+
+	return ERR_PTR(err);
+}
+
+/*
+ * Return the user fence of the first sync entry that is a user fence, as
+ * obtained through xe_sync_ufence_get(), or NULL if no entry qualifies.
+ */
+static struct xe_user_fence *
+find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
+{
+	unsigned int idx = 0;
+
+	while (idx < num_syncs) {
+		struct xe_sync_entry *entry = &syncs[idx];
+
+		if (xe_sync_is_ufence(entry))
+			return xe_sync_ufence_get(entry);
+
+		idx++;
+	}
+
+	return NULL;
+}
+
+/*
+ * Bind @vma, either immediately through xe_vm_bind_vma() or, for the deferred
+ * case (asserted to be a fault-mode VM), by just returning the last fence of
+ * the wait exec queue. Requires the VM and @bo locks to be held.
+ */
+static struct dma_fence *
+xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q,
+	   struct xe_bo *bo, struct xe_sync_entry *syncs, u32 num_syncs,
+	   u8 tile_mask, bool immediate, bool first_op, bool last_op)
+{
+	xe_vm_assert_held(vm);
+	xe_bo_assert_held(bo);
+
+	if (immediate)
+		return xe_vm_bind_vma(vma, q, syncs, num_syncs, tile_mask,
+				      first_op, last_op);
+
+	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
+
+	return xe_exec_queue_last_fence_get(to_wait_exec_queue(vm, q), vm);
+}
+
+/*
+ * Unbind @vma via xe_vm_unbind_vma() and hand its result (fence or ERR_PTR)
+ * straight back. Requires the VM lock and the VMA's BO lock to be held.
+ */
+static struct dma_fence *
+xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
+	     struct xe_exec_queue *q, struct xe_sync_entry *syncs,
+	     u32 num_syncs, bool first_op, bool last_op)
+{
+	xe_vm_assert_held(vm);
+	xe_bo_assert_held(xe_vma_bo(vma));
+
+	return xe_vm_unbind_vma(vma, q, syncs, num_syncs, first_op, last_op);
+}
+
+/* Every DRM_XE_VM_CREATE_FLAG_* bit that userspace is allowed to pass */
+#define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
+				    DRM_XE_VM_CREATE_FLAG_LR_MODE | \
+				    DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
+
+/**
+ * xe_vm_create_ioctl() - Create a VM on behalf of userspace
+ * @dev: DRM device.
+ * @data: ioctl payload, a struct drm_xe_vm_create.
+ * @file: DRM file the VM is created for.
+ *
+ * Validates the arguments, creates the VM, allocates an ASID when the device
+ * has them, records the page-table BOs against the client and only then
+ * publishes the VM id in the file's VM table. Publishing the id last means
+ * that no other thread can look the VM up (and destroy it) while this ioctl
+ * is still dereferencing it.
+ *
+ * Return: 0 on success with args->vm_id filled in, negative error code on
+ * failure.
+ */
+int xe_vm_create_ioctl(struct drm_device *dev, void *data,
+		       struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct drm_xe_vm_create *args = data;
+	struct xe_tile *tile;
+	struct xe_vm *vm;
+	u32 id, asid;
+	int err;
+	u32 flags = 0;
+
+	if (XE_IOCTL_DBG(xe, args->extensions))
+		return -EINVAL;
+
+	/* This workaround forces a scratch page regardless of the request */
+	if (XE_WA(xe_root_mmio_gt(xe), 14016763929))
+		args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;
+
+	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
+			 !xe->info.has_usm))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
+		return -EINVAL;
+
+	/* Scratch page and fault mode are mutually exclusive */
+	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE &&
+			 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
+		return -EINVAL;
+
+	/* Fault mode requires LR mode */
+	if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) &&
+			 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
+		return -EINVAL;
+
+	/* The device must not already be in the opposite mode */
+	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
+			 xe_device_in_non_fault_mode(xe)))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) &&
+			 xe_device_in_fault_mode(xe)))
+		return -EINVAL;
+
+	if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
+		flags |= XE_VM_FLAG_SCRATCH_PAGE;
+	if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
+		flags |= XE_VM_FLAG_LR_MODE;
+	if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
+		flags |= XE_VM_FLAG_FAULT_MODE;
+
+	vm = xe_vm_create(xe, flags);
+	if (IS_ERR(vm))
+		return PTR_ERR(vm);
+
+	if (xe->info.has_asid) {
+		down_write(&xe->usm.lock);
+		err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
+				      XA_LIMIT(1, XE_MAX_ASID - 1),
+				      &xe->usm.next_asid, GFP_KERNEL);
+		up_write(&xe->usm.lock);
+		/* A positive return only signals wrap-around, not failure */
+		if (err < 0)
+			goto err_close_and_put;
+
+		vm->usm.asid = asid;
+	}
+
+	vm->xef = xe_file_get(xef);
+
+	/* Record BO memory for VM pagetable created against client */
+	for_each_tile(tile, xe, id)
+		if (vm->pt_root[id])
+			xe_drm_client_add_bo(vm->xef->client, vm->pt_root[id]->bo);
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
+	/* Warning: Security issue - never enable by default */
+	args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
+#endif
+
+	/*
+	 * The user-visible id must be allocated last: once it is in the table
+	 * another thread can destroy the VM, so nothing below may touch @vm.
+	 */
+	mutex_lock(&xef->vm.lock);
+	err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
+	mutex_unlock(&xef->vm.lock);
+	if (err)
+		goto err_close_and_put;
+
+	args->vm_id = id;
+
+	return 0;
+
+err_close_and_put:
+	xe_vm_close_and_put(vm);
+
+	return err;
+}
+
+/*
+ * Destroy ioctl: remove the VM with args->vm_id from the file's VM table and
+ * close it. Fails with -ENOENT for an unknown id and -EBUSY while the VM
+ * still has preempt exec queues attached.
+ */
+int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct drm_xe_vm_destroy *args = data;
+	struct xe_vm *vm;
+	int err = 0;
+
+	if (XE_IOCTL_DBG(xe, args->pad))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
+		return -EINVAL;
+
+	/* Lookup and removal happen under the same lock hold */
+	mutex_lock(&xef->vm.lock);
+	vm = xa_load(&xef->vm.xa, args->vm_id);
+	if (XE_IOCTL_DBG(xe, !vm)) {
+		err = -ENOENT;
+	} else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues)) {
+		err = -EBUSY;
+	} else {
+		xa_erase(&xef->vm.xa, args->vm_id);
+	}
+	mutex_unlock(&xef->vm.lock);
+
+	if (err)
+		return err;
+
+	xe_vm_close_and_put(vm);
+
+	return 0;
+}
+
+/* Lookup table from a region index (0..2) to an XE_PL_* placement. */
+static const u32 region_to_mem_type[] = {
+	[0] = XE_PL_TT,
+	[1] = XE_PL_VRAM0,
+	[2] = XE_PL_VRAM1,
+};
+
+/*
+ * Prefetch @vma: if some tile in its tile_mask is not validly present, do an
+ * immediate bind on the full tile_mask; otherwise nothing needs binding and
+ * the last fence of the wait exec queue is returned.
+ */
+static struct dma_fence *
+xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
+	       struct xe_exec_queue *q, struct xe_sync_entry *syncs,
+	       u32 num_syncs, bool first_op, bool last_op)
+{
+	bool fully_bound =
+		vma->tile_mask == (vma->tile_present & ~vma->tile_invalidated);
+
+	if (fully_bound)
+		return xe_exec_queue_last_fence_get(to_wait_exec_queue(vm, q),
+						    vm);
+
+	return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs, num_syncs,
+			  vma->tile_mask, true, first_op, last_op);
+}
+
+/*
+ * Flag @vma as destroyed (under the userptr notifier lock held for read) and,
+ * when @post_commit is set, i.e. the VMA had already been inserted into the
+ * VM, remove it from the VM again.
+ */
+static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
+			     bool post_commit)
+{
+	down_read(&vm->userptr.notifier_lock);
+	vma->gpuva.flags |= XE_VMA_DESTROYED;
+	up_read(&vm->userptr.notifier_lock);
+	if (post_commit)
+		xe_vm_remove_vma(vm, vma);
+}
+
+/* Shorthand cast used to feed u64 values to the %llx format specifiers */
+#undef ULL
+#define ULL	unsigned long long
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
+/*
+ * Debug helper: log the address range (and, for unmaps, the keep flag) of a
+ * single GPUVA operation. Only built with CONFIG_DRM_XE_DEBUG_VM; otherwise
+ * the empty stub below is used.
+ */
+static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
+{
+	struct xe_vma *vma;
+
+	switch (op->op) {
+	case DRM_GPUVA_OP_MAP:
+		vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx",
+		       (ULL)op->map.va.addr, (ULL)op->map.va.range);
+		break;
+	case DRM_GPUVA_OP_REMAP:
+		/* A remap is an unmap plus optional prev/next remainders */
+		vma = gpuva_to_vma(op->remap.unmap->va);
+		vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
+		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
+		       op->remap.unmap->keep ? 1 : 0);
+		if (op->remap.prev)
+			vm_dbg(&xe->drm,
+			       "REMAP:PREV: addr=0x%016llx, range=0x%016llx",
+			       (ULL)op->remap.prev->va.addr,
+			       (ULL)op->remap.prev->va.range);
+		if (op->remap.next)
+			vm_dbg(&xe->drm,
+			       "REMAP:NEXT: addr=0x%016llx, range=0x%016llx",
+			       (ULL)op->remap.next->va.addr,
+			       (ULL)op->remap.next->va.range);
+		break;
+	case DRM_GPUVA_OP_UNMAP:
+		vma = gpuva_to_vma(op->unmap.va);
+		vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
+		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
+		       op->unmap.keep ? 1 : 0);
+		break;
+	case DRM_GPUVA_OP_PREFETCH:
+		vma = gpuva_to_vma(op->prefetch.va);
+		vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx",
+		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma));
+		break;
+	default:
+		drm_warn(&xe->drm, "NOT POSSIBLE");
+	}
+}
+#else
+/* No-op stub for builds without CONFIG_DRM_XE_DEBUG_VM */
+static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
+{
+}
+#endif
+
+/*
+ * Create operations list from IOCTL arguments, setup operations fields so parse
+ * and commit steps are decoupled from IOCTL arguments. This step can fail.
+ *
+ * Must be called with vm->lock held for write. @bo is required for
+ * DRM_XE_VM_BIND_OP_UNMAP_ALL (asserted) and may be NULL otherwise.
+ * Returns the ops list on success or an ERR_PTR() on failure; an unknown
+ * @operation yields -EINVAL.
+ */
+static struct drm_gpuva_ops *
+vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
+			 u64 bo_offset_or_userptr, u64 addr, u64 range,
+			 u32 operation, u32 flags,
+			 u32 prefetch_region, u16 pat_index)
+{
+	struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
+	struct drm_gpuva_ops *ops;
+	struct drm_gpuva_op *__op;
+	struct drm_gpuvm_bo *vm_bo;
+	int err;
+
+	lockdep_assert_held_write(&vm->lock);
+
+	vm_dbg(&vm->xe->drm,
+	       "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx",
+	       operation, (ULL)addr, (ULL)range,
+	       (ULL)bo_offset_or_userptr);
+
+	switch (operation) {
+	case DRM_XE_VM_BIND_OP_MAP:
+	case DRM_XE_VM_BIND_OP_MAP_USERPTR:
+		ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range,
+						  obj, bo_offset_or_userptr);
+		break;
+	case DRM_XE_VM_BIND_OP_UNMAP:
+		ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range);
+		break;
+	case DRM_XE_VM_BIND_OP_PREFETCH:
+		ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range);
+		break;
+	case DRM_XE_VM_BIND_OP_UNMAP_ALL:
+		xe_assert(vm->xe, bo);
+
+		/* The vm_bo lookup and ops creation run with the BO locked */
+		err = xe_bo_lock(bo, true);
+		if (err)
+			return ERR_PTR(err);
+
+		vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj);
+		if (IS_ERR(vm_bo)) {
+			xe_bo_unlock(bo);
+			return ERR_CAST(vm_bo);
+		}
+
+		ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
+		drm_gpuvm_bo_put(vm_bo);
+		xe_bo_unlock(bo);
+		break;
+	default:
+		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
+		ops = ERR_PTR(-EINVAL);
+	}
+	if (IS_ERR(ops))
+		return ops;
+
+	/* Copy the ioctl-level attributes into each generated operation */
+	drm_gpuva_for_each_op(__op, ops) {
+		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
+
+		if (__op->op == DRM_GPUVA_OP_MAP) {
+			op->map.immediate =
+				flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE;
+			op->map.read_only =
+				flags & DRM_XE_VM_BIND_FLAG_READONLY;
+			op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
+			op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE;
+			op->map.pat_index = pat_index;
+		} else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
+			op->prefetch.region = prefetch_region;
+		}
+
+		print_op(vm->xe, __op);
+	}
+
+	return ops;
+}
+
+/*
+ * Create a VMA for the GPUVA map operation @op.
+ *
+ * When the operation is backed by a BO, the BO (and, for BOs not private to a
+ * VM, the VM's resv object as well) is locked through drm_exec around the VMA
+ * creation. Userptr VMAs get their pages pinned; VMAs backed by a BO that is
+ * not private to a VM get the VM's preempt fences added to the BO.
+ *
+ * Must be called with vm->lock held for write.
+ *
+ * Return: the new VMA, or an ERR_PTR() on failure.
+ */
+static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
+			      u16 pat_index, unsigned int flags)
+{
+	struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
+	struct drm_exec exec;
+	struct xe_vma *vma;
+	int err = 0;
+
+	lockdep_assert_held_write(&vm->lock);
+
+	if (bo) {
+		drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+		drm_exec_until_all_locked(&exec) {
+			err = 0;
+			if (!bo->vm) {
+				err = drm_exec_lock_obj(&exec, xe_vm_obj(vm));
+				drm_exec_retry_on_contention(&exec);
+			}
+			if (!err) {
+				err = drm_exec_lock_obj(&exec, &bo->ttm.base);
+				drm_exec_retry_on_contention(&exec);
+			}
+			if (err) {
+				drm_exec_fini(&exec);
+				return ERR_PTR(err);
+			}
+		}
+	}
+	vma = xe_vma_create(vm, bo, op->gem.offset,
+			    op->va.addr, op->va.addr +
+			    op->va.range - 1, pat_index, flags);
+	/* err is still 0 here, so the ERR_PTR is returned untouched below */
+	if (IS_ERR(vma))
+		goto err_unlock;
+
+	if (xe_vma_is_userptr(vma))
+		err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
+	else if (!xe_vma_has_no_bo(vma) && !bo->vm)
+		err = add_preempt_fences(vm, bo);
+
+err_unlock:
+	if (bo)
+		drm_exec_fini(&exec);
+
+	/* Post-creation failure: destroy the VMA that was never inserted */
+	if (err) {
+		prep_vma_destroy(vm, vma, false);
+		xe_vma_destroy_unlocked(vma);
+		vma = ERR_PTR(err);
+	}
+
+	return vma;
+}
+
+/*
+ * Report the largest PTE size recorded in the VMA's flags, checked from
+ * largest to smallest. A compact mapping counts as 2M. With no size flag set
+ * the maximum (1G) is assumed.
+ */
+static u64 xe_vma_max_pte_size(struct xe_vma *vma)
+{
+	if (vma->gpuva.flags & XE_VMA_PTE_1G)
+		return SZ_1G;
+
+	if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT))
+		return SZ_2M;
+
+	if (vma->gpuva.flags & XE_VMA_PTE_64K)
+		return SZ_64K;
+
+	if (vma->gpuva.flags & XE_VMA_PTE_4K)
+		return SZ_4K;
+
+	/* Uninitialized, used max size */
+	return SZ_1G;
+}
+
+/*
+ * Record @size in the VMA's flags by setting the matching XE_VMA_PTE_* bit.
+ * Sizes other than 1G, 2M, 64K and 4K are silently ignored.
+ */
+static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
+{
+	if (size == SZ_1G)
+		vma->gpuva.flags |= XE_VMA_PTE_1G;
+	else if (size == SZ_2M)
+		vma->gpuva.flags |= XE_VMA_PTE_2M;
+	else if (size == SZ_64K)
+		vma->gpuva.flags |= XE_VMA_PTE_64K;
+	else if (size == SZ_4K)
+		vma->gpuva.flags |= XE_VMA_PTE_4K;
+}
+
+/*
+ * Commit a parsed VMA operation to the VM: insert newly created VMAs and
+ * mark/remove the VMAs being unmapped. The XE_VMA_OP_*COMMITTED flags record
+ * exactly which steps took effect.
+ *
+ * For a REMAP, committing stops at the first failed insertion, so a VMA is
+ * only ever in the VM if its COMMITTED flag is set, and the returned value
+ * is always a single error code rather than a bitwise mix of several.
+ *
+ * Must be called with vm->lock held for write.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
+{
+	int err = 0;
+
+	lockdep_assert_held_write(&vm->lock);
+
+	switch (op->base.op) {
+	case DRM_GPUVA_OP_MAP:
+		err = xe_vm_insert_vma(vm, op->map.vma);
+		if (!err)
+			op->flags |= XE_VMA_OP_COMMITTED;
+		break;
+	case DRM_GPUVA_OP_REMAP:
+	{
+		/* Sample before the old VMA is marked destroyed and removed */
+		u8 tile_present =
+			gpuva_to_vma(op->base.remap.unmap->va)->tile_present;
+
+		prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
+				 true);
+		op->flags |= XE_VMA_OP_COMMITTED;
+
+		if (op->remap.prev) {
+			err = xe_vm_insert_vma(vm, op->remap.prev);
+			if (!err)
+				op->flags |= XE_VMA_OP_PREV_COMMITTED;
+			/* A skipped rebind inherits the old VMA's tile presence */
+			if (!err && op->remap.skip_prev) {
+				op->remap.prev->tile_present =
+					tile_present;
+				op->remap.prev = NULL;
+			}
+		}
+		/* Don't insert "next" once "prev" has already failed */
+		if (!err && op->remap.next) {
+			err = xe_vm_insert_vma(vm, op->remap.next);
+			if (!err)
+				op->flags |= XE_VMA_OP_NEXT_COMMITTED;
+			if (!err && op->remap.skip_next) {
+				op->remap.next->tile_present =
+					tile_present;
+				op->remap.next = NULL;
+			}
+		}
+
+		/* Adjust for partial unbind after removing VMA from VM */
+		if (!err) {
+			op->base.remap.unmap->va->va.addr = op->remap.start;
+			op->base.remap.unmap->va->va.range = op->remap.range;
+		}
+		break;
+	}
+	case DRM_GPUVA_OP_UNMAP:
+		prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
+		op->flags |= XE_VMA_OP_COMMITTED;
+		break;
+	case DRM_GPUVA_OP_PREFETCH:
+		op->flags |= XE_VMA_OP_COMMITTED;
+		break;
+	default:
+		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
+	}
+
+	return err;
+}
+
+
+/*
+ * Parse a GPUVA ops list into xe VMA operations and commit each one.
+ *
+ * Every op is appended to @vops->list, given the exec queue @q and a tile mask
+ * covering all tiles, has the VMAs it needs created (MAP: the new VMA; REMAP:
+ * the prev/next remainders) and is then committed via xe_vma_op_commit().
+ * The first op on the list receives the syncs and XE_VMA_OP_FIRST; when @last
+ * is set, the final op parsed here receives the syncs and XE_VMA_OP_LAST.
+ *
+ * For REMAP, a prev/next remainder whose boundary is aligned to the old VMA's
+ * maximum PTE size (and that is not a userptr) keeps its existing mapping:
+ * the rebind is skipped and the range to unmap is shrunk accordingly.
+ *
+ * Must be called with vm->lock held for write.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
+				   struct drm_gpuva_ops *ops,
+				   struct xe_sync_entry *syncs, u32 num_syncs,
+				   struct xe_vma_ops *vops, bool last)
+{
+	struct xe_device *xe = vm->xe;
+	struct xe_vma_op *last_op = NULL;
+	struct drm_gpuva_op *__op;
+	struct xe_tile *tile;
+	u8 id, tile_mask = 0;
+	int err = 0;
+
+	lockdep_assert_held_write(&vm->lock);
+
+	/* Default tile mask: one bit per tile of the device */
+	for_each_tile(tile, vm->xe, id)
+		tile_mask |= 0x1 << id;
+
+	drm_gpuva_for_each_op(__op, ops) {
+		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
+		struct xe_vma *vma;
+		bool first = list_empty(&vops->list);
+		unsigned int flags = 0;
+
+		INIT_LIST_HEAD(&op->link);
+		list_add_tail(&op->link, &vops->list);
+
+		if (first) {
+			op->flags |= XE_VMA_OP_FIRST;
+			op->num_syncs = num_syncs;
+			op->syncs = syncs;
+		}
+
+		op->q = q;
+		op->tile_mask = tile_mask;
+
+		switch (op->base.op) {
+		case DRM_GPUVA_OP_MAP:
+		{
+			flags |= op->map.read_only ?
+				VMA_CREATE_FLAG_READ_ONLY : 0;
+			flags |= op->map.is_null ?
+				VMA_CREATE_FLAG_IS_NULL : 0;
+			flags |= op->map.dumpable ?
+				VMA_CREATE_FLAG_DUMPABLE : 0;
+
+			vma = new_vma(vm, &op->base.map, op->map.pat_index,
+				      flags);
+			if (IS_ERR(vma))
+				return PTR_ERR(vma);
+
+			op->map.vma = vma;
+			break;
+		}
+		case DRM_GPUVA_OP_REMAP:
+		{
+			struct xe_vma *old =
+				gpuva_to_vma(op->base.remap.unmap->va);
+
+			/* Start from the whole old VMA; shrunk below on skips */
+			op->remap.start = xe_vma_start(old);
+			op->remap.range = xe_vma_size(old);
+
+			if (op->base.remap.prev) {
+				/* The remainder inherits the old VMA's attributes */
+				flags |= op->base.remap.unmap->va->flags &
+					XE_VMA_READ_ONLY ?
+					VMA_CREATE_FLAG_READ_ONLY : 0;
+				flags |= op->base.remap.unmap->va->flags &
+					DRM_GPUVA_SPARSE ?
+					VMA_CREATE_FLAG_IS_NULL : 0;
+				flags |= op->base.remap.unmap->va->flags &
+					XE_VMA_DUMPABLE ?
+					VMA_CREATE_FLAG_DUMPABLE : 0;
+
+				vma = new_vma(vm, op->base.remap.prev,
+					      old->pat_index, flags);
+				if (IS_ERR(vma))
+					return PTR_ERR(vma);
+
+				op->remap.prev = vma;
+
+				/*
+				 * Userptr creates a new SG mapping so
+				 * we must also rebind.
+				 */
+				op->remap.skip_prev = !xe_vma_is_userptr(old) &&
+					IS_ALIGNED(xe_vma_end(vma),
+						   xe_vma_max_pte_size(old));
+				if (op->remap.skip_prev) {
+					xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
+					op->remap.range -=
+						xe_vma_end(vma) -
+						xe_vma_start(old);
+					op->remap.start = xe_vma_end(vma);
+					vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx",
+					       (ULL)op->remap.start,
+					       (ULL)op->remap.range);
+				}
+			}
+
+			if (op->base.remap.next) {
+				flags |= op->base.remap.unmap->va->flags &
+					XE_VMA_READ_ONLY ?
+					VMA_CREATE_FLAG_READ_ONLY : 0;
+				flags |= op->base.remap.unmap->va->flags &
+					DRM_GPUVA_SPARSE ?
+					VMA_CREATE_FLAG_IS_NULL : 0;
+				flags |= op->base.remap.unmap->va->flags &
+					XE_VMA_DUMPABLE ?
+					VMA_CREATE_FLAG_DUMPABLE : 0;
+
+				vma = new_vma(vm, op->base.remap.next,
+					      old->pat_index, flags);
+				if (IS_ERR(vma))
+					return PTR_ERR(vma);
+
+				op->remap.next = vma;
+
+				/*
+				 * Userptr creates a new SG mapping so
+				 * we must also rebind.
+				 */
+				op->remap.skip_next = !xe_vma_is_userptr(old) &&
+					IS_ALIGNED(xe_vma_start(vma),
+						   xe_vma_max_pte_size(old));
+				if (op->remap.skip_next) {
+					xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
+					op->remap.range -=
+						xe_vma_end(old) -
+						xe_vma_start(vma);
+					vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx",
+					       (ULL)op->remap.start,
+					       (ULL)op->remap.range);
+				}
+			}
+			break;
+		}
+		case DRM_GPUVA_OP_UNMAP:
+		case DRM_GPUVA_OP_PREFETCH:
+			/* Nothing to do */
+			break;
+		default:
+			drm_warn(&vm->xe->drm, "NOT POSSIBLE");
+		}
+
+		last_op = op;
+
+		err = xe_vma_op_commit(vm, op);
+		if (err)
+			return err;
+	}
+
+	/* FIXME: Unhandled corner case */
+	XE_WARN_ON(!last_op && last && !list_empty(&vops->list));
+
+	if (!last_op)
+		return 0;
+
+	if (last) {
+		last_op->flags |= XE_VMA_OP_LAST;
+		last_op->num_syncs = num_syncs;
+		last_op->syncs = syncs;
+	}
+
+	return 0;
+}
+
+/*
+ * Execute one committed VMA operation against @vma and return its fence.
+ *
+ * MAP binds (immediately unless the VM is in fault mode and the op did not
+ * request an immediate bind), UNMAP unbinds, PREFETCH goes through
+ * xe_vm_prefetch(). REMAP unbinds the old VMA once, then binds the prev and
+ * next remainders. Progress is recorded in op->remap (unmap_done, and
+ * prev/next cleared after a successful bind), so a repeated call picks up
+ * where a failed one stopped.
+ *
+ * Requires vm->lock, the VM resv and the VMA's BO lock to be held.
+ *
+ * Return: a fence on success, ERR_PTR() on failure.
+ */
+static struct dma_fence *op_execute(struct xe_vm *vm, struct xe_vma *vma,
+				    struct xe_vma_op *op)
+{
+	struct dma_fence *fence = NULL;
+
+	lockdep_assert_held(&vm->lock);
+
+	xe_vm_assert_held(vm);
+	xe_bo_assert_held(xe_vma_bo(vma));
+
+	switch (op->base.op) {
+	case DRM_GPUVA_OP_MAP:
+		fence = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma),
+				   op->syncs, op->num_syncs,
+				   op->tile_mask,
+				   op->map.immediate || !xe_vm_in_fault_mode(vm),
+				   op->flags & XE_VMA_OP_FIRST,
+				   op->flags & XE_VMA_OP_LAST);
+		break;
+	case DRM_GPUVA_OP_REMAP:
+	{
+		bool prev = !!op->remap.prev;
+		bool next = !!op->remap.next;
+
+		if (!op->remap.unmap_done) {
+			if (prev || next)
+				vma->gpuva.flags |= XE_VMA_FIRST_REBIND;
+			fence = xe_vm_unbind(vm, vma, op->q, op->syncs,
+					     op->num_syncs,
+					     op->flags & XE_VMA_OP_FIRST,
+					     op->flags & XE_VMA_OP_LAST &&
+					     !prev && !next);
+			if (IS_ERR(fence))
+				break;
+			op->remap.unmap_done = true;
+		}
+
+		if (prev) {
+			op->remap.prev->gpuva.flags |= XE_VMA_LAST_REBIND;
+			/* Only the fence of the final step is returned */
+			dma_fence_put(fence);
+			fence = xe_vm_bind(vm, op->remap.prev, op->q,
+					   xe_vma_bo(op->remap.prev), op->syncs,
+					   op->num_syncs,
+					   op->remap.prev->tile_mask, true,
+					   false,
+					   op->flags & XE_VMA_OP_LAST && !next);
+			op->remap.prev->gpuva.flags &= ~XE_VMA_LAST_REBIND;
+			if (IS_ERR(fence))
+				break;
+			op->remap.prev = NULL;
+		}
+
+		if (next) {
+			op->remap.next->gpuva.flags |= XE_VMA_LAST_REBIND;
+			dma_fence_put(fence);
+			fence = xe_vm_bind(vm, op->remap.next, op->q,
+					   xe_vma_bo(op->remap.next),
+					   op->syncs, op->num_syncs,
+					   op->remap.next->tile_mask, true,
+					   false, op->flags & XE_VMA_OP_LAST);
+			op->remap.next->gpuva.flags &= ~XE_VMA_LAST_REBIND;
+			if (IS_ERR(fence))
+				break;
+			op->remap.next = NULL;
+		}
+
+		break;
+	}
+	case DRM_GPUVA_OP_UNMAP:
+		fence = xe_vm_unbind(vm, vma, op->q, op->syncs,
+				     op->num_syncs, op->flags & XE_VMA_OP_FIRST,
+				     op->flags & XE_VMA_OP_LAST);
+		break;
+	case DRM_GPUVA_OP_PREFETCH:
+		fence = xe_vm_prefetch(vm, vma, op->q, op->syncs, op->num_syncs,
+				       op->flags & XE_VMA_OP_FIRST,
+				       op->flags & XE_VMA_OP_LAST);
+		break;
+	default:
+		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
+	}
+
+	if (IS_ERR(fence))
+		trace_xe_vma_fail(vma);
+
+	return fence;
+}
+
+/*
+ * Run op_execute() and retry it for as long as it fails with -EAGAIN on a
+ * userptr VMA whose pages can be pinned again. For a REMAP the VMA to re-pin
+ * is the one belonging to the step that has not completed yet.
+ *
+ * An -EAGAIN on a non-userptr VMA is returned unchanged; a failed re-pin
+ * replaces the error with the pinning error.
+ *
+ * Return: a fence on success, ERR_PTR() on failure.
+ */
+static struct dma_fence *
+__xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
+		    struct xe_vma_op *op)
+{
+	struct dma_fence *fence;
+	int err;
+
+retry_userptr:
+	fence = op_execute(vm, vma, op);
+	if (IS_ERR(fence) && PTR_ERR(fence) == -EAGAIN) {
+		lockdep_assert_held_write(&vm->lock);
+
+		/* Pick the VMA of the REMAP step that is still pending */
+		if (op->base.op == DRM_GPUVA_OP_REMAP) {
+			if (!op->remap.unmap_done)
+				vma = gpuva_to_vma(op->base.remap.unmap->va);
+			else if (op->remap.prev)
+				vma = op->remap.prev;
+			else
+				vma = op->remap.next;
+		}
+
+		if (xe_vma_is_userptr(vma)) {
+			err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
+			if (!err)
+				goto retry_userptr;
+
+			fence = ERR_PTR(err);
+			trace_xe_vma_fail(vma);
+		}
+	}
+
+	return fence;
+}
+
+/*
+ * Resolve the VMA that @op currently acts on and execute the operation on it.
+ *
+ * An unknown operation type is warned about and yields ERR_PTR(-ENOMEM).
+ */
+static struct dma_fence *
+xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op)
+{
+	struct xe_vma *target;
+
+	lockdep_assert_held(&vm->lock);
+
+	switch (op->base.op) {
+	case DRM_GPUVA_OP_MAP:
+		target = op->map.vma;
+		break;
+	case DRM_GPUVA_OP_REMAP:
+		/* Unmap stage first, then prev, then next */
+		if (!op->remap.unmap_done)
+			target = gpuva_to_vma(op->base.remap.unmap->va);
+		else if (op->remap.prev)
+			target = op->remap.prev;
+		else
+			target = op->remap.next;
+		break;
+	case DRM_GPUVA_OP_UNMAP:
+		target = gpuva_to_vma(op->base.unmap.va);
+		break;
+	case DRM_GPUVA_OP_PREFETCH:
+		target = gpuva_to_vma(op->base.prefetch.va);
+		break;
+	default:
+		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	return __xe_vma_op_execute(vm, target, op);
+}
+
+/*
+ * Undo the VM bookkeeping done for a single VMA operation.
+ *
+ * @post_commit, @prev_post_commit and @next_post_commit say whether the
+ * operation's main VMA, and for a remap its prev / next VMAs, had already been
+ * committed; they are forwarded to prep_vma_destroy() and decide whether an
+ * unmapped VMA is inserted back into the VM.
+ *
+ * Must be called with vm->lock held for write.
+ */
+static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
+			     bool post_commit, bool prev_post_commit,
+			     bool next_post_commit)
+{
+	lockdep_assert_held_write(&vm->lock);
+
+	switch (op->base.op) {
+	case DRM_GPUVA_OP_MAP:
+		/* Drop the VMA that was created for the new mapping, if any */
+		if (op->map.vma) {
+			prep_vma_destroy(vm, op->map.vma, post_commit);
+			xe_vma_destroy_unlocked(op->map.vma);
+		}
+		break;
+	case DRM_GPUVA_OP_UNMAP:
+	{
+		struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);
+
+		if (vma) {
+			/* Clear the destroyed mark under the userptr notifier lock */
+			down_read(&vm->userptr.notifier_lock);
+			vma->gpuva.flags &= ~XE_VMA_DESTROYED;
+			up_read(&vm->userptr.notifier_lock);
+			if (post_commit)
+				xe_vm_insert_vma(vm, vma);
+		}
+		break;
+	}
+	case DRM_GPUVA_OP_REMAP:
+	{
+		struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va);
+
+		/* Destroy the split-off pieces first, then restore the original */
+		if (op->remap.prev) {
+			prep_vma_destroy(vm, op->remap.prev, prev_post_commit);
+			xe_vma_destroy_unlocked(op->remap.prev);
+		}
+		if (op->remap.next) {
+			prep_vma_destroy(vm, op->remap.next, next_post_commit);
+			xe_vma_destroy_unlocked(op->remap.next);
+		}
+		if (vma) {
+			down_read(&vm->userptr.notifier_lock);
+			vma->gpuva.flags &= ~XE_VMA_DESTROYED;
+			up_read(&vm->userptr.notifier_lock);
+			if (post_commit)
+				xe_vm_insert_vma(vm, vma);
+		}
+		break;
+	}
+	case DRM_GPUVA_OP_PREFETCH:
+		/* Nothing to do */
+		break;
+	default:
+		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
+	}
+}
+
+/*
+ * Unwind every operation in @ops, walking the lists from last to first and
+ * each list's operations in reverse order. NULL list slots are skipped.
+ */
+static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
+				     struct drm_gpuva_ops **ops,
+				     int num_ops_list)
+{
+	int idx;
+
+	for (idx = num_ops_list; idx-- > 0; ) {
+		struct drm_gpuva_ops *list = ops[idx];
+		struct drm_gpuva_op *base_op;
+
+		if (!list)
+			continue;
+
+		drm_gpuva_for_each_op_reverse(base_op, list) {
+			struct xe_vma_op *op = gpuva_op_to_vma_op(base_op);
+			bool committed = op->flags & XE_VMA_OP_COMMITTED;
+			bool prev_committed =
+				op->flags & XE_VMA_OP_PREV_COMMITTED;
+			bool next_committed =
+				op->flags & XE_VMA_OP_NEXT_COMMITTED;
+
+			xe_vma_op_unwind(vm, op, committed, prev_committed,
+					 next_committed);
+		}
+	}
+}
+
+/*
+ * Lock the BO backing @vma in @exec (only when the BO has no vm pointer set)
+ * and optionally validate it. VMAs without a BO succeed trivially.
+ *
+ * Returns 0 on success, negative error code otherwise.
+ */
+static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
+				 bool validate)
+{
+	struct xe_bo *bo = xe_vma_bo(vma);
+	int ret;
+
+	if (!bo)
+		return 0;
+
+	if (!bo->vm) {
+		ret = drm_exec_lock_obj(exec, &bo->ttm.base);
+		if (ret)
+			return ret;
+	}
+
+	if (validate)
+		return xe_bo_validate(bo, xe_vma_vm(vma), true);
+
+	return 0;
+}
+
+/*
+ * Check the user fence attached to @vma, if any.
+ *
+ * Returns -EBUSY while xe_sync_ufence_get_status() reports zero; otherwise
+ * detaches the fence from the VMA, drops the reference and returns 0.
+ */
+static int check_ufence(struct xe_vma *vma)
+{
+	struct xe_user_fence * const fence = vma->ufence;
+
+	if (!fence)
+		return 0;
+
+	if (!xe_sync_ufence_get_status(fence))
+		return -EBUSY;
+
+	vma->ufence = NULL;
+	xe_sync_ufence_put(fence);
+
+	return 0;
+}
+
+/*
+ * Lock (and where required validate / migrate) the buffer objects touched by
+ * a single VMA operation.
+ *
+ * - MAP: lock the new VMA's BO; validate it unless the VM is in fault mode
+ *   and the bind is not immediate.
+ * - REMAP / UNMAP: fail with -EBUSY while the old VMA's user fence is still
+ *   pending, then lock the old VMA's BO without validating; a remap also
+ *   locks and validates the prev / next VMAs.
+ * - PREFETCH: lock the BO and migrate it to the requested memory region.
+ *
+ * Returns 0 on success, negative error code otherwise.
+ */
+static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
+			    struct xe_vma_op *op)
+{
+	int err = 0;
+
+	switch (op->base.op) {
+	case DRM_GPUVA_OP_MAP:
+		err = vma_lock_and_validate(exec, op->map.vma,
+					    !xe_vm_in_fault_mode(vm) ||
+					    op->map.immediate);
+		break;
+	case DRM_GPUVA_OP_REMAP:
+		err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va));
+		if (err)
+			break;
+
+		err = vma_lock_and_validate(exec,
+					    gpuva_to_vma(op->base.remap.unmap->va),
+					    false);
+		if (!err && op->remap.prev)
+			err = vma_lock_and_validate(exec, op->remap.prev, true);
+		if (!err && op->remap.next)
+			err = vma_lock_and_validate(exec, op->remap.next, true);
+		break;
+	case DRM_GPUVA_OP_UNMAP:
+		err = check_ufence(gpuva_to_vma(op->base.unmap.va));
+		if (err)
+			break;
+
+		err = vma_lock_and_validate(exec,
+					    gpuva_to_vma(op->base.unmap.va),
+					    false);
+		break;
+	case DRM_GPUVA_OP_PREFETCH:
+	{
+		struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
+		u32 region = op->prefetch.region;
+
+		/*
+		 * region indexes region_to_mem_type[] below, so it must be
+		 * strictly smaller than the array size.
+		 */
+		xe_assert(vm->xe, region < ARRAY_SIZE(region_to_mem_type));
+
+		err = vma_lock_and_validate(exec, vma, false);
+		if (!err && !xe_vma_has_no_bo(vma))
+			err = xe_bo_migrate(xe_vma_bo(vma),
+					    region_to_mem_type[region]);
+		break;
+	}
+	default:
+		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
+	}
+
+	return err;
+}
+
+/*
+ * Lock the VM's object, then lock and prepare every operation queued in
+ * @vops, stopping at the first failure.
+ *
+ * Returns 0 on success, negative error code otherwise.
+ */
+static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
+					   struct xe_vm *vm,
+					   struct xe_vma_ops *vops)
+{
+	struct xe_vma_op *cur;
+	int ret = drm_exec_lock_obj(exec, xe_vm_obj(vm));
+
+	if (ret)
+		return ret;
+
+	list_for_each_entry(cur, &vops->list, link) {
+		ret = op_lock_and_prep(exec, vm, cur);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+/*
+ * Execute all operations in @vops in list order, keeping only the fence of
+ * the most recent one.
+ *
+ * Returns the last operation's fence (NULL for an empty list). Any failing
+ * operation is logged with its real error and reported as ERR_PTR(-ENOSPC).
+ */
+static struct dma_fence *ops_execute(struct xe_vm *vm,
+				     struct xe_vma_ops *vops)
+{
+	struct xe_vma_op *cur, *tmp;
+	struct dma_fence *last = NULL;
+
+	list_for_each_entry_safe(cur, tmp, &vops->list, link) {
+		/* Only the newest fence is of interest */
+		dma_fence_put(last);
+		last = xe_vma_op_execute(vm, cur);
+		if (!IS_ERR(last))
+			continue;
+
+		drm_warn(&vm->xe->drm, "VM op(%d) failed with %ld",
+			 cur->base.op, PTR_ERR(last));
+		return ERR_PTR(-ENOSPC);
+	}
+
+	return last;
+}
+
+/*
+ * Attach a new reference on @ufence to @vma, dropping the reference on any
+ * user fence that was attached before.
+ */
+static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence)
+{
+	struct xe_user_fence *old = vma->ufence;
+
+	if (old)
+		xe_sync_ufence_put(old);
+
+	vma->ufence = __xe_sync_ufence_get(ufence);
+}
+
+/*
+ * Attach @ufence to the VMA(s) that @op leaves behind: the mapped VMA for a
+ * map, the prev / next VMAs for a remap and the prefetched VMA for a
+ * prefetch. Unmaps get no fence.
+ */
+static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op,
+			  struct xe_user_fence *ufence)
+{
+	struct xe_vma *target;
+
+	switch (op->base.op) {
+	case DRM_GPUVA_OP_UNMAP:
+		/* Nothing to attach the fence to */
+		break;
+	case DRM_GPUVA_OP_MAP:
+		vma_add_ufence(op->map.vma, ufence);
+		break;
+	case DRM_GPUVA_OP_PREFETCH:
+		target = gpuva_to_vma(op->base.prefetch.va);
+		vma_add_ufence(target, ufence);
+		break;
+	case DRM_GPUVA_OP_REMAP:
+		target = op->remap.prev;
+		if (target)
+			vma_add_ufence(target, ufence);
+
+		target = op->remap.next;
+		if (target)
+			vma_add_ufence(target, ufence);
+		break;
+	default:
+		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
+	}
+}
+
+/*
+ * Finish a successfully executed set of bind operations: attach the user
+ * fence (if the syncs carry one) to the affected VMAs, destroy the VMAs that
+ * were unmapped, signal every sync entry with @fence and record @fence as the
+ * last fence of the wait exec queue. Consumes the caller's reference on
+ * @fence.
+ */
+static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
+				   struct dma_fence *fence)
+{
+	struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q);
+	struct xe_user_fence *ufence;
+	struct xe_vma_op *cur;
+	int n;
+
+	ufence = find_ufence_get(vops->syncs, vops->num_syncs);
+
+	list_for_each_entry(cur, &vops->list, link) {
+		if (ufence)
+			op_add_ufence(vm, cur, ufence);
+
+		/* The old VMA of an unmap / remap is destroyed behind @fence */
+		switch (cur->base.op) {
+		case DRM_GPUVA_OP_UNMAP:
+			xe_vma_destroy(gpuva_to_vma(cur->base.unmap.va), fence);
+			break;
+		case DRM_GPUVA_OP_REMAP:
+			xe_vma_destroy(gpuva_to_vma(cur->base.remap.unmap->va),
+				       fence);
+			break;
+		default:
+			break;
+		}
+	}
+
+	if (ufence)
+		xe_sync_ufence_put(ufence);
+
+	for (n = 0; n < vops->num_syncs; n++)
+		xe_sync_entry_signal(&vops->syncs[n], fence);
+
+	xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
+	dma_fence_put(fence);
+}
+
+/*
+ * Lock everything needed by @vops, execute the operations and finalize them.
+ *
+ * Must be called with vm->lock held for write. If executing the operations
+ * fails the VM is killed via xe_vm_kill().
+ *
+ * Returns 0 on success, negative error code otherwise.
+ */
+static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
+				     struct xe_vma_ops *vops)
+{
+	struct drm_exec exec;
+	struct dma_fence *fence;
+	int err;
+
+	lockdep_assert_held_write(&vm->lock);
+
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
+		      DRM_EXEC_IGNORE_DUPLICATES, 0);
+	drm_exec_until_all_locked(&exec) {
+		err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops);
+		/* Restart the locking loop if a lock was contended */
+		drm_exec_retry_on_contention(&exec);
+		if (err)
+			goto unlock;
+
+		fence = ops_execute(vm, vops);
+		if (IS_ERR(fence)) {
+			err = PTR_ERR(fence);
+			/* FIXME: Killing VM rather than proper error handling */
+			xe_vm_kill(vm, false);
+			goto unlock;
+		} else {
+			/* Hands the fence reference over to the fini step */
+			vm_bind_ioctl_ops_fini(vm, vops, fence);
+		}
+	}
+
+unlock:
+	drm_exec_fini(&exec);
+	return err;
+}
+
+/* Bind-op flags accepted by the VM bind IOCTL; any other bit is rejected */
+#define SUPPORTED_FLAGS	\
+	(DRM_XE_VM_BIND_FLAG_READONLY | \
+	 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \
+	 DRM_XE_VM_BIND_FLAG_NULL | \
+	 DRM_XE_VM_BIND_FLAG_DUMPABLE)
+/* Low 16 bits; must be clear in values that have to be 64K aligned */
+#define XE_64K_PAGE_MASK 0xffffull
+/* All sync flags known to the driver */
+#define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
+
+/*
+ * Validate the arguments of a VM bind IOCTL and fetch the bind operations.
+ *
+ * For more than one bind the operation array is copied from user space into a
+ * freshly allocated buffer returned through @bind_ops; the caller must
+ * kvfree() it when args->num_binds > 1. For zero or one bind, @bind_ops
+ * points at the operation embedded in @args. On error nothing is left
+ * allocated and *@bind_ops is cleared if a buffer had been allocated.
+ *
+ * Each operation is checked for a valid PAT index / coherency mode, a known
+ * opcode, supported flags, consistent object / NULL / prefetch combinations,
+ * an in-range prefetch region and page aligned offset, address and range.
+ * The PAT index is sanitized against speculation and written back.
+ *
+ * Returns 0 on success, -EINVAL on invalid arguments, -ENOMEM on allocation
+ * failure and -EFAULT if the user array cannot be read.
+ */
+static int vm_bind_ioctl_check_args(struct xe_device *xe,
+				    struct drm_xe_vm_bind *args,
+				    struct drm_xe_vm_bind_op **bind_ops)
+{
+	int err;
+	int i;
+
+	if (XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
+	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, args->extensions))
+		return -EINVAL;
+
+	if (args->num_binds > 1) {
+		u64 __user *bind_user =
+			u64_to_user_ptr(args->vector_of_binds);
+
+		*bind_ops = kvmalloc_array(args->num_binds,
+					   sizeof(struct drm_xe_vm_bind_op),
+					   GFP_KERNEL | __GFP_ACCOUNT);
+		if (!*bind_ops)
+			return -ENOMEM;
+
+		/*
+		 * The pointer comes straight from user space and has not been
+		 * through access_ok(), so the checking variant is required.
+		 */
+		err = copy_from_user(*bind_ops, bind_user,
+				     sizeof(struct drm_xe_vm_bind_op) *
+				     args->num_binds);
+		if (XE_IOCTL_DBG(xe, err)) {
+			err = -EFAULT;
+			goto free_bind_ops;
+		}
+	} else {
+		*bind_ops = &args->bind;
+	}
+
+	for (i = 0; i < args->num_binds; ++i) {
+		u64 range = (*bind_ops)[i].range;
+		u64 addr = (*bind_ops)[i].addr;
+		u32 op = (*bind_ops)[i].op;
+		u32 flags = (*bind_ops)[i].flags;
+		u32 obj = (*bind_ops)[i].obj;
+		u64 obj_offset = (*bind_ops)[i].obj_offset;
+		u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance;
+		bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
+		u16 pat_index = (*bind_ops)[i].pat_index;
+		u16 coh_mode;
+
+		if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) {
+			err = -EINVAL;
+			goto free_bind_ops;
+		}
+
+		pat_index = array_index_nospec(pat_index, xe->pat.n_entries);
+		(*bind_ops)[i].pat_index = pat_index;
+		coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
+		if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */
+			err = -EINVAL;
+			goto free_bind_ops;
+		}
+
+		if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) {
+			err = -EINVAL;
+			goto free_bind_ops;
+		}
+
+		/*
+		 * The prefetch region is bounded before it is used as a shift
+		 * count: an oversized shift is undefined and could alias a
+		 * valid region bit, and the region later indexes
+		 * region_to_mem_type[].
+		 */
+		if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) ||
+		    XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) ||
+		    XE_IOCTL_DBG(xe, obj && is_null) ||
+		    XE_IOCTL_DBG(xe, obj_offset && is_null) ||
+		    XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP &&
+				 is_null) ||
+		    XE_IOCTL_DBG(xe, !obj &&
+				 op == DRM_XE_VM_BIND_OP_MAP &&
+				 !is_null) ||
+		    XE_IOCTL_DBG(xe, !obj &&
+				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
+		    XE_IOCTL_DBG(xe, addr &&
+				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
+		    XE_IOCTL_DBG(xe, range &&
+				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
+		    XE_IOCTL_DBG(xe, obj &&
+				 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
+		    XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
+				 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
+		    XE_IOCTL_DBG(xe, obj &&
+				 op == DRM_XE_VM_BIND_OP_PREFETCH) ||
+		    XE_IOCTL_DBG(xe, prefetch_region &&
+				 op != DRM_XE_VM_BIND_OP_PREFETCH) ||
+		    XE_IOCTL_DBG(xe, prefetch_region >=
+				 ARRAY_SIZE(region_to_mem_type)) ||
+		    XE_IOCTL_DBG(xe, !(BIT(prefetch_region) &
+				       xe->info.mem_region_mask)) ||
+		    XE_IOCTL_DBG(xe, obj &&
+				 op == DRM_XE_VM_BIND_OP_UNMAP)) {
+			err = -EINVAL;
+			goto free_bind_ops;
+		}
+
+		if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) ||
+		    XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) ||
+		    XE_IOCTL_DBG(xe, range & ~PAGE_MASK) ||
+		    XE_IOCTL_DBG(xe, !range &&
+				 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) {
+			err = -EINVAL;
+			goto free_bind_ops;
+		}
+	}
+
+	return 0;
+
+free_bind_ops:
+	if (args->num_binds > 1) {
+		kvfree(*bind_ops);
+		/* Do not hand a dangling pointer back to the caller */
+		*bind_ops = NULL;
+	}
+	return err;
+}
+
+/*
+ * Signal the sync entries of a bind IOCTL that has no work to execute: fetch
+ * the in-fence for the wait exec queue, signal every sync entry with it and
+ * record it as the queue's last fence.
+ *
+ * Returns 0 on success or the error from xe_sync_in_fence_get().
+ */
+static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
+				       struct xe_exec_queue *q,
+				       struct xe_sync_entry *syncs,
+				       int num_syncs)
+{
+	struct dma_fence *in_fence;
+	int n;
+
+	in_fence = xe_sync_in_fence_get(syncs, num_syncs,
+					to_wait_exec_queue(vm, q), vm);
+	if (IS_ERR(in_fence))
+		return PTR_ERR(in_fence);
+
+	for (n = 0; n < num_syncs; n++)
+		xe_sync_entry_signal(syncs + n, in_fence);
+
+	xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm,
+				     in_fence);
+	dma_fence_put(in_fence);
+
+	return 0;
+}
+
+/*
+ * Reset @vops to an empty operation list bound to @vm, @q and the given sync
+ * entries.
+ */
+static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
+			    struct xe_exec_queue *q,
+			    struct xe_sync_entry *syncs, u32 num_syncs)
+{
+	memset(vops, 0, sizeof(*vops));
+
+	vops->vm = vm;
+	vops->q = q;
+	vops->num_syncs = num_syncs;
+	vops->syncs = syncs;
+	INIT_LIST_HEAD(&vops->list);
+}
+
+/*
+ * Check that a bind of [@obj_offset, @obj_offset + @range) of @bo at @addr
+ * with @pat_index is acceptable: the range must fit inside the BO, BOs
+ * flagged XE_BO_FLAG_INTERNAL_64K need 64K aligned offset / address / range,
+ * and the coherency mode of the PAT index must match the BO's CPU caching.
+ *
+ * Returns 0 if the bind is acceptable, -EINVAL otherwise.
+ */
+static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
+					u64 addr, u64 range, u64 obj_offset,
+					u16 pat_index)
+{
+	u16 coh_mode;
+
+	if (XE_IOCTL_DBG(xe, range > bo->size))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, obj_offset >
+			 bo->size - range))
+		return -EINVAL;
+
+	if (bo->flags & XE_BO_FLAG_INTERNAL_64K) {
+		if (XE_IOCTL_DBG(xe, obj_offset &
+				 XE_64K_PAGE_MASK))
+			return -EINVAL;
+
+		if (XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK))
+			return -EINVAL;
+
+		if (XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK))
+			return -EINVAL;
+	}
+
+	coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
+
+	if (!bo->cpu_caching) {
+		/*
+		 * A dma-buf imported from a different device has to use 1way
+		 * or 2way coherency: how it is mapped on the CPU is unknown,
+		 * so assume it may be cached on the CPU side.
+		 */
+		if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE))
+			return -EINVAL;
+
+		return 0;
+	}
+
+	/* A write-back cached BO cannot be bound without coherency */
+	if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
+			 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB))
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * VM bind IOCTL entry point.
+ *
+ * Validates the arguments, looks up the optional exec queue, the VM and the
+ * buffer objects, parses the sync entries, builds the GPUVA operation lists
+ * for every bind and executes them with vm->lock held for write. When there
+ * is nothing to execute (-ENODATA internally) the sync entries are signalled
+ * directly instead.
+ *
+ * The labels at the end release resources in reverse order of acquisition;
+ * each error path jumps to the label matching what has been set up so far.
+ *
+ * Returns 0 on success, negative error code otherwise.
+ */
+int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct drm_xe_vm_bind *args = data;
+	struct drm_xe_sync __user *syncs_user;
+	struct xe_bo **bos = NULL;
+	struct drm_gpuva_ops **ops = NULL;
+	struct xe_vm *vm;
+	struct xe_exec_queue *q = NULL;
+	u32 num_syncs, num_ufence = 0;
+	struct xe_sync_entry *syncs = NULL;
+	struct drm_xe_vm_bind_op *bind_ops;
+	struct xe_vma_ops vops;
+	int err;
+	int i;
+
+	/* On success bind_ops is heap allocated only when num_binds > 1 */
+	err = vm_bind_ioctl_check_args(xe, args, &bind_ops);
+	if (err)
+		return err;
+
+	if (args->exec_queue_id) {
+		q = xe_exec_queue_lookup(xef, args->exec_queue_id);
+		if (XE_IOCTL_DBG(xe, !q)) {
+			err = -ENOENT;
+			goto free_objs;
+		}
+
+		/* Only exec queues flagged EXEC_QUEUE_FLAG_VM are accepted */
+		if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
+			err = -EINVAL;
+			goto put_exec_queue;
+		}
+	}
+
+	vm = xe_vm_lookup(xef, args->vm_id);
+	if (XE_IOCTL_DBG(xe, !vm)) {
+		err = -EINVAL;
+		goto put_exec_queue;
+	}
+
+	err = down_write_killable(&vm->lock);
+	if (err)
+		goto put_vm;
+
+	if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
+		err = -ENOENT;
+		goto release_vm_lock;
+	}
+
+	/* Every bind must lie inside the VM's address space */
+	for (i = 0; i < args->num_binds; ++i) {
+		u64 range = bind_ops[i].range;
+		u64 addr = bind_ops[i].addr;
+
+		if (XE_IOCTL_DBG(xe, range > vm->size) ||
+		    XE_IOCTL_DBG(xe, addr > vm->size - range)) {
+			err = -EINVAL;
+			goto release_vm_lock;
+		}
+	}
+
+	if (args->num_binds) {
+		bos = kvcalloc(args->num_binds, sizeof(*bos),
+			       GFP_KERNEL | __GFP_ACCOUNT);
+		if (!bos) {
+			err = -ENOMEM;
+			goto release_vm_lock;
+		}
+
+		ops = kvcalloc(args->num_binds, sizeof(*ops),
+			       GFP_KERNEL | __GFP_ACCOUNT);
+		if (!ops) {
+			err = -ENOMEM;
+			goto release_vm_lock;
+		}
+	}
+
+	/* Look up and validate the BO of every bind that names an object */
+	for (i = 0; i < args->num_binds; ++i) {
+		struct drm_gem_object *gem_obj;
+		u64 range = bind_ops[i].range;
+		u64 addr = bind_ops[i].addr;
+		u32 obj = bind_ops[i].obj;
+		u64 obj_offset = bind_ops[i].obj_offset;
+		u16 pat_index = bind_ops[i].pat_index;
+
+		if (!obj)
+			continue;
+
+		gem_obj = drm_gem_object_lookup(file, obj);
+		if (XE_IOCTL_DBG(xe, !gem_obj)) {
+			err = -ENOENT;
+			goto put_obj;
+		}
+		bos[i] = gem_to_xe_bo(gem_obj);
+
+		err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range,
+						   obj_offset, pat_index);
+		if (err)
+			goto put_obj;
+	}
+
+	if (args->num_syncs) {
+		syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
+		if (!syncs) {
+			err = -ENOMEM;
+			goto put_obj;
+		}
+	}
+
+	/*
+	 * num_syncs counts the successfully parsed entries so that the cleanup
+	 * at free_syncs only touches those.
+	 */
+	syncs_user = u64_to_user_ptr(args->syncs);
+	for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
+		err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
+					  &syncs_user[num_syncs],
+					  (xe_vm_in_lr_mode(vm) ?
+					   SYNC_PARSE_FLAG_LR_MODE : 0) |
+					  (!args->num_binds ?
+					   SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0));
+		if (err)
+			goto free_syncs;
+
+		if (xe_sync_is_ufence(&syncs[num_syncs]))
+			num_ufence++;
+	}
+
+	/* At most one user fence per bind IOCTL */
+	if (XE_IOCTL_DBG(xe, num_ufence > 1)) {
+		err = -EINVAL;
+		goto free_syncs;
+	}
+
+	/* No binds: only the syncs get signalled, see free_syncs */
+	if (!args->num_binds) {
+		err = -ENODATA;
+		goto free_syncs;
+	}
+
+	xe_vma_ops_init(&vops, vm, q, syncs, num_syncs);
+	for (i = 0; i < args->num_binds; ++i) {
+		u64 range = bind_ops[i].range;
+		u64 addr = bind_ops[i].addr;
+		u32 op = bind_ops[i].op;
+		u32 flags = bind_ops[i].flags;
+		u64 obj_offset = bind_ops[i].obj_offset;
+		u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
+		u16 pat_index = bind_ops[i].pat_index;
+
+		ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset,
+						  addr, range, op, flags,
+						  prefetch_region, pat_index);
+		if (IS_ERR(ops[i])) {
+			err = PTR_ERR(ops[i]);
+			/* Keep the slot NULL so unwind and free skip it */
+			ops[i] = NULL;
+			goto unwind_ops;
+		}
+
+		err = vm_bind_ioctl_ops_parse(vm, q, ops[i], syncs, num_syncs,
+					      &vops, i == args->num_binds - 1);
+		if (err)
+			goto unwind_ops;
+	}
+
+	/* Nothing to do */
+	if (list_empty(&vops.list)) {
+		err = -ENODATA;
+		goto unwind_ops;
+	}
+
+	err = vm_bind_ioctl_ops_execute(vm, &vops);
+
+unwind_ops:
+	/* -ENODATA is not a failure here, so there is nothing to roll back */
+	if (err && err != -ENODATA)
+		vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
+	for (i = args->num_binds - 1; i >= 0; --i)
+		if (ops[i])
+			drm_gpuva_ops_free(&vm->gpuvm, ops[i]);
+free_syncs:
+	/* With no work to execute, signal the syncs directly */
+	if (err == -ENODATA)
+		err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);
+	while (num_syncs--)
+		xe_sync_entry_cleanup(&syncs[num_syncs]);
+
+	kfree(syncs);
+put_obj:
+	for (i = 0; i < args->num_binds; ++i)
+		xe_bo_put(bos[i]);
+release_vm_lock:
+	up_write(&vm->lock);
+put_vm:
+	xe_vm_put(vm);
+put_exec_queue:
+	if (q)
+		xe_exec_queue_put(q);
+free_objs:
+	kvfree(bos);
+	kvfree(ops);
+	/* bind_ops points into args unless more than one bind was passed */
+	if (args->num_binds > 1)
+		kvfree(bind_ops);
+	return err;
+}
+
+/**
+ * xe_vm_lock() - Lock the vm's dma_resv object
+ * @vm: The struct xe_vm whose dma_resv object is to be locked
+ * @intr: Whether a wait for the lock should be interruptible
+ *
+ * Return: 0 on success, -EINTR if @intr is true and the wait for a
+ * contended lock was interrupted. If @intr is false, the function
+ * always returns 0.
+ */
+int xe_vm_lock(struct xe_vm *vm, bool intr)
+{
+	return intr ? dma_resv_lock_interruptible(xe_vm_resv(vm), NULL) :
+		      dma_resv_lock(xe_vm_resv(vm), NULL);
+}
+
+/**
+ * xe_vm_unlock() - Unlock the vm's dma_resv object
+ * @vm: The struct xe_vm whose lock is to be released.
+ *
+ * Unlock the vm's dma_resv object that was locked by xe_vm_lock().
+ */
+void xe_vm_unlock(struct xe_vm *vm)
+{
+	dma_resv_unlock(xe_vm_resv(vm));
+}
+
+/**
+ * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
+ * @vma: VMA to invalidate
+ *
+ * For every tile, zaps the page-table entries owned by this VMA, issues TLB
+ * invalidations on the tile's primary GT and, if present, its media GT, and
+ * blocks until all issued TLB invalidations are complete.
+ *
+ * Returns 0 for success, negative error code otherwise.
+ */
+int xe_vm_invalidate_vma(struct xe_vma *vma)
+{
+	struct xe_device *xe = xe_vma_vm(vma)->xe;
+	struct xe_tile *tile;
+	/* One fence slot per GT that may get an invalidation */
+	struct xe_gt_tlb_invalidation_fence
+		fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
+	u8 id;
+	/* Number of fences issued so far, i.e. the ones that must be waited on */
+	u32 fence_id = 0;
+	int ret = 0;
+
+	xe_assert(xe, !xe_vma_is_null(vma));
+	trace_xe_vma_invalidate(vma);
+
+	vm_dbg(&xe_vma_vm(vma)->xe->drm,
+	       "INVALIDATE: addr=0x%016llx, range=0x%016llx",
+		xe_vma_start(vma), xe_vma_size(vma));
+
+	/* Check that we don't race with page-table updates */
+	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
+		if (xe_vma_is_userptr(vma)) {
+			WARN_ON_ONCE(!mmu_interval_check_retry
+				     (&to_userptr_vma(vma)->userptr.notifier,
+				      to_userptr_vma(vma)->userptr.notifier_seq));
+			WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)),
+							     DMA_RESV_USAGE_BOOKKEEP));
+
+		} else {
+			xe_bo_assert_held(xe_vma_bo(vma));
+		}
+	}
+
+	for_each_tile(tile, xe, id) {
+		if (xe_pt_zap_ptes(tile, vma)) {
+			xe_device_wmb(xe);
+			xe_gt_tlb_invalidation_fence_init(tile->primary_gt,
+							  &fence[fence_id],
+							  true);
+
+			ret = xe_gt_tlb_invalidation_vma(tile->primary_gt,
+							 &fence[fence_id], vma);
+			if (ret < 0) {
+				/* Not issued: tear it down, wait for earlier ones */
+				xe_gt_tlb_invalidation_fence_fini(&fence[fence_id]);
+				goto wait;
+			}
+			++fence_id;
+
+			if (!tile->media_gt)
+				continue;
+
+			xe_gt_tlb_invalidation_fence_init(tile->media_gt,
+							  &fence[fence_id],
+							  true);
+
+			ret = xe_gt_tlb_invalidation_vma(tile->media_gt,
+							 &fence[fence_id], vma);
+			if (ret < 0) {
+				xe_gt_tlb_invalidation_fence_fini(&fence[fence_id]);
+				goto wait;
+			}
+			++fence_id;
+		}
+	}
+
+wait:
+	for (id = 0; id < fence_id; ++id)
+		xe_gt_tlb_invalidation_fence_wait(&fence[id]);
+
+	/* Note: reached on the error path as well */
+	vma->tile_invalidated = vma->tile_mask;
+
+	return ret;
+}
+
+/*
+ * Snapshot of the dumpable VMAs of a VM: one snap[] entry per VMA, filled in
+ * by xe_vm_snapshot_capture() and completed by
+ * xe_vm_snapshot_capture_delayed().
+ */
+struct xe_vm_snapshot {
+	/* Number of entries in snap[] */
+	unsigned long num_snaps;
+	struct {
+		/* ofs: VMA start address; bo_ofs: offset into the BO, or the userptr address */
+		u64 ofs, bo_ofs;
+		/* Size of the VMA in bytes */
+		unsigned long len;
+		/* Referenced backing BO, NULL if there is none */
+		struct xe_bo *bo;
+		/* Captured contents, or an ERR_PTR() if the capture failed */
+		void *data;
+		/* Referenced mm for userptr VMAs, NULL otherwise */
+		struct mm_struct *mm;
+	} snap[];
+};
+
+/*
+ * Record which ranges of @vm are to be dumped: for every VMA flagged
+ * XE_VMA_DUMPABLE store its address range and take a reference on its BO or,
+ * for userptr VMAs, on the owning mm. The contents themselves are copied
+ * later by xe_vm_snapshot_capture_delayed().
+ *
+ * Returns the snapshot, NULL if @vm is NULL, ERR_PTR(-ENODEV) if no VMA is
+ * dumpable or ERR_PTR(-ENOMEM) on allocation failure.
+ */
+struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
+{
+	struct xe_vm_snapshot *snap;
+	struct drm_gpuva *gpuva;
+	unsigned long count = 0, idx = 0;
+
+	if (!vm)
+		return NULL;
+
+	mutex_lock(&vm->snap_mutex);
+
+	/* First pass: count the dumpable VMAs to size the allocation */
+	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
+		if (gpuva->flags & XE_VMA_DUMPABLE)
+			count++;
+	}
+
+	if (!count) {
+		snap = ERR_PTR(-ENODEV);
+		goto out_unlock;
+	}
+
+	snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[count]), GFP_NOWAIT);
+	if (!snap) {
+		snap = ERR_PTR(-ENOMEM);
+		goto out_unlock;
+	}
+
+	snap->num_snaps = count;
+
+	/* Second pass: describe every dumpable VMA */
+	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
+		struct xe_vma *vma;
+		struct xe_bo *bo;
+
+		if (!(gpuva->flags & XE_VMA_DUMPABLE))
+			continue;
+
+		vma = gpuva_to_vma(gpuva);
+		bo = vma->gpuva.gem.obj ?
+			gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;
+
+		snap->snap[idx].ofs = xe_vma_start(vma);
+		snap->snap[idx].len = xe_vma_size(vma);
+
+		if (bo) {
+			snap->snap[idx].bo = xe_bo_get(bo);
+			snap->snap[idx].bo_ofs = xe_vma_bo_offset(vma);
+		} else if (xe_vma_is_userptr(vma)) {
+			struct mm_struct *mm =
+				to_userptr_vma(vma)->userptr.notifier.mm;
+
+			/* An mm without users left cannot be read later */
+			if (mmget_not_zero(mm))
+				snap->snap[idx].mm = mm;
+			else
+				snap->snap[idx].data = ERR_PTR(-EFAULT);
+
+			snap->snap[idx].bo_ofs = xe_vma_userptr(vma);
+		} else {
+			snap->snap[idx].data = ERR_PTR(-ENOENT);
+		}
+
+		idx++;
+	}
+
+out_unlock:
+	mutex_unlock(&vm->snap_mutex);
+	return snap;
+}
+
+/*
+ * Copy the contents of every range recorded in @snap into freshly allocated
+ * memory. BO-backed ranges are read through a TTM vmap, userptr ranges via
+ * copy_from_user() while temporarily using the recorded mm. A range whose
+ * copy fails gets an ERR_PTR() stored in its data pointer. The BO reference,
+ * and for userptr ranges that were copied the mm reference, are dropped here.
+ */
+void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
+{
+	if (IS_ERR_OR_NULL(snap))
+		return;
+
+	for (int i = 0; i < snap->num_snaps; i++) {
+		struct xe_bo *bo = snap->snap[i].bo;
+		struct iosys_map src;
+		int err;
+
+		/* Already marked as failed at capture time */
+		if (IS_ERR(snap->snap[i].data))
+			continue;
+
+		snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER);
+		if (!snap->snap[i].data) {
+			snap->snap[i].data = ERR_PTR(-ENOMEM);
+			goto cleanup_bo;
+		}
+
+		if (bo) {
+			xe_bo_lock(bo, false);
+			err = ttm_bo_vmap(&bo->ttm, &src);
+			if (!err) {
+				xe_map_memcpy_from(xe_bo_device(bo),
+						   snap->snap[i].data,
+						   &src, snap->snap[i].bo_ofs,
+						   snap->snap[i].len);
+				ttm_bo_vunmap(&bo->ttm, &src);
+			}
+			xe_bo_unlock(bo);
+		} else {
+			/* For userptr ranges bo_ofs holds the user address */
+			void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs;
+
+			kthread_use_mm(snap->snap[i].mm);
+			if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len))
+				err = 0;
+			else
+				err = -EFAULT;
+			kthread_unuse_mm(snap->snap[i].mm);
+
+			mmput(snap->snap[i].mm);
+			snap->snap[i].mm = NULL;
+		}
+
+		if (err) {
+			kvfree(snap->snap[i].data);
+			snap->snap[i].data = ERR_PTR(err);
+		}
+
+cleanup_bo:
+		/* bo is NULL for userptr ranges */
+		xe_bo_put(bo);
+		snap->snap[i].bo = NULL;
+	}
+}
+
+/*
+ * Print @snap to @p: for every range its length followed by either the error
+ * recorded for it or its contents, ascii85 encoded one u32 at a time. An
+ * invalid or missing snapshot is reported as a single error line.
+ */
+void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
+{
+	unsigned long idx;
+
+	if (IS_ERR_OR_NULL(snap)) {
+		drm_printf(p, "[0].error: %li\n", PTR_ERR(snap));
+		return;
+	}
+
+	for (idx = 0; idx < snap->num_snaps; idx++) {
+		const u64 ofs = snap->snap[idx].ofs;
+		const unsigned long len = snap->snap[idx].len;
+		void *data = snap->snap[idx].data;
+		unsigned long pos;
+
+		drm_printf(p, "[%llx].length: 0x%lx\n", ofs, len);
+
+		if (IS_ERR(data)) {
+			drm_printf(p, "[%llx].error: %li\n", ofs,
+				   PTR_ERR(data));
+			continue;
+		}
+
+		drm_printf(p, "[%llx].data: ", ofs);
+
+		for (pos = 0; pos < len; pos += sizeof(u32)) {
+			char dumped[ASCII85_BUFSZ];
+			u32 *word = data + pos;
+
+			drm_puts(p, ascii85_encode(*word, dumped));
+		}
+
+		drm_puts(p, "\n");
+	}
+}
+
+/*
+ * Release @snap together with the captured data and any BO / mm references
+ * its entries still hold. NULL and ERR_PTR() snapshots are ignored.
+ */
+void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
+{
+	unsigned long idx;
+
+	if (IS_ERR_OR_NULL(snap))
+		return;
+
+	for (idx = 0; idx < snap->num_snaps; idx++) {
+		void *data = snap->snap[idx].data;
+		struct mm_struct *mm = snap->snap[idx].mm;
+
+		/* data may hold an error code rather than memory */
+		if (!IS_ERR(data))
+			kvfree(data);
+
+		xe_bo_put(snap->snap[idx].bo);
+
+		if (mm)
+			mmput(mm);
+	}
+
+	kvfree(snap);
+}
diff --git a/rr-cache/7b7ff7400ff3161706801f3b7660d75628d77495/preimage b/rr-cache/7b7ff7400ff3161706801f3b7660d75628d77495/preimage
new file mode 100644
index 000000000000..14de2c6197a3
--- /dev/null
+++ b/rr-cache/7b7ff7400ff3161706801f3b7660d75628d77495/preimage
@@ -0,0 +1,698 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_gt_pagefault.h"
+
+#include <linux/bitfield.h>
+#include <linux/circ_buf.h>
+
+#include <drm/drm_exec.h>
+#include <drm/drm_managed.h>
+#include <drm/ttm/ttm_execbuf_util.h>
+
+#include "abi/guc_actions_abi.h"
+#include "xe_bo.h"
+#include "xe_gt.h"
+#include "xe_gt_tlb_invalidation.h"
+#include "xe_guc.h"
+#include "xe_guc_ct.h"
+#include "xe_migrate.h"
+#include "xe_trace_bo.h"
+#include "xe_vm.h"
+
+/*
+ * Decoded form of a page fault descriptor; filled in by get_pagefault() from
+ * the raw descriptor dwords queued for this GT.
+ */
+struct pagefault {
+	/* Faulting virtual address, assembled from the HI and LO descriptor fields */
+	u64 page_addr;
+	/* Address space ID, used to look up the faulting VM */
+	u32 asid;
+	u16 pdata;
+	u8 vfid;
+	/* Compared against enum access_type */
+	u8 access_type;
+	/* NOTE(review): presumably one of enum fault_type - confirm */
+	u8 fault_type;
+	u8 fault_level;
+	u8 engine_class;
+	u8 engine_instance;
+	u8 fault_unsuccessful;
+	/* TRTT fault; software does not handle these */
+	bool trva_fault;
+};
+
+/* Kind of memory access that caused a fault */
+enum access_type {
+	ACCESS_TYPE_READ = 0,
+	ACCESS_TYPE_WRITE = 1,
+	ACCESS_TYPE_ATOMIC = 2,
+	ACCESS_TYPE_RESERVED = 3,
+};
+
+/* Reason a fault was raised */
+enum fault_type {
+	NOT_PRESENT = 0,
+	WRITE_ACCESS_VIOLATION = 1,
+	ATOMIC_ACCESS_VIOLATION = 2,
+};
+
+/*
+ * NOTE(review): presumably the decoded form of an access counter
+ * notification; its users are outside this chunk - confirm.
+ */
+struct acc {
+	u64 va_range_base;
+	u32 asid;
+	u32 sub_granularity;
+	u8 granularity;
+	u8 vfid;
+	u8 access_type;
+	u8 engine_class;
+	u8 engine_instance;
+};
+
+/* Returns true if @access_type denotes an atomic access */
+static bool access_is_atomic(enum access_type access_type)
+{
+	return access_type == ACCESS_TYPE_ATOMIC;
+}
+
+/*
+ * Returns true if @vma is marked present on @tile and has not been marked
+ * invalidated there.
+ */
+static bool vma_is_valid(struct xe_tile *tile, struct xe_vma *vma)
+{
+	if (!(BIT(tile->id) & vma->tile_present))
+		return false;
+
+	return !(BIT(tile->id) & vma->tile_invalidated);
+}
+
+/*
+ * Returns true if the 4K page starting at @page_addr overlaps the address
+ * range covered by @vma.
+ */
+static bool vma_matches(struct xe_vma *vma, u64 page_addr)
+{
+	return page_addr <= xe_vma_end(vma) - 1 &&
+	       page_addr + SZ_4K - 1 >= xe_vma_start(vma);
+}
+
+/*
+ * Find the VMA covering the 4K page at @page_addr, trying the VMA of the
+ * last handled fault first before falling back to a lookup in the VM.
+ *
+ * Returns the VMA, or NULL if there is none.
+ */
+static struct xe_vma *lookup_vma(struct xe_vm *vm, u64 page_addr)
+{
+	struct xe_vma *cached = vm->usm.last_fault_vma;
+
+	/* Fast lookup */
+	if (cached && vma_matches(cached, page_addr))
+		return cached;
+
+	return xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K);
+}
+
+/*
+ * Lock @vma in @exec and get its backing store ready for a rebind: atomic
+ * faults on discrete devices migrate the BO to the VRAM placement selected
+ * by @id (userptr VMAs are refused with -EACCES); otherwise a BO, if there
+ * is one, is validated.
+ *
+ * Returns 0 on success, negative error code otherwise.
+ */
+static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma,
+		       bool atomic, unsigned int id)
+{
+	struct xe_bo *bo = xe_vma_bo(vma);
+	struct xe_vm *vm = xe_vma_vm(vma);
+	int ret;
+
+	ret = xe_vm_lock_vma(exec, vma);
+	if (ret)
+		return ret;
+
+	if (atomic && IS_DGFX(vm->xe)) {
+		if (xe_vma_is_userptr(vma))
+			return -EACCES;
+
+		/* Migrate to VRAM, move should invalidate the VMA first */
+		return xe_bo_migrate(bo, XE_PL_VRAM0 + id);
+	}
+
+	/* Create backing store if needed */
+	if (bo)
+		return xe_bo_validate(bo, vm, true);
+
+	return 0;
+}
+
+/*
+ * Service a page fault on @vma for @tile: re-pin userptr pages if required,
+ * lock and prepare the backing store, rebind the VMA on the faulting tile
+ * only and wait for the rebind to finish. The whole sequence is retried when
+ * a step ends in -EAGAIN.
+ *
+ * Returns 0 on success (including when the VMA is already valid for a
+ * non-atomic access), negative error code otherwise.
+ */
+static int handle_vma_pagefault(struct xe_tile *tile, struct pagefault *pf,
+				struct xe_vma *vma)
+{
+	struct xe_vm *vm = xe_vma_vm(vma);
+	struct drm_exec exec;
+	struct dma_fence *fence;
+	ktime_t end = 0;
+	int err;
+	bool atomic;
+
+	trace_xe_vma_pagefault(vma);
+	atomic = access_is_atomic(pf->access_type);
+
+	/* Check if VMA is valid */
+	if (vma_is_valid(tile, vma) && !atomic)
+		return 0;
+
+retry_userptr:
+	if (xe_vma_is_userptr(vma) &&
+	    xe_vma_userptr_check_repin(to_userptr_vma(vma))) {
+		struct xe_userptr_vma *uvma = to_userptr_vma(vma);
+
+		err = xe_vma_userptr_pin_pages(uvma);
+		if (err)
+			return err;
+	}
+
+	/* Lock VM and BOs dma-resv */
+	drm_exec_init(&exec, 0, 0);
+	drm_exec_until_all_locked(&exec) {
+		err = xe_pf_begin(&exec, vma, atomic, tile->id);
+		drm_exec_retry_on_contention(&exec);
+		/* Turn a retryable validation failure into a full retry */
+		if (xe_vm_validate_should_retry(&exec, err, &end))
+			err = -EAGAIN;
+		if (err)
+			goto unlock_dma_resv;
+
+		/* Bind VMA only to the GT that has faulted */
+		trace_xe_vma_pf_bind(vma);
+		fence = xe_vma_rebind(vm, vma, BIT(tile->id));
+		if (IS_ERR(fence)) {
+			err = PTR_ERR(fence);
+			if (xe_vm_validate_should_retry(&exec, err, &end))
+				err = -EAGAIN;
+			goto unlock_dma_resv;
+		}
+	}
+
+	/* Wait for the rebind, then mark the VMA valid again on this tile */
+	dma_fence_wait(fence, false);
+	dma_fence_put(fence);
+	vma->tile_invalidated &= ~BIT(tile->id);
+
+unlock_dma_resv:
+	drm_exec_fini(&exec);
+	if (err == -EAGAIN)
+		goto retry_userptr;
+
+	return err;
+}
+
+/*
+ * Translate @asid into its VM. Only VMs in fault mode are accepted.
+ *
+ * Returns the VM with an extra reference taken, or ERR_PTR(-EINVAL) if the
+ * ASID is unknown or the VM is not in fault mode.
+ */
+static struct xe_vm *asid_to_vm(struct xe_device *xe, u32 asid)
+{
+	struct xe_vm *found;
+
+	down_read(&xe->usm.lock);
+
+	found = xa_load(&xe->usm.asid_to_vm, asid);
+	if (!found || !xe_vm_in_fault_mode(found))
+		found = ERR_PTR(-EINVAL);
+	else
+		xe_vm_get(found);
+
+	up_read(&xe->usm.lock);
+
+	return found;
+}
+
+/*
+ * Handle a single decoded page fault: resolve the VM from the fault's ASID,
+ * find the VMA covering the faulting address and service the fault on it.
+ * On success the VMA is remembered as the VM's last fault VMA.
+ *
+ * Returns 0 on success, negative error code otherwise.
+ */
+static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_tile *tile = gt_to_tile(gt);
+	struct xe_vm *vm;
+	struct xe_vma *vma = NULL;
+	int err;
+
+	/* SW isn't expected to handle TRTT faults */
+	if (pf->trva_fault)
+		return -EFAULT;
+
+	/* Takes a VM reference, dropped at the end of this function */
+	vm = asid_to_vm(xe, pf->asid);
+	if (IS_ERR(vm))
+		return PTR_ERR(vm);
+
+	/*
+	 * TODO: Change to read lock? Using write lock for simplicity.
+	 */
+	down_write(&vm->lock);
+
+	if (xe_vm_is_closed(vm)) {
+		err = -ENOENT;
+		goto unlock_vm;
+	}
+
+	vma = lookup_vma(vm, pf->page_addr);
+	if (!vma) {
+		err = -EINVAL;
+		goto unlock_vm;
+	}
+
+	err = handle_vma_pagefault(tile, pf, vma);
+
+unlock_vm:
+	/* Cache the VMA for the fast path in lookup_vma() */
+	if (!err)
+		vm->usm.last_fault_vma = vma;
+	up_write(&vm->lock);
+	xe_vm_put(vm);
+
+	return err;
+}
+
+/*
+ * Send the two reply dwords in @reply to the GuC as a
+ * XE_GUC_ACTION_PAGE_FAULT_RES_DESC action.
+ *
+ * Returns the result of xe_guc_ct_send().
+ */
+static int send_pagefault_reply(struct xe_guc *guc,
+				struct xe_guc_pagefault_reply *reply)
+{
+	u32 action[] = {
+		XE_GUC_ACTION_PAGE_FAULT_RES_DESC,
+		reply->dw0,
+		reply->dw1,
+	};
+
+	return xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
+}
+
+/* Dump the decoded fields of @pf to the DRM debug log */
+static void print_pagefault(struct xe_device *xe, struct pagefault *pf)
+{
+	/* The 64-bit address is printed as two 32-bit halves */
+	drm_dbg(&xe->drm, "\n\tASID: %d\n"
+		 "\tVFID: %d\n"
+		 "\tPDATA: 0x%04x\n"
+		 "\tFaulted Address: 0x%08x%08x\n"
+		 "\tFaultType: %d\n"
+		 "\tAccessType: %d\n"
+		 "\tFaultLevel: %d\n"
+		 "\tEngineClass: %d\n"
+		 "\tEngineInstance: %d\n",
+		 pf->asid, pf->vfid, pf->pdata, upper_32_bits(pf->page_addr),
+		 lower_32_bits(pf->page_addr),
+		 pf->fault_type, pf->access_type, pf->fault_level,
+		 pf->engine_class, pf->engine_instance);
+}
+
+/* Length of one page fault message, in dwords */
+#define PF_MSG_LEN_DW	4
+
+/*
+ * Pop the oldest page fault descriptor from @pf_queue, under the queue lock,
+ * and decode it into @pf.
+ *
+ * Returns true if a fault was dequeued, false if the queue was empty.
+ */
+static bool get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf)
+{
+	const struct xe_guc_pagefault_desc *desc;
+	bool ret = false;
+
+	spin_lock_irq(&pf_queue->lock);
+	if (pf_queue->tail != pf_queue->head) {
+		desc = (const struct xe_guc_pagefault_desc *)
+			(pf_queue->data + pf_queue->tail);
+
+		pf->fault_level = FIELD_GET(PFD_FAULT_LEVEL, desc->dw0);
+		pf->trva_fault = FIELD_GET(XE2_PFD_TRVA_FAULT, desc->dw0);
+		pf->engine_class = FIELD_GET(PFD_ENG_CLASS, desc->dw0);
+		pf->engine_instance = FIELD_GET(PFD_ENG_INSTANCE, desc->dw0);
+		/* pdata is split over dw1 (high part) and dw0 (low part) */
+		pf->pdata = FIELD_GET(PFD_PDATA_HI, desc->dw1) <<
+			PFD_PDATA_HI_SHIFT;
+		pf->pdata |= FIELD_GET(PFD_PDATA_LO, desc->dw0);
+		pf->asid = FIELD_GET(PFD_ASID, desc->dw1);
+		pf->vfid = FIELD_GET(PFD_VFID, desc->dw2);
+		pf->access_type = FIELD_GET(PFD_ACCESS_TYPE, desc->dw2);
+		pf->fault_type = FIELD_GET(PFD_FAULT_TYPE, desc->dw2);
+		/* The address is split over dw3 (high part) and dw2 (low part) */
+		pf->page_addr = (u64)(FIELD_GET(PFD_VIRTUAL_ADDR_HI, desc->dw3)) <<
+			PFD_VIRTUAL_ADDR_HI_SHIFT;
+		pf->page_addr |= FIELD_GET(PFD_VIRTUAL_ADDR_LO, desc->dw2) <<
+			PFD_VIRTUAL_ADDR_LO_SHIFT;
+
+		/* Consume the message, wrapping around at the end of the queue */
+		pf_queue->tail = (pf_queue->tail + PF_MSG_LEN_DW) %
+			pf_queue->num_dw;
+		ret = true;
+	}
+	spin_unlock_irq(&pf_queue->lock);
+
+	return ret;
+}
+
+/*
+ * Report whether @pf_queue cannot accept another descriptor.
+ *
+ * Must be called with pf_queue->lock held. The queue is treated as full once
+ * the free space is PF_MSG_LEN_DW dwords or less, the same threshold as
+ * before.
+ *
+ * The free space is computed with modulo arithmetic rather than CIRC_SPACE():
+ * CIRC_SPACE() masks with (size - 1) and is therefore only correct for
+ * power-of-two sizes, whereas pf_queue->num_dw is derived from the EU and
+ * engine counts and is not guaranteed to be a power of two. For power-of-two
+ * sizes the result is identical to CIRC_SPACE().
+ */
+static bool pf_queue_full(struct pf_queue *pf_queue)
+{
+	u32 used, space;
+
+	lockdep_assert_held(&pf_queue->lock);
+
+	/* head and tail are always kept below num_dw, so this cannot wrap. */
+	used = (pf_queue->head + pf_queue->num_dw - pf_queue->tail) %
+		pf_queue->num_dw;
+	space = pf_queue->num_dw - 1 - used;
+
+	return space <= PF_MSG_LEN_DW;
+}
+
+/*
+ * G2H handler for a page fault message from the GuC.
+ *
+ * @guc: GuC that delivered the message
+ * @msg: message payload, expected to be PF_MSG_LEN_DW dwords
+ * @len: payload length in dwords
+ *
+ * The ASID in msg[1] selects one of the NUM_PF_QUEUE per-GT queues. If there
+ * is room, the raw descriptor is copied in at the head, the head is advanced
+ * and the queue's worker is scheduled on gt->usm.pf_wq; decoding happens
+ * later in the worker.
+ *
+ * Return: 0 on success, -EPROTO on an unexpected length, -ENOSPC if the
+ * selected queue is full (the fault is dropped).
+ */
+int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	struct xe_device *xe = gt_to_xe(gt);
+	struct pf_queue *pf_queue;
+	unsigned long flags;
+	u32 asid;
+	bool full;
+
+	if (unlikely(len != PF_MSG_LEN_DW))
+		return -EPROTO;
+
+	asid = FIELD_GET(PFD_ASID, msg[1]);
+	pf_queue = gt->usm.pf_queue + (asid % NUM_PF_QUEUE);
+
+	/*
+	 * The below logic doesn't work unless PF_QUEUE_NUM_DW % PF_MSG_LEN_DW == 0
+	 */
+	xe_gt_assert(gt, !(pf_queue->num_dw % PF_MSG_LEN_DW));
+
+	spin_lock_irqsave(&pf_queue->lock, flags);
+	full = pf_queue_full(pf_queue);
+	if (!full) {
+		memcpy(pf_queue->data + pf_queue->head, msg, len * sizeof(u32));
+		pf_queue->head = (pf_queue->head + len) %
+			pf_queue->num_dw;
+		queue_work(gt->usm.pf_wq, &pf_queue->worker);
+	} else {
+		/* Terminate the log line so it is not merged with a later one. */
+		drm_warn(&xe->drm, "PF Queue full, shouldn't be possible\n");
+	}
+	spin_unlock_irqrestore(&pf_queue->lock, flags);
+
+	return full ? -ENOSPC : 0;
+}
+
+/* Time budget, in ms, for one run of a USM queue worker before it requeues. */
+#define USM_QUEUE_MAX_RUNTIME_MS	20
+
+/*
+ * Worker draining one page fault queue.
+ *
+ * Each queued fault is decoded, handled, and answered with a fault response
+ * to the GuC carrying the ASID, VFID, engine and PDATA of the fault plus the
+ * success/failure indication. Once USM_QUEUE_MAX_RUNTIME_MS has elapsed and
+ * faults are still pending, the worker requeues itself and returns.
+ */
+static void pf_queue_work_func(struct work_struct *w)
+{
+	struct pf_queue *pf_queue = container_of(w, struct pf_queue, worker);
+	struct xe_gt *gt = pf_queue->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_guc_pagefault_reply reply = {};
+	struct pagefault pf = {};
+	unsigned long threshold;
+	int ret;
+
+	threshold = jiffies + msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS);
+
+	while (get_pagefault(pf_queue, &pf)) {
+		/*
+		 * @pf is reused across iterations and get_pagefault() does
+		 * not touch fault_unsuccessful, so clear it here; otherwise
+		 * one failed fault would mark every later fault handled in
+		 * this run as unsuccessful as well.
+		 */
+		pf.fault_unsuccessful = 0;
+
+		ret = handle_pagefault(gt, &pf);
+		if (unlikely(ret)) {
+			print_pagefault(xe, &pf);
+			pf.fault_unsuccessful = 1;
+			drm_dbg(&xe->drm, "Fault response: Unsuccessful %d\n", ret);
+		}
+
+		reply.dw0 = FIELD_PREP(PFR_VALID, 1) |
+			FIELD_PREP(PFR_SUCCESS, pf.fault_unsuccessful) |
+			FIELD_PREP(PFR_REPLY, PFR_ACCESS) |
+			FIELD_PREP(PFR_DESC_TYPE, FAULT_RESPONSE_DESC) |
+			FIELD_PREP(PFR_ASID, pf.asid);
+
+		reply.dw1 = FIELD_PREP(PFR_VFID, pf.vfid) |
+			FIELD_PREP(PFR_ENG_INSTANCE, pf.engine_instance) |
+			FIELD_PREP(PFR_ENG_CLASS, pf.engine_class) |
+			FIELD_PREP(PFR_PDATA, pf.pdata);
+
+		send_pagefault_reply(&gt->uc.guc, &reply);
+
+		/* Out of budget with work left: hand over to a fresh run. */
+		if (time_after(jiffies, threshold) &&
+		    pf_queue->tail != pf_queue->head) {
+			queue_work(gt->usm.pf_wq, w);
+			break;
+		}
+	}
+}
+
+/* Forward declaration: referenced by xe_gt_pagefault_init(), defined below. */
+static void acc_queue_work_func(struct work_struct *w);
+
+/*
+ * devm action registered by xe_gt_pagefault_init(): destroys the access
+ * counter and page fault workqueues of the GT passed in @arg. Does nothing
+ * on devices without USM.
+ */
+static void pagefault_fini(void *arg)
+{
+	struct xe_gt *gt = arg;
+
+	if (gt_to_xe(gt)->info.has_usm) {
+		destroy_workqueue(gt->usm.acc_wq);
+		destroy_workqueue(gt->usm.pf_wq);
+	}
+}
+
+/*
+ * Size and allocate the backing storage of one page fault queue.
+ *
+ * The queue gets one PF_MSG_LEN_DW-dword slot per EU (EUs per DSS times the
+ * number of geometry/compute DSS present) plus one per hardware engine. The
+ * resulting num_dw is not rounded to a power of two. The queue's lock and
+ * worker are initialised once the buffer exists.
+ *
+ * NOTE(review): the allocation below is still an unresolved merge conflict
+ * (devm_kcalloc() vs. kcalloc()); the markers are kept untouched here.
+ *
+ * Return: 0 on success, -ENOMEM if the buffer cannot be allocated.
+ */
+static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	xe_dss_mask_t all_dss;
+	int num_dss, num_eus;
+
+	/* Union of geometry and compute DSS fuse masks. */
+	bitmap_or(all_dss, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask,
+		  XE_MAX_DSS_FUSE_BITS);
+
+	num_dss = bitmap_weight(all_dss, XE_MAX_DSS_FUSE_BITS);
+	num_eus = bitmap_weight(gt->fuse_topo.eu_mask_per_dss,
+				XE_MAX_EU_FUSE_BITS) * num_dss;
+
+	/* user can issue separate page faults per EU and per CS */
+	pf_queue->num_dw =
+		(num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW;
+
+	pf_queue->gt = gt;
+<<<<<<<
+	pf_queue->data = devm_kcalloc(xe->drm.dev, pf_queue->num_dw,
+				      sizeof(u32), GFP_KERNEL);
+=======
+	pf_queue->data = kcalloc(pf_queue->num_dw, sizeof(u32), GFP_KERNEL);
+>>>>>>>
+	if (!pf_queue->data)
+		return -ENOMEM;
+
+	spin_lock_init(&pf_queue->lock);
+	INIT_WORK(&pf_queue->worker, pf_queue_work_func);
+
+	return 0;
+}
+
+/*
+ * Set up page fault and access counter handling for @gt.
+ *
+ * On devices with USM this allocates every page fault queue, initialises
+ * every access counter queue, creates the two unbound high-priority
+ * workqueues and registers pagefault_fini() as a devm action. Devices
+ * without USM are left untouched.
+ *
+ * Return: 0 on success or when USM is absent, otherwise a negative errno.
+ */
+int xe_gt_pagefault_init(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int i, ret = 0;
+
+	if (!xe->info.has_usm)
+		return 0;
+
+	for (i = 0; i < NUM_PF_QUEUE; ++i) {
+		ret = xe_alloc_pf_queue(gt, &gt->usm.pf_queue[i]);
+		if (ret)
+			return ret;
+	}
+
+	for (i = 0; i < NUM_ACC_QUEUE; ++i) {
+		gt->usm.acc_queue[i].gt = gt;
+		spin_lock_init(&gt->usm.acc_queue[i].lock);
+		INIT_WORK(&gt->usm.acc_queue[i].worker, acc_queue_work_func);
+	}
+
+	gt->usm.pf_wq = alloc_workqueue("xe_gt_page_fault_work_queue",
+					WQ_UNBOUND | WQ_HIGHPRI, NUM_PF_QUEUE);
+	if (!gt->usm.pf_wq)
+		return -ENOMEM;
+
+	gt->usm.acc_wq = alloc_workqueue("xe_gt_access_counter_work_queue",
+					 WQ_UNBOUND | WQ_HIGHPRI,
+					 NUM_ACC_QUEUE);
+	if (gt->usm.acc_wq)
+		return devm_add_action_or_reset(xe->drm.dev, pagefault_fini,
+						gt);
+
+	/* Second workqueue failed: undo the first one. */
+	destroy_workqueue(gt->usm.pf_wq);
+	return -ENOMEM;
+}
+
+/*
+ * Discard everything queued for @gt by rewinding the head and tail of every
+ * page fault and access counter queue to 0, each under its own queue lock.
+ * Does nothing on devices without USM.
+ */
+void xe_gt_pagefault_reset(struct xe_gt *gt)
+{
+	int idx;
+
+	if (!gt_to_xe(gt)->info.has_usm)
+		return;
+
+	for (idx = 0; idx < NUM_PF_QUEUE; idx++) {
+		struct pf_queue *pfq = &gt->usm.pf_queue[idx];
+
+		spin_lock_irq(&pfq->lock);
+		pfq->head = 0;
+		pfq->tail = 0;
+		spin_unlock_irq(&pfq->lock);
+	}
+
+	for (idx = 0; idx < NUM_ACC_QUEUE; idx++) {
+		struct acc_queue *accq = &gt->usm.acc_queue[idx];
+
+		spin_lock(&accq->lock);
+		accq->head = 0;
+		accq->tail = 0;
+		spin_unlock(&accq->lock);
+	}
+}
+
+/*
+ * Translate an access counter granularity encoding into a region size in
+ * bytes: 0 -> 128K, 1 -> 2M, 2 -> 16M, 3 -> 64M. Any other value yields 0.
+ */
+static int granularity_in_byte(int val)
+{
+	static const int region_bytes[] = {
+		SZ_128K,
+		SZ_2M,
+		SZ_16M,
+		SZ_64M,
+	};
+
+	if (val < 0 || val >= (int)ARRAY_SIZE(region_bytes))
+		return 0;
+
+	return region_bytes[val];
+}
+
+/*
+ * Size in bytes of one sub-region: the region size for @val divided by 32.
+ */
+static int sub_granularity_in_byte(int val)
+{
+	int region_bytes = granularity_in_byte(val);
+
+	return region_bytes / 32;
+}
+
+/*
+ * Log every decoded field of an access counter request at warning level.
+ * Region and sub-region sizes are reported in KB.
+ */
+static void print_acc(struct xe_device *xe, struct acc *acc)
+{
+	const char *type = acc->access_type ? "AC_NTFY_VAL" : "AC_TRIG_VAL";
+	int region_kb = granularity_in_byte(acc->granularity) / SZ_1K;
+	int sub_region_kb = sub_granularity_in_byte(acc->granularity) / SZ_1K;
+
+	drm_warn(&xe->drm, "Access counter request:\n"
+		 "\tType: %s\n"
+		 "\tASID: %d\n"
+		 "\tVFID: %d\n"
+		 "\tEngine: %d:%d\n"
+		 "\tGranularity: 0x%x KB Region/ %d KB sub-granularity\n"
+		 "\tSub_Granularity Vector: 0x%08x\n"
+		 "\tVA Range base: 0x%016llx\n",
+		 type,
+		 acc->asid, acc->vfid, acc->engine_class, acc->engine_instance,
+		 region_kb,
+		 sub_region_kb,
+		 acc->sub_granularity, acc->va_range_base);
+}
+
+/*
+ * Find the VMA targeted by an access counter request.
+ *
+ * The probed address is the range base plus the offset of the sub-region
+ * named by the lowest set bit of the sub-granularity vector; a 4K window at
+ * that address is looked up in @vm.
+ */
+static struct xe_vma *get_acc_vma(struct xe_vm *vm, struct acc *acc)
+{
+	int sub_region = ffs(acc->sub_granularity) - 1;
+	int offset = sub_region * sub_granularity_in_byte(acc->granularity);
+	u64 page_va = acc->va_range_base + offset;
+
+	return xe_vm_find_overlapping_vma(vm, page_va, SZ_4K);
+}
+
+/*
+ * Handle one decoded access counter request on @gt.
+ *
+ * Only ACC_TRIGGER requests are accepted. The VM is looked up from
+ * acc->asid and its lock taken for read; the VMA named by the request is
+ * then passed to xe_pf_begin() inside a drm_exec locking loop. VMAs without
+ * a backing BO are left alone. The VM reference from asid_to_vm() is dropped
+ * before returning.
+ *
+ * Return: 0 on success or when there is nothing to do, -EINVAL for an
+ * unsupported request type or when no VMA matches, the asid_to_vm() error if
+ * the VM lookup fails, or the error from xe_pf_begin().
+ */
+static int handle_acc(struct xe_gt *gt, struct acc *acc)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_tile *tile = gt_to_tile(gt);
+	struct drm_exec exec;
+	struct xe_vm *vm;
+	struct xe_vma *vma;
+	int ret = 0;
+
+	/* We only support ACC_TRIGGER at the moment */
+	if (acc->access_type != ACC_TRIGGER)
+		return -EINVAL;
+
+	vm = asid_to_vm(xe, acc->asid);
+	if (IS_ERR(vm))
+		return PTR_ERR(vm);
+
+	down_read(&vm->lock);
+
+	/* Lookup VMA */
+	vma = get_acc_vma(vm, acc);
+	if (!vma) {
+		ret = -EINVAL;
+		goto unlock_vm;
+	}
+
+	trace_xe_vma_acc(vma);
+
+	/* Userptr or null can't be migrated, nothing to do */
+	if (xe_vma_has_no_bo(vma))
+		goto unlock_vm;
+
+	/* Lock VM and BOs dma-resv */
+	drm_exec_init(&exec, 0, 0);
+	drm_exec_until_all_locked(&exec) {
+		ret = xe_pf_begin(&exec, vma, true, tile->id);
+		/* Restarts the locking loop if a lock was contended. */
+		drm_exec_retry_on_contention(&exec);
+		if (ret)
+			break;
+	}
+
+	drm_exec_fini(&exec);
+unlock_vm:
+	up_read(&vm->lock);
+	xe_vm_put(vm);
+
+	return ret;
+}
+
+/* Combine two 32-bit halves into a u64, @hi__ in the upper 32 bits. */
+#define make_u64(hi__, low__)  ((u64)(hi__) << 32 | (u64)(low__))
+
+/* Length, in dwords, of one access counter descriptor in the queue. */
+#define ACC_MSG_LEN_DW        4
+
+/*
+ * Pop the oldest access counter descriptor from @acc_queue and decode it
+ * into @acc.
+ *
+ * Runs under acc_queue->lock. When the queue is non-empty the descriptor at
+ * the tail is unpacked and the tail is advanced by ACC_MSG_LEN_DW, wrapping
+ * at ACC_QUEUE_NUM_DW.
+ *
+ * Return: true if a descriptor was decoded, false if the queue was empty.
+ */
+static bool get_acc(struct acc_queue *acc_queue, struct acc *acc)
+{
+	const struct xe_guc_acc_desc *desc;
+	bool popped = false;
+
+	spin_lock(&acc_queue->lock);
+	if (acc_queue->tail == acc_queue->head)
+		goto out_unlock;
+
+	desc = (const struct xe_guc_acc_desc *)
+		(acc_queue->data + acc_queue->tail);
+
+	acc->granularity = FIELD_GET(ACC_GRANULARITY, desc->dw2);
+	/* The sub-granularity vector is split across dw1 (top bit) and dw0. */
+	acc->sub_granularity = (FIELD_GET(ACC_SUBG_HI, desc->dw1) << 31) |
+		FIELD_GET(ACC_SUBG_LO, desc->dw0);
+	acc->engine_class = FIELD_GET(ACC_ENG_CLASS, desc->dw1);
+	acc->engine_instance = FIELD_GET(ACC_ENG_INSTANCE, desc->dw1);
+	acc->asid =  FIELD_GET(ACC_ASID, desc->dw1);
+	acc->vfid =  FIELD_GET(ACC_VFID, desc->dw2);
+	acc->access_type = FIELD_GET(ACC_TYPE, desc->dw0);
+	acc->va_range_base = make_u64(desc->dw3 & ACC_VIRTUAL_ADDR_RANGE_HI,
+				      desc->dw2 & ACC_VIRTUAL_ADDR_RANGE_LO);
+
+	acc_queue->tail = (acc_queue->tail + ACC_MSG_LEN_DW) %
+			  ACC_QUEUE_NUM_DW;
+	popped = true;
+
+out_unlock:
+	spin_unlock(&acc_queue->lock);
+
+	return popped;
+}
+
+/*
+ * Worker draining one access counter queue.
+ *
+ * Each queued request is decoded and handled; failures are logged together
+ * with the request contents. Once USM_QUEUE_MAX_RUNTIME_MS has elapsed and
+ * requests are still pending, the worker requeues itself and returns.
+ */
+static void acc_queue_work_func(struct work_struct *w)
+{
+	struct acc_queue *acc_queue = container_of(w, struct acc_queue, worker);
+	struct xe_gt *gt = acc_queue->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	unsigned long deadline = jiffies +
+		msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS);
+	struct acc acc = {};
+	int err;
+
+	for (;;) {
+		if (!get_acc(acc_queue, &acc))
+			break;
+
+		err = handle_acc(gt, &acc);
+		if (unlikely(err)) {
+			print_acc(xe, &acc);
+			drm_warn(&xe->drm, "ACC: Unsuccessful %d\n", err);
+		}
+
+		if (!time_after(jiffies, deadline))
+			continue;
+		if (acc_queue->tail == acc_queue->head)
+			continue;
+
+		/* Out of budget with work left: hand over to a fresh run. */
+		queue_work(gt->usm.acc_wq, w);
+		break;
+	}
+}
+
+/*
+ * Report whether @acc_queue cannot accept another descriptor, i.e. whether
+ * the free space is ACC_MSG_LEN_DW dwords or less. Must be called with
+ * acc_queue->lock held.
+ */
+static bool acc_queue_full(struct acc_queue *acc_queue)
+{
+	int space;
+
+	lockdep_assert_held(&acc_queue->lock);
+
+	space = CIRC_SPACE(acc_queue->head, acc_queue->tail,
+			   ACC_QUEUE_NUM_DW);
+
+	return space <= ACC_MSG_LEN_DW;
+}
+
+/*
+ * G2H handler for an access counter notification from the GuC.
+ *
+ * @guc: GuC that delivered the message
+ * @msg: message payload, expected to be ACC_MSG_LEN_DW dwords
+ * @len: payload length in dwords
+ *
+ * The ASID in msg[1] selects one of the NUM_ACC_QUEUE per-GT queues. If
+ * there is room, the raw descriptor is copied in at the head, the head is
+ * advanced and the queue's worker is scheduled on gt->usm.acc_wq.
+ *
+ * Return: 0 on success, -EPROTO on an unexpected length, -ENOSPC if the
+ * selected queue is full (the notification is dropped).
+ */
+int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	struct acc_queue *acc_queue;
+	u32 asid;
+	bool full;
+
+	/*
+	 * The below logic doesn't work unless ACC_QUEUE_NUM_DW % ACC_MSG_LEN_DW == 0
+	 */
+	BUILD_BUG_ON(ACC_QUEUE_NUM_DW % ACC_MSG_LEN_DW);
+
+	if (unlikely(len != ACC_MSG_LEN_DW))
+		return -EPROTO;
+
+	asid = FIELD_GET(ACC_ASID, msg[1]);
+	acc_queue = &gt->usm.acc_queue[asid % NUM_ACC_QUEUE];
+
+	spin_lock(&acc_queue->lock);
+	full = acc_queue_full(acc_queue);
+	if (!full) {
+		memcpy(acc_queue->data + acc_queue->head, msg,
+		       len * sizeof(u32));
+		acc_queue->head = (acc_queue->head + len) % ACC_QUEUE_NUM_DW;
+		queue_work(gt->usm.acc_wq, &acc_queue->worker);
+	} else {
+		/* Terminate the log line so it is not merged with a later one. */
+		drm_warn(&gt_to_xe(gt)->drm, "ACC Queue full, dropping ACC\n");
+	}
+	spin_unlock(&acc_queue->lock);
+
+	return full ? -ENOSPC : 0;
+}
diff --git a/rr-cache/a89e195941c400aeae1c120837d27c90f6f6603d/preimage b/rr-cache/a89e195941c400aeae1c120837d27c90f6f6603d/preimage
new file mode 100644
index 000000000000..2e7e95255787
--- /dev/null
+++ b/rr-cache/a89e195941c400aeae1c120837d27c90f6f6603d/preimage
@@ -0,0 +1,3420 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_vm.h"
+
+#include <linux/dma-fence-array.h>
+#include <linux/nospec.h>
+
+#include <drm/drm_exec.h>
+#include <drm/drm_print.h>
+#include <drm/ttm/ttm_execbuf_util.h>
+#include <drm/ttm/ttm_tt.h>
+#include <uapi/drm/xe_drm.h>
+#include <linux/ascii85.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+
+#include <generated/xe_wa_oob.h>
+
+#include "regs/xe_gtt_defs.h"
+#include "xe_assert.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_drm_client.h"
+#include "xe_exec_queue.h"
+#include "xe_gt_pagefault.h"
+#include "xe_gt_tlb_invalidation.h"
+#include "xe_migrate.h"
+#include "xe_pat.h"
+#include "xe_pm.h"
+#include "xe_preempt_fence.h"
+#include "xe_pt.h"
+#include "xe_res_cursor.h"
+#include "xe_sync.h"
+#include "xe_trace_bo.h"
+#include "xe_wa.h"
+#include "xe_hmm.h"
+
+/* Return the GEM object stored in the VM's gpuvm as r_obj. */
+static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
+{
+	struct drm_gem_object *obj = vm->gpuvm.r_obj;
+
+	return obj;
+}
+
+/**
+ * xe_vma_userptr_check_repin() - Advisory check for repin needed
+ * @uvma: The userptr vma
+ *
+ * Check if the userptr vma has been invalidated since the last successful
+ * repin. The check is advisory only and the function can be called
+ * without the vm->userptr.notifier_lock held. There is no guarantee that the
+ * vma userptr will remain valid after a lockless check, so typically
+ * the call needs to be followed by a proper check under the notifier_lock.
+ *
+ * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
+ */
+int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma)
+{
+	if (mmu_interval_check_retry(&uvma->userptr.notifier,
+				     uvma->userptr.notifier_seq))
+		return -EAGAIN;
+
+	return 0;
+}
+
+/*
+ * Populate the pages of a userptr VMA through
+ * xe_hmm_userptr_populate_range(). The caller must hold vm->lock.
+ *
+ * Return: the xe_hmm_userptr_populate_range() result.
+ */
+int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma)
+{
+	struct xe_vma *vma = &uvma->vma;
+	struct xe_vm *vm = xe_vma_vm(vma);
+
+	lockdep_assert_held(&vm->lock);
+	xe_assert(vm->xe, xe_vma_is_userptr(vma));
+
+	return xe_hmm_userptr_populate_range(uvma, false);
+}
+
+/*
+ * Report whether any exec queue on the VM's preempt list either has no
+ * preempt fence installed or has one with signaling already enabled.
+ * Requires vm->lock and the VM's dma-resv to be held.
+ */
+static bool preempt_fences_waiting(struct xe_vm *vm)
+{
+	struct xe_exec_queue *q;
+
+	lockdep_assert_held(&vm->lock);
+	xe_vm_assert_held(vm);
+
+	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
+		struct dma_fence *pfence = q->lr.pfence;
+
+		if (!pfence)
+			return true;
+
+		if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &pfence->flags))
+			return true;
+	}
+
+	return false;
+}
+
+/* Free every preempt fence still linked on @list. */
+static void free_preempt_fences(struct list_head *list)
+{
+	struct list_head *pos, *tmp;
+
+	list_for_each_safe(pos, tmp, list) {
+		xe_preempt_fence_free(to_preempt_fence_from_link(pos));
+	}
+}
+
+/*
+ * Top up @list with preallocated preempt fences until *@count reaches the
+ * number of exec queues on the VM. *@count is advanced once per fence added,
+ * so a later call only allocates what is still missing.
+ *
+ * Return: 0 on success, or the error encoded in the pointer returned by
+ * xe_preempt_fence_alloc().
+ */
+static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
+				unsigned int *count)
+{
+	lockdep_assert_held(&vm->lock);
+	xe_vm_assert_held(vm);
+
+	while (*count < vm->preempt.num_exec_queues) {
+		struct xe_preempt_fence *pfence = xe_preempt_fence_alloc();
+
+		if (IS_ERR(pfence))
+			return PTR_ERR(pfence);
+
+		list_move_tail(xe_preempt_fence_link(pfence), list);
+		++(*count);
+	}
+
+	return 0;
+}
+
+/*
+ * Wait for, then drop, the preempt fence currently installed on each exec
+ * queue of the VM.
+ *
+ * Return: 0 once every installed fence has been waited on and released,
+ * -ETIME if a wait fails or a fence carries the -ETIME error (the fence of
+ * that queue is left in place).
+ */
+static int wait_for_existing_preempt_fences(struct xe_vm *vm)
+{
+	struct xe_exec_queue *q;
+
+	xe_vm_assert_held(vm);
+
+	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
+		long timeout;
+
+		if (!q->lr.pfence)
+			continue;
+
+		timeout = dma_fence_wait(q->lr.pfence, false);
+
+		/* Only -ETIME on fence indicates VM needs to be killed */
+		if (timeout < 0 || q->lr.pfence->error == -ETIME)
+			return -ETIME;
+
+		dma_fence_put(q->lr.pfence);
+		q->lr.pfence = NULL;
+	}
+
+	return 0;
+}
+
+/*
+ * Report whether every exec queue on the VM's preempt list is idle according
+ * to xe_exec_queue_is_idle(). An empty list counts as idle.
+ */
+static bool xe_vm_is_idle(struct xe_vm *vm)
+{
+	struct xe_exec_queue *q;
+	bool idle = true;
+
+	xe_vm_assert_held(vm);
+	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
+		if (!xe_exec_queue_is_idle(q)) {
+			idle = false;
+			break;
+		}
+	}
+
+	return idle;
+}
+
+/*
+ * Arm one preallocated preempt fence from @list for each exec queue of the
+ * VM and install it as the queue's preempt fence, releasing the reference to
+ * the fence it replaces. The first entry of @list is taken for every queue;
+ * the list is asserted to be non-empty each time.
+ */
+static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
+{
+	struct xe_exec_queue *q;
+
+	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
+		struct list_head *link = list->next;
+		struct dma_fence *armed;
+
+		xe_assert(vm->xe, link != list);
+
+		armed = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
+					     q, q->lr.context,
+					     ++q->lr.seqno);
+		dma_fence_put(q->lr.pfence);
+		q->lr.pfence = armed;
+	}
+}
+
+/*
+ * Attach the preempt fence of every exec queue on the VM to @bo's
+ * reservation object with BOOKKEEP usage. The caller must hold @bo's lock.
+ *
+ * Return: 0 on success or when the VM has no exec queues, otherwise the
+ * error from dma_resv_reserve_fences().
+ */
+static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
+{
+	struct dma_resv *resv = bo->ttm.base.resv;
+	struct xe_exec_queue *q;
+	int err;
+
+	xe_bo_assert_held(bo);
+
+	if (!vm->preempt.num_exec_queues)
+		return 0;
+
+	/* Reserve one slot per queue before adding anything. */
+	err = dma_resv_reserve_fences(resv, vm->preempt.num_exec_queues);
+	if (err)
+		return err;
+
+	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
+		if (!q->lr.pfence)
+			continue;
+
+		dma_resv_add_fence(resv, q->lr.pfence,
+				   DMA_RESV_USAGE_BOOKKEEP);
+	}
+
+	return 0;
+}
+
+/*
+ * For every exec queue on the VM: call the queue's resume op, then add the
+ * queue's preempt fence to the objects locked in @exec with BOOKKEEP usage.
+ * Requires vm->lock and the VM's dma-resv to be held.
+ */
+static void resume_and_reinstall_preempt_fences(struct xe_vm *vm,
+						struct drm_exec *exec)
+{
+	struct drm_gpuvm *gpuvm = &vm->gpuvm;
+	struct xe_exec_queue *q;
+
+	lockdep_assert_held(&vm->lock);
+	xe_vm_assert_held(vm);
+
+	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
+		q->ops->resume(q);
+
+		drm_gpuvm_resv_add_fence(gpuvm, exec, q->lr.pfence,
+					 DMA_RESV_USAGE_BOOKKEEP,
+					 DMA_RESV_USAGE_BOOKKEEP);
+	}
+}
+
+/*
+ * Attach exec queue @q to a VM running in preempt-fence mode.
+ *
+ * With vm->lock held for write and the VM's objects locked, a preempt fence
+ * is created for @q, the queue is added to the VM's preempt list, and the
+ * fence is added to the locked reservation objects with BOOKKEEP usage. If a
+ * userptr repin is pending or another preempt fence is already waiting,
+ * signaling is enabled on the new fence right away so it syncs up with the
+ * others.
+ *
+ * Return: 0 on success, the drm_gpuvm_exec_lock() error, or the error from
+ * preempt fence creation.
+ */
+int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
+{
+	struct drm_gpuvm_exec vm_exec = {
+		.vm = &vm->gpuvm,
+		.flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
+		.num_fences = 1,
+	};
+	struct drm_exec *exec = &vm_exec.exec;
+	struct dma_fence *pfence;
+	int err;
+	bool wait;
+
+	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
+
+	down_write(&vm->lock);
+	err = drm_gpuvm_exec_lock(&vm_exec);
+	if (err)
+		goto out_up_write;
+
+	pfence = xe_preempt_fence_create(q, q->lr.context,
+					 ++q->lr.seqno);
+	/*
+	 * The preempt fence allocator reports failure through ERR_PTR()
+	 * (see alloc_preempt_fences()), so a NULL-only check would let an
+	 * error pointer be installed and dereferenced below. Accept either
+	 * failure convention.
+	 */
+	if (IS_ERR_OR_NULL(pfence)) {
+		err = pfence ? PTR_ERR(pfence) : -ENOMEM;
+		goto out_fini;
+	}
+
+	list_add(&q->lr.link, &vm->preempt.exec_queues);
+	++vm->preempt.num_exec_queues;
+	q->lr.pfence = pfence;
+
+	down_read(&vm->userptr.notifier_lock);
+
+	drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
+				 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
+
+	/*
+	 * Check to see if a preemption on VM is in flight or userptr
+	 * invalidation, if so trigger this preempt fence to sync state with
+	 * other preempt fences on the VM.
+	 */
+	wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
+	if (wait)
+		dma_fence_enable_sw_signaling(pfence);
+
+	up_read(&vm->userptr.notifier_lock);
+
+out_fini:
+	drm_exec_fini(exec);
+out_up_write:
+	up_write(&vm->lock);
+
+	return err;
+}
+
+/**
+ * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
+ * @vm: The VM.
+ * @q: The exec_queue
+ *
+ * Unlinks @q from the VM's preempt list and releases its preempt fence,
+ * enabling signaling on the fence first. Nothing is done for VMs that are
+ * not in preempt-fence mode.
+ *
+ * Note that this function might be called multiple times on the same queue.
+ */
+void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
+{
+	struct dma_fence *pfence;
+
+	if (!xe_vm_in_preempt_fence_mode(vm))
+		return;
+
+	down_write(&vm->lock);
+
+	/* A repeated call finds the link already emptied and skips this. */
+	if (!list_empty(&q->lr.link)) {
+		list_del_init(&q->lr.link);
+		--vm->preempt.num_exec_queues;
+	}
+
+	pfence = q->lr.pfence;
+	if (pfence) {
+		dma_fence_enable_sw_signaling(pfence);
+		dma_fence_put(pfence);
+		q->lr.pfence = NULL;
+	}
+
+	up_write(&vm->lock);
+}
+
+/**
+ * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs
+ * that need repinning.
+ * @vm: The VM.
+ *
+ * This function checks for whether the VM has userptrs that need repinning,
+ * and provides a release-type barrier on the userptr.notifier_lock after
+ * checking. The caller must hold the userptr.notifier_lock for read.
+ *
+ * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
+ */
+int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
+{
+	lockdep_assert_held_read(&vm->userptr.notifier_lock);
+
+	if (!list_empty(&vm->userptr.repin_list))
+		return -EAGAIN;
+
+	if (!list_empty(&vm->userptr.invalidated))
+		return -EAGAIN;
+
+	return 0;
+}
+
+/* Retry window, in ms, used by xe_vm_validate_should_retry() on -ENOMEM. */
+#define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
+
+/**
+ * xe_vm_kill() - VM Kill
+ * @vm: The VM.
+ * @unlocked: Flag indicates the VM's dma-resv is not held
+ *
+ * Kill the VM by setting the banned flag, indicating the VM is no longer
+ * available for use. If in preempt fence mode, also kill all exec queues
+ * attached to the VM. When @unlocked is set, the VM's dma-resv is taken
+ * around the operation and released afterwards. The caller must hold
+ * vm->lock.
+ */
+void xe_vm_kill(struct xe_vm *vm, bool unlocked)
+{
+	struct xe_exec_queue *q;
+
+	lockdep_assert_held(&vm->lock);
+
+	if (unlocked) {
+		xe_vm_lock(vm, false);
+	}
+
+	vm->flags |= XE_VM_FLAG_BANNED;
+	trace_xe_vm_kill(vm);
+
+	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
+		q->ops->kill(q);
+	}
+
+	if (unlocked) {
+		xe_vm_unlock(vm);
+	}
+
+	/* TODO: Inform user the VM is banned */
+}
+
+/**
+ * xe_vm_validate_should_retry() - Whether to retry after a validate error.
+ * @exec: The drm_exec object used for locking before validation.
+ * @err: The error returned from ttm_bo_validate().
+ * @end: A ktime_t cookie that should be set to 0 before first use and
+ * that should be reused on subsequent calls.
+ *
+ * With multiple active VMs, under memory pressure, it is possible that
+ * ttm_bo_validate() run into -EDEADLK and in such case returns -ENOMEM.
+ * Until ttm properly handles locking in such scenarios, best thing the
+ * driver can do is retry with a timeout. Check if that is necessary, and
+ * if so unlock the drm_exec's objects while keeping the ticket to prepare
+ * for a rerun.
+ *
+ * The first -ENOMEM arms @end with a deadline XE_VM_REBIND_RETRY_TIMEOUT_MS
+ * in the future; each retry granted before that deadline sleeps for 20 ms.
+ *
+ * Return: true if a retry after drm_exec_init() is recommended;
+ * false otherwise.
+ */
+bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end)
+{
+	ktime_t now;
+
+	if (err != -ENOMEM)
+		return false;
+
+	now = ktime_get();
+	if (!*end)
+		*end = ktime_add_ms(now, XE_VM_REBIND_RETRY_TIMEOUT_MS);
+
+	if (!ktime_before(now, *end))
+		return false;
+
+	msleep(20);
+	return true;
+}
+
+/*
+ * Validate the BO behind @vm_bo and queue its mappings for rebind.
+ *
+ * Every VA of @vm_bo is first moved to the tail of the VM's rebind list,
+ * then the BO is validated. The evicted flag is cleared only when the
+ * validation succeeds. The caller must hold vm->lock.
+ *
+ * Return: 0 on success, otherwise the xe_bo_validate() error.
+ */
+static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
+{
+	struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
+	struct drm_gpuva *gpuva;
+	int ret;
+
+	lockdep_assert_held(&vm->lock);
+	drm_gpuvm_bo_for_each_va(gpuva, vm_bo) {
+		struct xe_vma *vma = gpuva_to_vma(gpuva);
+
+		list_move_tail(&vma->combined_links.rebind, &vm->rebind_list);
+	}
+
+	ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false);
+	if (!ret)
+		vm_bo->evicted = false;
+
+	return ret;
+}
+
+/**
+ * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
+ * @vm: The vm for which we are rebinding.
+ * @exec: The struct drm_exec with the locked GEM objects.
+ * @num_fences: The number of fences to reserve for the operation, not
+ * including rebinds and validations.
+ *
+ * Validates all evicted gem objects and rebinds their vmas. Note that
+ * rebindings may cause evictions and hence the validation-rebind
+ * sequence is rerun until there are no more objects to validate.
+ * Afterwards @num_fences fence slots are reserved on every object locked
+ * in @exec.
+ *
+ * Return: 0 on success, negative error code on error. In particular,
+ * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
+ * the drm_exec transaction needs to be restarted.
+ */
+int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
+			  unsigned int num_fences)
+{
+	struct drm_gem_object *obj;
+	unsigned long index;
+	int ret;
+
+	for (;;) {
+		ret = drm_gpuvm_validate(&vm->gpuvm, exec);
+		if (ret)
+			return ret;
+
+		ret = xe_vm_rebind(vm, false);
+		if (ret)
+			return ret;
+
+		/* Stop once the rebind left nothing on the evict list. */
+		if (list_empty(&vm->gpuvm.evict.list))
+			break;
+	}
+
+	drm_exec_for_each_locked_object(exec, index, obj) {
+		ret = dma_resv_reserve_fences(obj->resv, num_fences);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Locking-loop body of the preempt rebind worker.
+ *
+ * Prepares the VM's reservation object in @exec, then decides whether any
+ * work is needed: an idle VM gets preempt.rebind_deactivated set and *@done
+ * set; a VM with no waiting preempt fences just gets *@done set. Otherwise
+ * the VM's objects are prepared, the existing preempt fences are waited for,
+ * and validation plus rebinding is run.
+ *
+ * Return: 0 on success (check *@done), otherwise a negative error code.
+ */
+static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
+				 bool *done)
+{
+	bool nothing_to_do;
+	int ret;
+
+	ret = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
+	if (ret)
+		return ret;
+
+	if (xe_vm_is_idle(vm)) {
+		vm->preempt.rebind_deactivated = true;
+		nothing_to_do = true;
+	} else {
+		nothing_to_do = !preempt_fences_waiting(vm);
+	}
+
+	if (nothing_to_do) {
+		*done = true;
+		return 0;
+	}
+
+	ret = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
+	if (!ret)
+		ret = wait_for_existing_preempt_fences(vm);
+	if (ret)
+		return ret;
+
+	/*
+	 * Add validation and rebinding to the locking loop since both can
+	 * cause evictions which may require blocking dma_resv locks.
+	 * The fence reservation here is intended for the new preempt fences
+	 * we attach at the end of the rebind work.
+	 */
+	return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
+}
+
+/*
+ * Rebind worker for VMs in preempt-fence mode.
+ *
+ * With vm->lock held for write it repins invalidated userptrs, locks the
+ * VM's objects, waits for the old preempt fences, validates and rebinds,
+ * and finally arms and installs a fresh preempt fence on every exec queue
+ * and resumes the queues. Any step returning -EAGAIN restarts the sequence
+ * from the userptr check; any other error kills the VM. Preallocated fences
+ * that were not consumed are freed on exit.
+ */
+static void preempt_rebind_work_func(struct work_struct *w)
+{
+	struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
+	struct drm_exec exec;
+	unsigned int fence_count = 0;
+	LIST_HEAD(preempt_fences);
+	ktime_t end = 0;
+	int err = 0;
+	long wait;
+	int __maybe_unused tries = 0;
+
+	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
+	trace_xe_vm_rebind_worker_enter(vm);
+
+	down_write(&vm->lock);
+
+	if (xe_vm_is_closed_or_banned(vm)) {
+		up_write(&vm->lock);
+		trace_xe_vm_rebind_worker_exit(vm);
+		return;
+	}
+
+retry:
+	if (xe_vm_userptr_check_repin(vm)) {
+		err = xe_vm_userptr_pin(vm);
+		if (err)
+			goto out_unlock_outer;
+	}
+
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+
+	drm_exec_until_all_locked(&exec) {
+		bool done = false;
+
+		err = xe_preempt_work_begin(&exec, vm, &done);
+		drm_exec_retry_on_contention(&exec);
+		if (err || done) {
+			/* exec is finalised here, so skip the out_unlock label. */
+			drm_exec_fini(&exec);
+			if (err && xe_vm_validate_should_retry(&exec, err, &end))
+				err = -EAGAIN;
+
+			goto out_unlock_outer;
+		}
+	}
+
+	/* fence_count persists across retries, so only the shortfall is allocated. */
+	err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
+	if (err)
+		goto out_unlock;
+
+	err = xe_vm_rebind(vm, true);
+	if (err)
+		goto out_unlock;
+
+	/* Wait on rebinds and munmap style VM unbinds */
+	wait = dma_resv_wait_timeout(xe_vm_resv(vm),
+				     DMA_RESV_USAGE_KERNEL,
+				     false, MAX_SCHEDULE_TIMEOUT);
+	if (wait <= 0) {
+		err = -ETIME;
+		goto out_unlock;
+	}
+
+/*
+ * With CONFIG_DRM_XE_USERPTR_INVAL_INJECT the first pass always retries;
+ * otherwise a retry is needed only when a userptr repin is pending.
+ */
+#define retry_required(__tries, __vm) \
+	(IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
+	(!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
+	__xe_vm_userptr_needs_repin(__vm))
+
+	down_read(&vm->userptr.notifier_lock);
+	if (retry_required(tries, vm)) {
+		up_read(&vm->userptr.notifier_lock);
+		err = -EAGAIN;
+		goto out_unlock;
+	}
+
+#undef retry_required
+
+	spin_lock(&vm->xe->ttm.lru_lock);
+	ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
+	spin_unlock(&vm->xe->ttm.lru_lock);
+
+	/* Point of no return. */
+	arm_preempt_fences(vm, &preempt_fences);
+	resume_and_reinstall_preempt_fences(vm, &exec);
+	up_read(&vm->userptr.notifier_lock);
+
+out_unlock:
+	drm_exec_fini(&exec);
+out_unlock_outer:
+	if (err == -EAGAIN) {
+		trace_xe_vm_rebind_worker_retry(vm);
+		goto retry;
+	}
+
+	if (err) {
+		drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
+		xe_vm_kill(vm, true);
+	}
+	up_write(&vm->lock);
+
+	free_preempt_fences(&preempt_fences);
+
+	trace_xe_vm_rebind_worker_exit(vm);
+}
+
+/*
+ * MMU interval notifier callback for a userptr VMA.
+ *
+ * Non-blockable ranges are refused. Otherwise the notifier sequence is
+ * advanced under the notifier lock, a bound VMA of a non-fault-mode VM is
+ * queued on the VM's invalidated list, and the callback then waits for all
+ * BOOKKEEP fences on the VM's reservation object. In fault mode the VMA's
+ * mappings are invalidated as well.
+ *
+ * Return: false if the range is not blockable, true otherwise.
+ */
+static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
+				   const struct mmu_notifier_range *range,
+				   unsigned long cur_seq)
+{
+	struct xe_userptr *userptr = container_of(mni, typeof(*userptr), notifier);
+	struct xe_userptr_vma *uvma = container_of(userptr, typeof(*uvma), userptr);
+	struct xe_vma *vma = &uvma->vma;
+	struct xe_vm *vm = xe_vma_vm(vma);
+	struct dma_resv_iter cursor;
+	struct dma_fence *fence;
+	long err;
+
+	xe_assert(vm->xe, xe_vma_is_userptr(vma));
+	trace_xe_vma_userptr_invalidate(vma);
+
+	if (!mmu_notifier_range_blockable(range))
+		return false;
+
+	vm_dbg(&xe_vma_vm(vma)->xe->drm,
+	       "NOTIFIER: addr=0x%016llx, range=0x%016llx",
+		xe_vma_start(vma), xe_vma_size(vma));
+
+	down_write(&vm->userptr.notifier_lock);
+	mmu_interval_set_seq(mni, cur_seq);
+
+	/* No need to stop gpu access if the userptr is not yet bound. */
+	if (!userptr->initial_bind) {
+		up_write(&vm->userptr.notifier_lock);
+		return true;
+	}
+
+	/*
+	 * Tell exec and rebind worker they need to repin and rebind this
+	 * userptr.
+	 */
+	if (!xe_vm_in_fault_mode(vm) &&
+	    !(vma->gpuva.flags & XE_VMA_DESTROYED) && vma->tile_present) {
+		spin_lock(&vm->userptr.invalidated_lock);
+		list_move_tail(&userptr->invalidate_link,
+			       &vm->userptr.invalidated);
+		spin_unlock(&vm->userptr.invalidated_lock);
+	}
+
+	up_write(&vm->userptr.notifier_lock);
+
+	/*
+	 * Preempt fences turn into schedule disables, pipeline these.
+	 * Note that even in fault mode, we need to wait for binds and
+	 * unbinds to complete, and those are attached as BOOKKEEP fences
+	 * to the vm.
+	 */
+	dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
+			    DMA_RESV_USAGE_BOOKKEEP);
+	dma_resv_for_each_fence_unlocked(&cursor, fence)
+		dma_fence_enable_sw_signaling(fence);
+	dma_resv_iter_end(&cursor);
+
+	err = dma_resv_wait_timeout(xe_vm_resv(vm),
+				    DMA_RESV_USAGE_BOOKKEEP,
+				    false, MAX_SCHEDULE_TIMEOUT);
+	XE_WARN_ON(err <= 0);
+
+	if (xe_vm_in_fault_mode(vm)) {
+		err = xe_vm_invalidate_vma(vma);
+		XE_WARN_ON(err);
+	}
+
+	trace_xe_vma_userptr_invalidate_complete(vma);
+
+	return true;
+}
+
+/* MMU interval notifier ops for userptr VMAs; only .invalidate is provided. */
+static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
+	.invalidate = vma_userptr_invalidate,
+};
+
+/*
+ * Repin every userptr VMA of @vm that needs it.
+ *
+ * Userptrs on the invalidated list are first moved to the repin list. Each
+ * entry on the repin list is then pinned: on success it is queued on the
+ * VM's rebind list; if pinning fails with -EFAULT the VMA is dropped from
+ * the repin list and, after waiting for pending binds, its mappings are
+ * invalidated instead. Must not be used on fault-mode VMs; the caller holds
+ * vm->lock for write.
+ *
+ * Return: 0 on success, otherwise the first pin or invalidate error.
+ */
+int xe_vm_userptr_pin(struct xe_vm *vm)
+{
+	struct xe_userptr_vma *uvma, *next;
+	int err = 0;
+
+	xe_assert(vm->xe, !xe_vm_in_fault_mode(vm));
+	lockdep_assert_held_write(&vm->lock);
+
+	/* Collect invalidated userptrs */
+	spin_lock(&vm->userptr.invalidated_lock);
+	list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated,
+				 userptr.invalidate_link) {
+		list_del_init(&uvma->userptr.invalidate_link);
+		list_move_tail(&uvma->userptr.repin_link,
+			       &vm->userptr.repin_list);
+	}
+	spin_unlock(&vm->userptr.invalidated_lock);
+
+	/* Pin and move to the rebind list */
+	list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
+				 userptr.repin_link) {
+		err = xe_vma_userptr_pin_pages(uvma);
+		if (err == -EFAULT) {
+			list_del_init(&uvma->userptr.repin_link);
+
+			/* Wait for pending binds */
+			xe_vm_lock(vm, false);
+			dma_resv_wait_timeout(xe_vm_resv(vm),
+					      DMA_RESV_USAGE_BOOKKEEP,
+					      false, MAX_SCHEDULE_TIMEOUT);
+
+			err = xe_vm_invalidate_vma(&uvma->vma);
+			xe_vm_unlock(vm);
+			if (err)
+				return err;
+		} else {
+			if (err < 0)
+				return err;
+
+			list_del_init(&uvma->userptr.repin_link);
+			list_move_tail(&uvma->vma.combined_links.rebind,
+				       &vm->rebind_list);
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs
+ * that need repinning.
+ * @vm: The VM.
+ *
+ * This function does an advisory check for whether the VM has userptrs that
+ * need repinning.
+ *
+ * Return: 0 if there are no indications of userptrs needing repinning,
+ * -EAGAIN if there are.
+ */
+int xe_vm_userptr_check_repin(struct xe_vm *vm)
+{
+	if (!list_empty_careful(&vm->userptr.repin_list))
+		return -EAGAIN;
+
+	if (!list_empty_careful(&vm->userptr.invalidated))
+		return -EAGAIN;
+
+	return 0;
+}
+
+/*
+ * Allocate the per-tile page-table update op arrays of @vops, sized by each
+ * tile's num_ops. Tiles with no ops are skipped.
+ *
+ * Return: 0 on success; on allocation failure -ENOBUFS when
+ * @array_of_binds is set, -ENOMEM otherwise.
+ */
+static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds)
+{
+	int tile;
+
+	for (tile = 0; tile < XE_MAX_TILES_PER_DEVICE; tile++) {
+		if (vops->pt_update_ops[tile].num_ops) {
+			vops->pt_update_ops[tile].ops =
+				kmalloc_array(vops->pt_update_ops[tile].num_ops,
+					      sizeof(*vops->pt_update_ops[tile].ops),
+					      GFP_KERNEL);
+			if (!vops->pt_update_ops[tile].ops) {
+				if (array_of_binds)
+					return -ENOBUFS;
+				return -ENOMEM;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/* Free the per-tile page-table update op arrays of @vops. */
+static void xe_vma_ops_fini(struct xe_vma_ops *vops)
+{
+	int tile = 0;
+
+	while (tile < XE_MAX_TILES_PER_DEVICE) {
+		kfree(vops->pt_update_ops[tile].ops);
+		tile++;
+	}
+}
+
+/* Bump num_ops for every tile whose bit is set in @tile_mask. */
+static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask)
+{
+	int tile;
+
+	for (tile = 0; tile < XE_MAX_TILES_PER_DEVICE; tile++) {
+		if (!(tile_mask & BIT(tile)))
+			continue;
+
+		++vops->pt_update_ops[tile].num_ops;
+	}
+}
+
+/*
+ * Fill @op as a MAP operation that re-creates the existing mapping of @vma
+ * on the tiles in @tile_mask: address, range, GEM object and offset are
+ * copied from the VMA, the op is marked immediate, and the dumpable and
+ * null properties are taken over from the VMA.
+ */
+static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma,
+				  u8 tile_mask)
+{
+	const struct drm_gpuva *va = &vma->gpuva;
+
+	INIT_LIST_HEAD(&op->link);
+	op->tile_mask = tile_mask;
+
+	op->base.op = DRM_GPUVA_OP_MAP;
+	op->base.map.va.addr = va->va.addr;
+	op->base.map.va.range = va->va.range;
+	op->base.map.gem.obj = va->gem.obj;
+	op->base.map.gem.offset = va->gem.offset;
+
+	op->map.vma = vma;
+	op->map.immediate = true;
+	op->map.dumpable = va->flags & XE_VMA_DUMPABLE;
+	op->map.is_null = xe_vma_is_null(vma);
+}
+
+/*
+ * Allocate a rebind op for @vma, append it to @vops and account for it in
+ * the per-tile op counters of the tiles in @tile_mask.
+ *
+ * Return: 0 on success, -ENOMEM if the op cannot be allocated.
+ */
+static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma,
+				u8 tile_mask)
+{
+	struct xe_vma_op *rebind_op = kzalloc(sizeof(*rebind_op), GFP_KERNEL);
+
+	if (!rebind_op)
+		return -ENOMEM;
+
+	xe_vm_populate_rebind(rebind_op, vma, tile_mask);
+	list_add_tail(&rebind_op->link, &vops->list);
+	xe_vma_ops_incr_pt_update_ops(vops, tile_mask);
+
+	return 0;
+}
+
+/* Forward declarations for static helpers used by the rebind paths below. */
+static struct dma_fence *ops_execute(struct xe_vm *vm,
+				     struct xe_vma_ops *vops);
+static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
+			    struct xe_exec_queue *q,
+			    struct xe_sync_entry *syncs, u32 num_syncs);
+
+/**
+ * xe_vm_rebind() - Rebind every VMA on the VM's rebind list
+ * @vm: The VM.
+ * @rebind_worker: True when called from the rebind worker; only selects the
+ * tracepoint used and allows the rebind to proceed for LR-mode VMs.
+ *
+ * Returns early with 0 when the rebind list is empty, or when the VM is in
+ * LR mode and @rebind_worker is false. Otherwise one MAP op is built per
+ * listed VMA, the ops are executed, and on success all VMAs are removed
+ * from the rebind list.
+ *
+ * Caller must hold vm->lock (lockdep-asserted).
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
+{
+	struct dma_fence *fence;
+	struct xe_vma *vma, *next;
+	struct xe_vma_ops vops;
+	struct xe_vma_op *op, *next_op;
+	int err, i;
+
+	lockdep_assert_held(&vm->lock);
+	if ((xe_vm_in_lr_mode(vm) && !rebind_worker) ||
+	    list_empty(&vm->rebind_list))
+		return 0;
+
+	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
+	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
+		vops.pt_update_ops[i].wait_vm_bookkeep = true;
+
+	xe_vm_assert_held(vm);
+	/* The list is non-empty here, so err is always assigned below. */
+	list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
+		xe_assert(vm->xe, vma->tile_present);
+
+		if (rebind_worker)
+			trace_xe_vma_rebind_worker(vma);
+		else
+			trace_xe_vma_rebind_exec(vma);
+
+		err = xe_vm_ops_add_rebind(&vops, vma,
+					   vma->tile_present);
+		if (err)
+			goto free_ops;
+	}
+
+	err = xe_vma_ops_alloc(&vops, false);
+	if (err)
+		goto free_ops;
+
+	fence = ops_execute(vm, &vops);
+	if (IS_ERR(fence)) {
+		err = PTR_ERR(fence);
+	} else {
+		/* Success: the fence is not needed, and the list is drained. */
+		dma_fence_put(fence);
+		list_for_each_entry_safe(vma, next, &vm->rebind_list,
+					 combined_links.rebind)
+			list_del_init(&vma->combined_links.rebind);
+	}
+free_ops:
+	/* The ops were only scaffolding; free them on success and failure. */
+	list_for_each_entry_safe(op, next_op, &vops.list, link) {
+		list_del(&op->link);
+		kfree(op);
+	}
+	xe_vma_ops_fini(&vops);
+
+	return err;
+}
+
+/**
+ * xe_vma_rebind() - Rebind a single VMA on the given tiles
+ * @vm: The VM to operate on; must be in fault mode (asserted).
+ * @vma: The VMA to rebind.
+ * @tile_mask: Mask of tiles to rebind the VMA on.
+ *
+ * Each tile's update ops are pointed at that tile's migrate exec queue.
+ * Caller must hold vm->lock (lockdep-asserted).
+ *
+ * Return: The fence returned by ops_execute(), or an ERR_PTR() on failure.
+ */
+struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask)
+{
+	struct dma_fence *fence = NULL;
+	struct xe_vma_ops vops;
+	struct xe_vma_op *op, *next_op;
+	struct xe_tile *tile;
+	u8 id;
+	int err;
+
+	lockdep_assert_held(&vm->lock);
+	xe_vm_assert_held(vm);
+	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
+
+	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
+	/* NOTE(review): indexes by both id and tile->id; presumably equal. */
+	for_each_tile(tile, vm->xe, id) {
+		vops.pt_update_ops[id].wait_vm_bookkeep = true;
+		vops.pt_update_ops[tile->id].q =
+			xe_tile_migrate_exec_queue(tile);
+	}
+
+	/* Nothing has been queued yet if this fails, so no cleanup needed. */
+	err = xe_vm_ops_add_rebind(&vops, vma, tile_mask);
+	if (err)
+		return ERR_PTR(err);
+
+	err = xe_vma_ops_alloc(&vops, false);
+	if (err) {
+		fence = ERR_PTR(err);
+		goto free_ops;
+	}
+
+	fence = ops_execute(vm, &vops);
+
+free_ops:
+	list_for_each_entry_safe(op, next_op, &vops.list, link) {
+		list_del(&op->link);
+		kfree(op);
+	}
+	xe_vma_ops_fini(&vops);
+
+	return fence;
+}
+
+/*
+ * Free the memory backing @vma. Userptr VMAs are embedded in a larger
+ * struct xe_userptr_vma, so the containing object is what gets freed.
+ */
+static void xe_vma_free(struct xe_vma *vma)
+{
+	void *mem = vma;
+
+	if (xe_vma_is_userptr(vma))
+		mem = to_userptr_vma(vma);
+
+	kfree(mem);
+}
+
+#define VMA_CREATE_FLAG_READ_ONLY	BIT(0)
+#define VMA_CREATE_FLAG_IS_NULL		BIT(1)
+#define VMA_CREATE_FLAG_DUMPABLE	BIT(2)
+
+/*
+ * Allocate and initialise a VMA covering [@start, @end] (inclusive) in @vm.
+ *
+ * With neither @bo nor VMA_CREATE_FLAG_IS_NULL, a struct xe_userptr_vma is
+ * allocated and an MMU interval notifier is registered on current->mm;
+ * otherwise a plain struct xe_vma is allocated. For BO-backed VMAs the BO
+ * must be held by the caller (asserted), a GEM reference is taken and the
+ * VMA is linked to the drm_gpuvm_bo of the (VM, BO) pair. For userptr and
+ * NULL VMAs a VM reference is taken instead.
+ *
+ * @bo_offset_or_userptr is stored as the gem offset in the BO and userptr
+ * cases. @flags is a mask of VMA_CREATE_FLAG_*.
+ *
+ * Returns the new VMA or an ERR_PTR() on failure.
+ */
+static struct xe_vma *xe_vma_create(struct xe_vm *vm,
+				    struct xe_bo *bo,
+				    u64 bo_offset_or_userptr,
+				    u64 start, u64 end,
+				    u16 pat_index, unsigned int flags)
+{
+	struct xe_vma *vma;
+	struct xe_tile *tile;
+	u8 id;
+	bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY);
+	bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL);
+	bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE);
+
+	xe_assert(vm->xe, start < end);
+	xe_assert(vm->xe, end < vm->size);
+
+	/*
+	 * Allocate and ensure that the xe_vma_is_userptr() return
+	 * matches what was allocated.
+	 */
+	if (!bo && !is_null) {
+		struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL);
+
+		if (!uvma)
+			return ERR_PTR(-ENOMEM);
+
+		vma = &uvma->vma;
+	} else {
+		vma = kzalloc(sizeof(*vma), GFP_KERNEL);
+		if (!vma)
+			return ERR_PTR(-ENOMEM);
+
+		if (is_null)
+			vma->gpuva.flags |= DRM_GPUVA_SPARSE;
+		if (bo)
+			vma->gpuva.gem.obj = &bo->ttm.base;
+	}
+
+	INIT_LIST_HEAD(&vma->combined_links.rebind);
+
+	INIT_LIST_HEAD(&vma->gpuva.gem.entry);
+	vma->gpuva.vm = &vm->gpuvm;
+	vma->gpuva.va.addr = start;
+	/* @end is inclusive, hence the +1. */
+	vma->gpuva.va.range = end - start + 1;
+	if (read_only)
+		vma->gpuva.flags |= XE_VMA_READ_ONLY;
+	if (dumpable)
+		vma->gpuva.flags |= XE_VMA_DUMPABLE;
+
+	/* Start out targeting every tile of the device. */
+	for_each_tile(tile, vm->xe, id)
+		vma->tile_mask |= 0x1 << id;
+
+	if (vm->xe->info.has_atomic_enable_pte_bit)
+		vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
+
+	vma->pat_index = pat_index;
+
+	if (bo) {
+		struct drm_gpuvm_bo *vm_bo;
+
+		xe_bo_assert_held(bo);
+
+		vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base);
+		if (IS_ERR(vm_bo)) {
+			xe_vma_free(vma);
+			return ERR_CAST(vm_bo);
+		}
+
+		drm_gpuvm_bo_extobj_add(vm_bo);
+		drm_gem_object_get(&bo->ttm.base);
+		vma->gpuva.gem.offset = bo_offset_or_userptr;
+		drm_gpuva_link(&vma->gpuva, vm_bo);
+		/* Drop the reference obtained above once the VMA is linked. */
+		drm_gpuvm_bo_put(vm_bo);
+	} else /* userptr or null */ {
+		if (!is_null) {
+			struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr;
+			u64 size = end - start + 1;
+			int err;
+
+			INIT_LIST_HEAD(&userptr->invalidate_link);
+			INIT_LIST_HEAD(&userptr->repin_link);
+			vma->gpuva.gem.offset = bo_offset_or_userptr;
+
+			err = mmu_interval_notifier_insert(&userptr->notifier,
+							   current->mm,
+							   xe_vma_userptr(vma), size,
+							   &vma_userptr_notifier_ops);
+			if (err) {
+				xe_vma_free(vma);
+				return ERR_PTR(err);
+			}
+
+			userptr->notifier_seq = LONG_MAX;
+		}
+
+		/* Userptr and NULL VMAs hold a VM reference instead of a BO one. */
+		xe_vm_get(vm);
+	}
+
+	return vma;
+}
+
+/*
+ * Final stage of VMA destruction: drop the user fence (if any), release the
+ * backing reference (userptr notifier and VM reference, VM reference for NULL
+ * VMAs, or BO reference) and free the VMA memory.
+ */
+static void xe_vma_destroy_late(struct xe_vma *vma)
+{
+	struct xe_vm *vm = xe_vma_vm(vma);
+
+	if (vma->ufence) {
+		xe_sync_ufence_put(vma->ufence);
+		vma->ufence = NULL;
+	}
+
+	if (xe_vma_is_userptr(vma)) {
+		struct xe_userptr_vma *uvma = to_userptr_vma(vma);
+		struct xe_userptr *userptr = &uvma->userptr;
+
+		if (userptr->sg)
+			xe_hmm_userptr_free_sg(uvma);
+
+		/*
+		 * Since userptr pages are not pinned, we can't remove
+		 * the notifier until we're sure the GPU is not accessing
+		 * them anymore
+		 */
+		mmu_interval_notifier_remove(&userptr->notifier);
+		xe_vm_put(vm);
+	} else if (xe_vma_is_null(vma)) {
+		xe_vm_put(vm);
+	} else {
+		xe_bo_put(xe_vma_bo(vma));
+	}
+
+	xe_vma_free(vma);
+}
+
+/* Work item handler: run the late VMA teardown for the embedding VMA. */
+static void vma_destroy_work_func(struct work_struct *w)
+{
+	xe_vma_destroy_late(container_of(w, struct xe_vma, destroy_work));
+}
+
+/*
+ * Fence callback: defer the late VMA teardown to a work item on
+ * system_unbound_wq rather than running it from the callback itself.
+ */
+static void vma_destroy_cb(struct dma_fence *fence,
+			   struct dma_fence_cb *cb)
+{
+	struct xe_vma *doomed = container_of(cb, struct xe_vma, destroy_cb);
+	struct work_struct *work = &doomed->destroy_work;
+
+	INIT_WORK(work, vma_destroy_work_func);
+	queue_work(system_unbound_wq, work);
+}
+
+/*
+ * Unhook @vma from its tracking structures and destroy it.
+ *
+ * Userptr VMAs (which must already be flagged XE_VMA_DESTROYED) are removed
+ * from the invalidated list; BO-backed VMAs are unlinked from their GPUVM BO,
+ * which requires the BO to be held. If @fence is given, the final teardown
+ * is deferred until the fence signals; otherwise, or if the fence has
+ * already signalled, it runs immediately.
+ *
+ * Caller must hold vm->lock for write (lockdep-asserted).
+ */
+static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
+{
+	struct xe_vm *vm = xe_vma_vm(vma);
+
+	lockdep_assert_held_write(&vm->lock);
+	xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));
+
+	if (xe_vma_is_userptr(vma)) {
+		xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
+
+		spin_lock(&vm->userptr.invalidated_lock);
+		list_del(&to_userptr_vma(vma)->userptr.invalidate_link);
+		spin_unlock(&vm->userptr.invalidated_lock);
+	} else if (!xe_vma_is_null(vma)) {
+		xe_bo_assert_held(xe_vma_bo(vma));
+
+		drm_gpuva_unlink(&vma->gpuva);
+	}
+
+	xe_vm_assert_held(vm);
+	if (fence) {
+		int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
+						 vma_destroy_cb);
+
+		/* -ENOENT is the only expected failure; destroy right away. */
+		if (ret) {
+			XE_WARN_ON(ret != -ENOENT);
+			xe_vma_destroy_late(vma);
+		}
+	} else {
+		xe_vma_destroy_late(vma);
+	}
+}
+
+/**
+ * xe_vm_lock_vma() - drm_exec utility to lock a vma
+ * @exec: The drm_exec object we're currently locking for.
+ * @vma: The vma for which we want to lock the vm resv and any attached
+ * object's resv.
+ *
+ * Return: 0 on success, negative error code on error. In particular
+ * may return -EDEADLK on WW transaction contention and -EINTR if
+ * an interruptible wait is terminated by a signal.
+ */
+int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
+{
+	struct xe_vm *vm = xe_vma_vm(vma);
+	struct xe_bo *bo = xe_vma_bo(vma);
+	int err;
+
+	XE_WARN_ON(!vm);
+
+	err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
+	/* The BO is locked separately only when bo->vm is not set. */
+	if (!err && bo && !bo->vm)
+		err = drm_exec_lock_obj(exec, &bo->ttm.base);
+
+	return err;
+}
+
+/*
+ * Destroy @vma when the caller does not already hold the reservation locks:
+ * take them through a local drm_exec transaction, destroy the VMA without a
+ * fence, then drop the locks. A locking error is warned about and the VMA is
+ * destroyed regardless.
+ */
+static void xe_vma_destroy_unlocked(struct xe_vma *vma)
+{
+	struct drm_exec exec;
+	int err;
+
+	drm_exec_init(&exec, 0, 0);
+	drm_exec_until_all_locked(&exec) {
+		err = xe_vm_lock_vma(&exec, vma);
+		drm_exec_retry_on_contention(&exec);
+		if (XE_WARN_ON(err))
+			break;
+	}
+
+	xe_vma_destroy(vma, NULL);
+
+	drm_exec_fini(&exec);
+}
+
+/*
+ * Look up the first VMA overlapping [@start, @start + @range) in @vm.
+ * Returns NULL if there is none or if the VM is closed or banned.
+ * Caller must hold vm->lock (lockdep-asserted).
+ */
+struct xe_vma *
+xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
+{
+	struct drm_gpuva *hit;
+
+	lockdep_assert_held(&vm->lock);
+
+	if (xe_vm_is_closed_or_banned(vm))
+		return NULL;
+
+	xe_assert(vm->xe, start + range <= vm->size);
+
+	hit = drm_gpuva_find_first(&vm->gpuvm, start, range);
+	if (!hit)
+		return NULL;
+
+	return gpuva_to_vma(hit);
+}
+
+/*
+ * Insert @vma into @vm's GPUVA tracking. The insertion is done with
+ * vm->snap_mutex held. Caller must hold vm->lock (lockdep-asserted).
+ *
+ * Returns the drm_gpuva_insert() result; a failure is unexpected and warned
+ * about.
+ */
+static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
+{
+	int err;
+
+	xe_assert(vm->xe, xe_vma_vm(vma) == vm);
+	lockdep_assert_held(&vm->lock);
+
+	mutex_lock(&vm->snap_mutex);
+	err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
+	mutex_unlock(&vm->snap_mutex);
+	XE_WARN_ON(err);	/* Shouldn't be possible */
+
+	return err;
+}
+
+/*
+ * Remove @vma from @vm's GPUVA tracking (with vm->snap_mutex held) and clear
+ * the cached last-fault VMA if it points at @vma. Caller must hold vm->lock
+ * (lockdep-asserted).
+ */
+static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
+{
+	xe_assert(vm->xe, xe_vma_vm(vma) == vm);
+	lockdep_assert_held(&vm->lock);
+
+	mutex_lock(&vm->snap_mutex);
+	drm_gpuva_remove(&vma->gpuva);
+	mutex_unlock(&vm->snap_mutex);
+	/* Don't leave a stale pointer to a VMA that is going away. */
+	if (vm->usm.last_fault_vma == vma)
+		vm->usm.last_fault_vma = NULL;
+}
+
+/*
+ * drm_gpuvm op_alloc hook: allocate a zeroed struct xe_vma_op and hand back
+ * its embedded struct drm_gpuva_op, or NULL on allocation failure.
+ */
+static struct drm_gpuva_op *xe_vm_op_alloc(void)
+{
+	struct xe_vma_op *vma_op = kzalloc(sizeof(*vma_op), GFP_KERNEL);
+
+	return unlikely(!vma_op) ? NULL : &vma_op->base;
+}
+
+static void xe_vm_free(struct drm_gpuvm *gpuvm);
+
+/* drm_gpuvm callbacks for Xe VMs; passed to drm_gpuvm_init() at VM creation. */
+static const struct drm_gpuvm_ops gpuvm_ops = {
+	.op_alloc = xe_vm_op_alloc,
+	.vm_bo_validate = xe_gpuvm_validate,
+	.vm_free = xe_vm_free,
+};
+
+/* Translate bits 0-1 of @pat_index into the corresponding PAT0/PAT1 bits. */
+static u64 pde_encode_pat_index(u16 pat_index)
+{
+	u64 bits = 0;
+
+	bits |= (pat_index & BIT(0)) ? XE_PPGTT_PTE_PAT0 : 0;
+	bits |= (pat_index & BIT(1)) ? XE_PPGTT_PTE_PAT1 : 0;
+
+	return bits;
+}
+
+/*
+ * Translate bits 0-4 of @pat_index into page-table entry PAT bits. The bit
+ * used for PAT2 depends on whether @pt_level is zero or not.
+ */
+static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level)
+{
+	u64 bits = 0;
+
+	bits |= (pat_index & BIT(0)) ? XE_PPGTT_PTE_PAT0 : 0;
+	bits |= (pat_index & BIT(1)) ? XE_PPGTT_PTE_PAT1 : 0;
+
+	/* PAT2 lives in a different bit for non-zero page-table levels. */
+	if (pat_index & BIT(2))
+		bits |= pt_level ? XE_PPGTT_PDE_PDPE_PAT2 : XE_PPGTT_PTE_PAT2;
+
+	bits |= (pat_index & BIT(3)) ? XELPG_PPGTT_PTE_PAT3 : 0;
+	bits |= (pat_index & BIT(4)) ? XE2_PPGTT_PTE_PAT4 : 0;
+
+	return bits;
+}
+
+/*
+ * Return the page-size bit for an entry at @pt_level: 2M for level 1, 1G for
+ * level 2, nothing otherwise. Levels above MAX_HUGEPTE_LEVEL trigger a warning.
+ */
+static u64 pte_encode_ps(u32 pt_level)
+{
+	XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL);
+
+	switch (pt_level) {
+	case 1:
+		return XE_PDE_PS_2M;
+	case 2:
+		return XE_PDPE_PS_1G;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Build a page-directory entry pointing at @bo + @bo_offset: the address with
+ * the present and read/write bits plus the PAT bits for @pat_index.
+ */
+static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset,
+			      const u16 pat_index)
+{
+	u64 entry = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
+
+	entry |= XE_PAGE_PRESENT;
+	entry |= XE_PAGE_RW;
+	entry |= pde_encode_pat_index(pat_index);
+
+	return entry;
+}
+
+/*
+ * Build a present, read/write page-table entry for @bo + @bo_offset at
+ * @pt_level, including PAT and page-size bits. XE_PPGTT_PTE_DM is added when
+ * the BO is in VRAM or stolen device memory.
+ */
+static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
+			      u16 pat_index, u32 pt_level)
+{
+	u64 entry = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
+	bool devmem;
+
+	entry |= XE_PAGE_PRESENT;
+	entry |= XE_PAGE_RW;
+	entry |= pte_encode_pat_index(pat_index, pt_level);
+	entry |= pte_encode_ps(pt_level);
+
+	devmem = xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo);
+	if (devmem)
+		entry |= XE_PPGTT_PTE_DM;
+
+	return entry;
+}
+
+/*
+ * Add VMA-derived bits to @pte: present, read/write unless the VMA is
+ * read-only, PAT and page-size bits for @pt_level, and XE_PTE_NULL for NULL
+ * VMAs. Returns the combined entry.
+ */
+static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
+			       u16 pat_index, u32 pt_level)
+{
+	u64 extra = XE_PAGE_PRESENT;
+
+	if (likely(!xe_vma_read_only(vma)))
+		extra |= XE_PAGE_RW;
+
+	extra |= pte_encode_pat_index(pat_index, pt_level);
+	extra |= pte_encode_ps(pt_level);
+
+	if (unlikely(xe_vma_is_null(vma)))
+		extra |= XE_PTE_NULL;
+
+	return pte | extra;
+}
+
+/*
+ * Build a present, read/write page-table entry for the raw address @addr at
+ * @pt_level. XE_PPGTT_PTE_DM is set when @devmem is true. @flags may only
+ * contain XE_PTE_PS64 (asserted) and is OR-ed into the result.
+ */
+static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr,
+				u16 pat_index,
+				u32 pt_level, bool devmem, u64 flags)
+{
+	u64 entry;
+
+	/* Avoid passing random bits directly as flags */
+	xe_assert(xe, !(flags & ~XE_PTE_PS64));
+
+	entry = addr | XE_PAGE_PRESENT | XE_PAGE_RW;
+	entry |= pte_encode_pat_index(pat_index, pt_level);
+	entry |= pte_encode_ps(pt_level);
+	entry |= devmem ? XE_PPGTT_PTE_DM : 0;
+
+	return entry | flags;
+}
+
+/* Page-table entry encoders; installed as vm->pt_ops by xe_vm_create(). */
+static const struct xe_pt_ops xelp_pt_ops = {
+	.pte_encode_bo = xelp_pte_encode_bo,
+	.pte_encode_vma = xelp_pte_encode_vma,
+	.pte_encode_addr = xelp_pte_encode_addr,
+	.pde_encode_bo = xelp_pde_encode_bo,
+};
+
+static void vm_destroy_work_func(struct work_struct *w);
+
+/**
+ * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the
+ * given tile and vm.
+ * @xe: xe device.
+ * @tile: tile to set up for.
+ * @vm: vm to set up for.
+ *
+ * Sets up a pagetable tree with one page-table per level and a single
+ * leaf PTE. All pagetable entries point to the single page-table or,
+ * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and
+ * writes become NOPs.
+ *
+ * On failure, scratch tables created for earlier levels are left in place
+ * for the caller to clean up.
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile,
+				struct xe_vm *vm)
+{
+	u8 id = tile->id;
+	int i;
+
+	/* One scratch table per level, from MAX_HUGEPTE_LEVEL up to the root. */
+	for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) {
+		vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i);
+		if (IS_ERR(vm->scratch_pt[id][i]))
+			return PTR_ERR(vm->scratch_pt[id][i]);
+
+		xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]);
+	}
+
+	return 0;
+}
+
+/*
+ * Destroy the scratch page tables of @vm on every tile that has a root page
+ * table. Does nothing for VMs without scratch pages.
+ */
+static void xe_vm_free_scratch(struct xe_vm *vm)
+{
+	struct xe_tile *tile;
+	u8 id;
+
+	if (!xe_vm_has_scratch(vm))
+		return;
+
+	for_each_tile(tile, vm->xe, id) {
+		u32 i;
+
+		if (!vm->pt_root[id])
+			continue;
+
+		/* Same level range as xe_vm_create_scratch(); skip unset slots. */
+		for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i)
+			if (vm->scratch_pt[id][i])
+				xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL);
+	}
+}
+
+/**
+ * xe_vm_create() - Allocate and initialise a VM
+ * @xe: xe device.
+ * @flags: Mask of XE_VM_FLAG_*.
+ *
+ * Initialises locks and lists, sets up the GPUVM, creates a root page table
+ * per tile (only the selected tile for migration VMs), optional scratch
+ * tables, and, for non-migration VMs, one bind exec queue per tile.
+ *
+ * Return: The new VM, or an ERR_PTR() on failure.
+ */
+struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
+{
+	struct drm_gem_object *vm_resv_obj;
+	struct xe_vm *vm;
+	int err, number_tiles = 0;
+	struct xe_tile *tile;
+	u8 id;
+
+	vm = kzalloc(sizeof(*vm), GFP_KERNEL);
+	if (!vm)
+		return ERR_PTR(-ENOMEM);
+
+	vm->xe = xe;
+
+	vm->size = 1ull << xe->info.va_bits;
+
+	vm->flags = flags;
+
+	init_rwsem(&vm->lock);
+	mutex_init(&vm->snap_mutex);
+
+	INIT_LIST_HEAD(&vm->rebind_list);
+
+	INIT_LIST_HEAD(&vm->userptr.repin_list);
+	INIT_LIST_HEAD(&vm->userptr.invalidated);
+	init_rwsem(&vm->userptr.notifier_lock);
+	spin_lock_init(&vm->userptr.invalidated_lock);
+
+	ttm_lru_bulk_move_init(&vm->lru_bulk_move);
+
+	INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
+
+	INIT_LIST_HEAD(&vm->preempt.exec_queues);
+	vm->preempt.min_run_period_ms = 10;	/* FIXME: Wire up to uAPI */
+
+	for_each_tile(tile, xe, id)
+		xe_range_fence_tree_init(&vm->rftree[id]);
+
+	vm->pt_ops = &xelp_pt_ops;
+
+	/*
+	 * Long-running workloads are not protected by the scheduler references.
+	 * By design, run_job for long-running workloads returns NULL and the
+	 * scheduler drops all the references of it, hence protecting the VM
+	 * for this case is necessary.
+	 */
+	if (flags & XE_VM_FLAG_LR_MODE)
+		xe_pm_runtime_get_noresume(xe);
+
+	vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
+	if (!vm_resv_obj) {
+		err = -ENOMEM;
+		goto err_no_resv;
+	}
+
+	drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm,
+		       vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops);
+
+	/* Drop the local reference to the resv object after GPUVM init. */
+	drm_gem_object_put(vm_resv_obj);
+
+	/* From here on, failures unwind through xe_vm_close_and_put(). */
+	err = xe_vm_lock(vm, true);
+	if (err)
+		goto err_close;
+
+	if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
+		vm->flags |= XE_VM_FLAG_64K;
+
+	for_each_tile(tile, xe, id) {
+		/* A migration VM only gets a root on its designated tile. */
+		if (flags & XE_VM_FLAG_MIGRATION &&
+		    tile->id != XE_VM_FLAG_TILE_ID(flags))
+			continue;
+
+		vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level);
+		if (IS_ERR(vm->pt_root[id])) {
+			err = PTR_ERR(vm->pt_root[id]);
+			/* Keep the teardown path from seeing an ERR_PTR. */
+			vm->pt_root[id] = NULL;
+			goto err_unlock_close;
+		}
+	}
+
+	if (xe_vm_has_scratch(vm)) {
+		for_each_tile(tile, xe, id) {
+			if (!vm->pt_root[id])
+				continue;
+
+			err = xe_vm_create_scratch(xe, tile, vm);
+			if (err)
+				goto err_unlock_close;
+		}
+		vm->batch_invalidate_tlb = true;
+	}
+
+	if (vm->flags & XE_VM_FLAG_LR_MODE) {
+		INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
+		vm->batch_invalidate_tlb = false;
+	}
+
+	/* Fill pt_root after allocating scratch tables */
+	for_each_tile(tile, xe, id) {
+		if (!vm->pt_root[id])
+			continue;
+
+		xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
+	}
+	xe_vm_unlock(vm);
+
+	/* Kernel migration VM shouldn't have a circular loop.. */
+	if (!(flags & XE_VM_FLAG_MIGRATION)) {
+		for_each_tile(tile, xe, id) {
+			struct xe_exec_queue *q;
+			u32 create_flags = EXEC_QUEUE_FLAG_VM;
+
+			if (!vm->pt_root[id])
+				continue;
+
+			q = xe_exec_queue_create_bind(xe, tile, create_flags, 0);
+			if (IS_ERR(q)) {
+				err = PTR_ERR(q);
+				goto err_close;
+			}
+			vm->q[id] = q;
+			number_tiles++;
+		}
+	}
+
+	/* Only allocated when more than one tile got a bind queue. */
+	if (number_tiles > 1)
+		vm->composite_fence_ctx = dma_fence_context_alloc(1);
+
+	trace_xe_vm_create(vm);
+
+	return vm;
+
+err_unlock_close:
+	xe_vm_unlock(vm);
+err_close:
+	xe_vm_close_and_put(vm);
+	return ERR_PTR(err);
+
+err_no_resv:
+	/* GPUVM was never initialised: undo the early setup by hand. */
+	mutex_destroy(&vm->snap_mutex);
+	for_each_tile(tile, xe, id)
+		xe_range_fence_tree_fini(&vm->rftree[id]);
+	ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
+	kfree(vm);
+	if (flags & XE_VM_FLAG_LR_MODE)
+		xe_pm_runtime_put(xe);
+	return ERR_PTR(err);
+}
+
+/*
+ * Mark @vm as closed by zeroing vm->size under the write lock;
+ * vm_destroy_work_func() asserts that size is zero before freeing the VM.
+ */
+static void xe_vm_close(struct xe_vm *vm)
+{
+	down_write(&vm->lock);
+	vm->size = 0;
+	up_write(&vm->lock);
+}
+
+/**
+ * xe_vm_close_and_put() - Close a VM, tear down its contents and drop a ref
+ * @vm: The VM.
+ *
+ * Closes the VM, kills and releases the per-tile bind exec queues, removes
+ * and destroys all VMAs, frees scratch and root page tables, erases the
+ * ASID mapping if one was assigned, and finally drops a VM reference.
+ * The VM must have no preempt exec queues attached (asserted).
+ */
+void xe_vm_close_and_put(struct xe_vm *vm)
+{
+	LIST_HEAD(contested);
+	struct xe_device *xe = vm->xe;
+	struct xe_tile *tile;
+	struct xe_vma *vma, *next_vma;
+	struct drm_gpuva *gpuva, *next;
+	u8 id;
+
+	xe_assert(xe, !vm->preempt.num_exec_queues);
+
+	xe_vm_close(vm);
+	if (xe_vm_in_preempt_fence_mode(vm))
+		flush_work(&vm->preempt.rebind_work);
+
+	down_write(&vm->lock);
+	for_each_tile(tile, xe, id) {
+		if (vm->q[id])
+			xe_exec_queue_last_fence_put(vm->q[id], vm);
+	}
+	up_write(&vm->lock);
+
+	/* The queues are killed and released with vm->lock dropped. */
+	for_each_tile(tile, xe, id) {
+		if (vm->q[id]) {
+			xe_exec_queue_kill(vm->q[id]);
+			xe_exec_queue_put(vm->q[id]);
+			vm->q[id] = NULL;
+		}
+	}
+
+	down_write(&vm->lock);
+	xe_vm_lock(vm, false);
+	drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) {
+		vma = gpuva_to_vma(gpuva);
+
+		if (xe_vma_has_no_bo(vma)) {
+			down_read(&vm->userptr.notifier_lock);
+			vma->gpuva.flags |= XE_VMA_DESTROYED;
+			up_read(&vm->userptr.notifier_lock);
+		}
+
+		xe_vm_remove_vma(vm, vma);
+
+		/* easy case, remove from VMA? */
+		if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
+			list_del_init(&vma->combined_links.rebind);
+			xe_vma_destroy(vma, NULL);
+			continue;
+		}
+
+		/* Remaining BO-backed VMAs are destroyed after the unlock below. */
+		list_move_tail(&vma->combined_links.destroy, &contested);
+		vma->gpuva.flags |= XE_VMA_DESTROYED;
+	}
+
+	/*
+	 * All vm operations will add shared fences to resv.
+	 * The only exception is eviction for a shared object,
+	 * but even so, the unbind when evicted would still
+	 * install a fence to resv. Hence it's safe to
+	 * destroy the pagetables immediately.
+	 */
+	xe_vm_free_scratch(vm);
+
+	for_each_tile(tile, xe, id) {
+		if (vm->pt_root[id]) {
+			xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
+			vm->pt_root[id] = NULL;
+		}
+	}
+	xe_vm_unlock(vm);
+
+	/*
+	 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL
+	 * Since we hold a refcount to the bo, we can remove and free
+	 * the members safely without locking.
+	 */
+	list_for_each_entry_safe(vma, next_vma, &contested,
+				 combined_links.destroy) {
+		list_del_init(&vma->combined_links.destroy);
+		xe_vma_destroy_unlocked(vma);
+	}
+
+	up_write(&vm->lock);
+
+	down_write(&xe->usm.lock);
+	if (vm->usm.asid) {
+		void *lookup;
+
+		xe_assert(xe, xe->info.has_asid);
+		xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION));
+
+		lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
+		xe_assert(xe, lookup == vm);
+	}
+	up_write(&xe->usm.lock);
+
+	for_each_tile(tile, xe, id)
+		xe_range_fence_tree_fini(&vm->rftree[id]);
+
+	xe_vm_put(vm);
+}
+
+/*
+ * Work item that performs the final free of a VM. It is queued by
+ * xe_vm_free() because the teardown may sleep. Expects the VM to have been
+ * closed already (size == 0) and all root page tables to be gone.
+ */
+static void vm_destroy_work_func(struct work_struct *w)
+{
+	struct xe_vm *vm =
+		container_of(w, struct xe_vm, destroy_work);
+	struct xe_device *xe = vm->xe;
+	struct xe_tile *tile;
+	u8 id;
+
+	/* xe_vm_close_and_put was not called? */
+	xe_assert(xe, !vm->size);
+
+	if (xe_vm_in_preempt_fence_mode(vm))
+		flush_work(&vm->preempt.rebind_work);
+
+	mutex_destroy(&vm->snap_mutex);
+
+	/* Pairs with the runtime PM reference taken in xe_vm_create(). */
+	if (vm->flags & XE_VM_FLAG_LR_MODE)
+		xe_pm_runtime_put(xe);
+
+	for_each_tile(tile, xe, id)
+		XE_WARN_ON(vm->pt_root[id]);
+
+	trace_xe_vm_free(vm);
+
+	ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
+
+	if (vm->xef)
+		xe_file_put(vm->xef);
+
+	kfree(vm);
+}
+
+/*
+ * drm_gpuvm vm_free hook: hand the actual destruction to a work item, since
+ * destroying the VM needs to be able to sleep.
+ */
+static void xe_vm_free(struct drm_gpuvm *gpuvm)
+{
+	struct xe_vm *dying = container_of(gpuvm, struct xe_vm, gpuvm);
+	struct work_struct *work = &dying->destroy_work;
+
+	queue_work(system_unbound_wq, work);
+}
+
+/*
+ * Look up the VM registered under @id in @xef's VM xarray. On a hit a VM
+ * reference is taken before the file's VM lock is dropped. Returns the VM or
+ * NULL if @id is not registered.
+ */
+struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
+{
+	struct xe_vm *found;
+
+	mutex_lock(&xef->vm.lock);
+
+	found = xa_load(&xef->vm.xa, id);
+	if (found)
+		xe_vm_get(found);
+
+	mutex_unlock(&xef->vm.lock);
+
+	return found;
+}
+
+/*
+ * Encode a page-directory entry for the BO of @vm's root page table on
+ * @tile, at offset 0, using the device's XE_CACHE_WB PAT index.
+ */
+u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
+{
+	struct xe_bo *root_bo = vm->pt_root[tile->id]->bo;
+	u16 pat_index = tile_to_xe(tile)->pat.idx[XE_CACHE_WB];
+
+	return vm->pt_ops->pde_encode_bo(root_bo, 0, pat_index);
+}
+
+/* Return @q when one is given, otherwise fall back to the VM's vm->q[0]. */
+static struct xe_exec_queue *
+to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
+{
+	if (q)
+		return q;
+
+	return vm->q[0];
+}
+
+/*
+ * Scan @syncs for the first user-fence entry and return the result of
+ * xe_sync_ufence_get() on it; NULL when no entry is a user fence.
+ */
+static struct xe_user_fence *
+find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
+{
+	unsigned int idx;
+
+	for (idx = 0; idx < num_syncs; idx++) {
+		if (!xe_sync_is_ufence(&syncs[idx]))
+			continue;
+
+		return xe_sync_ufence_get(&syncs[idx]);
+	}
+
+	return NULL;
+}
+
+#define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
+				    DRM_XE_VM_CREATE_FLAG_LR_MODE | \
+				    DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
+
+/**
+ * xe_vm_create_ioctl() - Create a VM on behalf of userspace
+ * @dev: DRM device.
+ * @data: ioctl argument, a struct drm_xe_vm_create.
+ * @file: DRM file the VM is created for.
+ *
+ * Validates the arguments, translates the uAPI flags to XE_VM_FLAG_*,
+ * creates the VM, optionally assigns an ASID, and publishes the VM in the
+ * file's VM xarray; the resulting id is returned in args->vm_id.
+ *
+ * NOTE(review): this text is a recorded merge-conflict preimage, so the
+ * conflict markers below are part of the content and still need resolving.
+ * NOTE(review): args->extensions is rejected twice; the second check looks
+ * redundant.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int xe_vm_create_ioctl(struct drm_device *dev, void *data,
+		       struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct drm_xe_vm_create *args = data;
+	struct xe_tile *tile;
+	struct xe_vm *vm;
+	u32 id, asid;
+	int err;
+	u32 flags = 0;
+
+	if (XE_IOCTL_DBG(xe, args->extensions))
+		return -EINVAL;
+
+	if (XE_WA(xe_root_mmio_gt(xe), 14016763929))
+		args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;
+
+	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
+			 !xe->info.has_usm))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE &&
+			 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) &&
+			 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, args->extensions))
+		return -EINVAL;
+
+	if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
+		flags |= XE_VM_FLAG_SCRATCH_PAGE;
+	if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
+		flags |= XE_VM_FLAG_LR_MODE;
+	if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
+		flags |= XE_VM_FLAG_FAULT_MODE;
+
+	vm = xe_vm_create(xe, flags);
+	if (IS_ERR(vm))
+		return PTR_ERR(vm);
+
+<<<<<<<
+=======
+	mutex_lock(&xef->vm.lock);
+	err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
+	mutex_unlock(&xef->vm.lock);
+	if (err)
+		goto err_close_and_put;
+
+>>>>>>>
+	if (xe->info.has_asid) {
+		down_write(&xe->usm.lock);
+		err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
+				      XA_LIMIT(1, XE_MAX_ASID - 1),
+				      &xe->usm.next_asid, GFP_KERNEL);
+		up_write(&xe->usm.lock);
+		if (err < 0)
+			goto err_close_and_put;
+
+		vm->usm.asid = asid;
+	}
+
+	vm->xef = xe_file_get(xef);
+
+	/* Record BO memory for VM pagetable created against client */
+	for_each_tile(tile, xe, id)
+		if (vm->pt_root[id])
+			xe_drm_client_add_bo(vm->xef->client, vm->pt_root[id]->bo);
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
+	/* Warning: Security issue - never enable by default */
+	args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
+#endif
+
+	/* user id alloc must always be last in ioctl to prevent UAF */
+	err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
+	if (err)
+		goto err_close_and_put;
+
+	args->vm_id = id;
+
+	return 0;
+
+<<<<<<<
+=======
+err_free_id:
+	mutex_lock(&xef->vm.lock);
+	xa_erase(&xef->vm.xa, id);
+	mutex_unlock(&xef->vm.lock);
+>>>>>>>
+err_close_and_put:
+	xe_vm_close_and_put(vm);
+
+	return err;
+}
+
+/**
+ * xe_vm_destroy_ioctl() - Destroy a VM on behalf of userspace
+ * @dev: DRM device.
+ * @data: ioctl argument, a struct drm_xe_vm_destroy.
+ * @file: DRM file that owns the VM id.
+ *
+ * Return: 0 on success, -EINVAL if pad/reserved fields are set, -ENOENT if
+ * the id is unknown, -EBUSY if the VM still has preempt exec queues.
+ */
+int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct drm_xe_vm_destroy *args = data;
+	struct xe_vm *vm;
+	int err = 0;
+
+	if (XE_IOCTL_DBG(xe, args->pad) ||
+	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
+		return -EINVAL;
+
+	/* Lookup, busy check and erase happen under the file's VM lock. */
+	mutex_lock(&xef->vm.lock);
+	vm = xa_load(&xef->vm.xa, args->vm_id);
+	if (XE_IOCTL_DBG(xe, !vm))
+		err = -ENOENT;
+	else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues))
+		err = -EBUSY;
+	else
+		xa_erase(&xef->vm.xa, args->vm_id);
+	mutex_unlock(&xef->vm.lock);
+
+	/* The id is already unpublished; tear the VM down outside the lock. */
+	if (!err)
+		xe_vm_close_and_put(vm);
+
+	return err;
+}
+
+/*
+ * Lookup table from a region index to an XE_PL_* placement.
+ * NOTE(review): presumably indexed by the prefetch region of a bind op;
+ * the user is not visible in this part of the file - confirm.
+ */
+static const u32 region_to_mem_type[] = {
+	XE_PL_TT,
+	XE_PL_VRAM0,
+	XE_PL_VRAM1,
+};
+
+/*
+ * Flag @vma as XE_VMA_DESTROYED (done under the userptr notifier lock, taken
+ * for read) and, when @post_commit is set, also remove it from @vm.
+ */
+static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
+			     bool post_commit)
+{
+	down_read(&vm->userptr.notifier_lock);
+	vma->gpuva.flags |= XE_VMA_DESTROYED;
+	up_read(&vm->userptr.notifier_lock);
+
+	if (!post_commit)
+		return;
+
+	xe_vm_remove_vma(vm, vma);
+}
+
+#undef ULL
+#define ULL	unsigned long long
+
+/*
+ * print_op() logs a single GPUVA operation via vm_dbg() when
+ * CONFIG_DRM_XE_DEBUG_VM is enabled, and compiles to an empty stub otherwise.
+ */
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
+static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
+{
+	struct xe_vma *vma;
+
+	switch (op->op) {
+	case DRM_GPUVA_OP_MAP:
+		vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx",
+		       (ULL)op->map.va.addr, (ULL)op->map.va.range);
+		break;
+	case DRM_GPUVA_OP_REMAP:
+		/* A REMAP is an unmap plus optional prev/next remainders. */
+		vma = gpuva_to_vma(op->remap.unmap->va);
+		vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
+		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
+		       op->remap.unmap->keep ? 1 : 0);
+		if (op->remap.prev)
+			vm_dbg(&xe->drm,
+			       "REMAP:PREV: addr=0x%016llx, range=0x%016llx",
+			       (ULL)op->remap.prev->va.addr,
+			       (ULL)op->remap.prev->va.range);
+		if (op->remap.next)
+			vm_dbg(&xe->drm,
+			       "REMAP:NEXT: addr=0x%016llx, range=0x%016llx",
+			       (ULL)op->remap.next->va.addr,
+			       (ULL)op->remap.next->va.range);
+		break;
+	case DRM_GPUVA_OP_UNMAP:
+		vma = gpuva_to_vma(op->unmap.va);
+		vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
+		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
+		       op->unmap.keep ? 1 : 0);
+		break;
+	case DRM_GPUVA_OP_PREFETCH:
+		vma = gpuva_to_vma(op->prefetch.va);
+		vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx",
+		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma));
+		break;
+	default:
+		drm_warn(&xe->drm, "NOT POSSIBLE");
+	}
+}
+#else
+/* Debug logging compiled out. */
+static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
+{
+}
+#endif
+
+/*
+ * Create operations list from IOCTL arguments, setup operations fields so parse
+ * and commit steps are decoupled from IOCTL arguments. This step can fail.
+ *
+ * The GPUVA ops are generated by the drm_gpuvm helper matching @operation,
+ * then each MAP op is annotated with the bind @flags and @pat_index and each
+ * PREFETCH op with @prefetch_region.
+ *
+ * Caller must hold vm->lock for write (lockdep-asserted).
+ *
+ * Returns the ops list or an ERR_PTR() on failure.
+ */
+static struct drm_gpuva_ops *
+vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
+			 u64 bo_offset_or_userptr, u64 addr, u64 range,
+			 u32 operation, u32 flags,
+			 u32 prefetch_region, u16 pat_index)
+{
+	struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
+	struct drm_gpuva_ops *ops;
+	struct drm_gpuva_op *__op;
+	struct drm_gpuvm_bo *vm_bo;
+	int err;
+
+	lockdep_assert_held_write(&vm->lock);
+
+	vm_dbg(&vm->xe->drm,
+	       "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx",
+	       operation, (ULL)addr, (ULL)range,
+	       (ULL)bo_offset_or_userptr);
+
+	switch (operation) {
+	case DRM_XE_VM_BIND_OP_MAP:
+	case DRM_XE_VM_BIND_OP_MAP_USERPTR:
+		ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range,
+						  obj, bo_offset_or_userptr);
+		break;
+	case DRM_XE_VM_BIND_OP_UNMAP:
+		ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range);
+		break;
+	case DRM_XE_VM_BIND_OP_PREFETCH:
+		ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range);
+		break;
+	case DRM_XE_VM_BIND_OP_UNMAP_ALL:
+		xe_assert(vm->xe, bo);
+
+		/* The BO stays locked while its unmap ops are generated. */
+		err = xe_bo_lock(bo, true);
+		if (err)
+			return ERR_PTR(err);
+
+		vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj);
+		if (IS_ERR(vm_bo)) {
+			xe_bo_unlock(bo);
+			return ERR_CAST(vm_bo);
+		}
+
+		ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
+		drm_gpuvm_bo_put(vm_bo);
+		xe_bo_unlock(bo);
+		break;
+	default:
+		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
+		ops = ERR_PTR(-EINVAL);
+	}
+	if (IS_ERR(ops))
+		return ops;
+
+	/* Copy the ioctl-level attributes onto the generated ops. */
+	drm_gpuva_for_each_op(__op, ops) {
+		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
+
+		if (__op->op == DRM_GPUVA_OP_MAP) {
+			op->map.immediate =
+				flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE;
+			op->map.read_only =
+				flags & DRM_XE_VM_BIND_FLAG_READONLY;
+			op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
+			op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE;
+			op->map.pat_index = pat_index;
+		} else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
+			op->prefetch.region = prefetch_region;
+		}
+
+		print_op(vm->xe, __op);
+	}
+
+	return ops;
+}
+
+/*
+ * Create the VMA described by the GPUVA map op @op.
+ *
+ * For BO-backed mappings the BO (and, when bo->vm is not set, the VM resv
+ * object too) is locked via drm_exec around VMA creation. After creation,
+ * userptr VMAs get their pages pinned, and BO-backed VMAs whose BO has no
+ * bo->vm get preempt fences added. If that follow-up step fails, the new
+ * VMA is destroyed again.
+ *
+ * Caller must hold vm->lock for write (lockdep-asserted).
+ *
+ * Returns the VMA or an ERR_PTR() on failure.
+ */
+static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
+			      u16 pat_index, unsigned int flags)
+{
+	struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
+	struct drm_exec exec;
+	struct xe_vma *vma;
+	int err = 0;
+
+	lockdep_assert_held_write(&vm->lock);
+
+	if (bo) {
+		drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+		drm_exec_until_all_locked(&exec) {
+			err = 0;
+			if (!bo->vm) {
+				err = drm_exec_lock_obj(&exec, xe_vm_obj(vm));
+				drm_exec_retry_on_contention(&exec);
+			}
+			if (!err) {
+				err = drm_exec_lock_obj(&exec, &bo->ttm.base);
+				drm_exec_retry_on_contention(&exec);
+			}
+			if (err) {
+				drm_exec_fini(&exec);
+				return ERR_PTR(err);
+			}
+		}
+	}
+	/* The op's range is [addr, addr + range); xe_vma_create() takes an inclusive end. */
+	vma = xe_vma_create(vm, bo, op->gem.offset,
+			    op->va.addr, op->va.addr +
+			    op->va.range - 1, pat_index, flags);
+	/* err is 0 here, so the ERR_PTR in vma is returned as-is below. */
+	if (IS_ERR(vma))
+		goto err_unlock;
+
+	if (xe_vma_is_userptr(vma))
+		err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
+	else if (!xe_vma_has_no_bo(vma) && !bo->vm)
+		err = add_preempt_fences(vm, bo);
+
+err_unlock:
+	if (bo)
+		drm_exec_fini(&exec);
+
+	if (err) {
+		prep_vma_destroy(vm, vma, false);
+		xe_vma_destroy_unlocked(vma);
+		vma = ERR_PTR(err);
+	}
+
+	return vma;
+}
+
+/*
+ * Report the largest PTE size recorded in @vma's XE_VMA_PTE_* flags, checked
+ * from largest to smallest. With no flag set the maximum (1G) is assumed.
+ */
+static u64 xe_vma_max_pte_size(struct xe_vma *vma)
+{
+	if (vma->gpuva.flags & XE_VMA_PTE_1G)
+		return SZ_1G;
+
+	if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT))
+		return SZ_2M;
+
+	if (vma->gpuva.flags & XE_VMA_PTE_64K)
+		return SZ_64K;
+
+	if (vma->gpuva.flags & XE_VMA_PTE_4K)
+		return SZ_4K;
+
+	/* Uninitialized, used max size */
+	return SZ_1G;
+}
+
+/*
+ * Record @size in @vma by setting the matching XE_VMA_PTE_* flag. Sizes other
+ * than 1G, 2M, 64K and 4K leave the flags untouched.
+ */
+static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
+{
+	if (size == SZ_1G)
+		vma->gpuva.flags |= XE_VMA_PTE_1G;
+	else if (size == SZ_2M)
+		vma->gpuva.flags |= XE_VMA_PTE_2M;
+	else if (size == SZ_64K)
+		vma->gpuva.flags |= XE_VMA_PTE_64K;
+	else if (size == SZ_4K)
+		vma->gpuva.flags |= XE_VMA_PTE_4K;
+}
+
+/*
+ * Commit @op to the VM's GPUVA tracking: insert new VMAs (MAP, and the
+ * prev/next parts of a REMAP) and mark VMAs being unmapped as destroyed and
+ * remove them. The XE_VMA_OP_*COMMITTED flags record which parts took
+ * effect.
+ *
+ * Caller must hold vm->lock for write (lockdep-asserted).
+ *
+ * Returns 0 on success, non-zero if a VMA insertion failed.
+ */
+static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
+{
+	int err = 0;
+
+	lockdep_assert_held_write(&vm->lock);
+
+	switch (op->base.op) {
+	case DRM_GPUVA_OP_MAP:
+		err |= xe_vm_insert_vma(vm, op->map.vma);
+		if (!err)
+			op->flags |= XE_VMA_OP_COMMITTED;
+		break;
+	case DRM_GPUVA_OP_REMAP:
+	{
+		/* Sampled before the old VMA is removed from the VM. */
+		u8 tile_present =
+			gpuva_to_vma(op->base.remap.unmap->va)->tile_present;
+
+		prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
+				 true);
+		op->flags |= XE_VMA_OP_COMMITTED;
+
+		if (op->remap.prev) {
+			err |= xe_vm_insert_vma(vm, op->remap.prev);
+			if (!err)
+				op->flags |= XE_VMA_OP_PREV_COMMITTED;
+			/* A skipped part inherits tile_present and leaves the op. */
+			if (!err && op->remap.skip_prev) {
+				op->remap.prev->tile_present =
+					tile_present;
+				op->remap.prev = NULL;
+			}
+		}
+		if (op->remap.next) {
+			err |= xe_vm_insert_vma(vm, op->remap.next);
+			if (!err)
+				op->flags |= XE_VMA_OP_NEXT_COMMITTED;
+			if (!err && op->remap.skip_next) {
+				op->remap.next->tile_present =
+					tile_present;
+				op->remap.next = NULL;
+			}
+		}
+
+		/* Adjust for partial unbind after removing VMA from VM */
+		if (!err) {
+			op->base.remap.unmap->va->va.addr = op->remap.start;
+			op->base.remap.unmap->va->va.range = op->remap.range;
+		}
+		break;
+	}
+	case DRM_GPUVA_OP_UNMAP:
+		prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
+		op->flags |= XE_VMA_OP_COMMITTED;
+		break;
+	case DRM_GPUVA_OP_PREFETCH:
+		op->flags |= XE_VMA_OP_COMMITTED;
+		break;
+	default:
+		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
+	}
+
+	return err;
+}
+
+static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
+				   struct xe_vma_ops *vops)
+{	/* Queue each GPUVA op of @ops on @vops, create its VMAs, count PT updates, commit it. */
+	struct xe_device *xe = vm->xe;
+	struct drm_gpuva_op *__op;
+	struct xe_tile *tile;
+	u8 id, tile_mask = 0;
+	int err = 0;
+
+	lockdep_assert_held_write(&vm->lock);
+
+	for_each_tile(tile, vm->xe, id)
+		tile_mask |= 0x1 << id;	/* one bit per tile visited */
+
+	drm_gpuva_for_each_op(__op, ops) {
+		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
+		struct xe_vma *vma;
+		unsigned int flags = 0;
+
+		INIT_LIST_HEAD(&op->link);
+		list_add_tail(&op->link, &vops->list);	/* queued before any error return below */
+		op->tile_mask = tile_mask;
+
+		switch (op->base.op) {
+		case DRM_GPUVA_OP_MAP:
+		{
+			flags |= op->map.read_only ?
+				VMA_CREATE_FLAG_READ_ONLY : 0;
+			flags |= op->map.is_null ?
+				VMA_CREATE_FLAG_IS_NULL : 0;
+			flags |= op->map.dumpable ?
+				VMA_CREATE_FLAG_DUMPABLE : 0;
+
+			vma = new_vma(vm, &op->base.map, op->map.pat_index,
+				      flags);
+			if (IS_ERR(vma))
+				return PTR_ERR(vma);
+
+			op->map.vma = vma;
+			if (op->map.immediate || !xe_vm_in_fault_mode(vm))
+				xe_vma_ops_incr_pt_update_ops(vops,
+							      op->tile_mask);
+			break;
+		}
+		case DRM_GPUVA_OP_REMAP:
+		{
+			struct xe_vma *old =
+				gpuva_to_vma(op->base.remap.unmap->va);
+
+			op->remap.start = xe_vma_start(old);	/* whole old VMA; trimmed below */
+			op->remap.range = xe_vma_size(old);	/* when a side's rebind is skipped */
+
+			if (op->base.remap.prev) {
+				flags |= op->base.remap.unmap->va->flags &
+					XE_VMA_READ_ONLY ?
+					VMA_CREATE_FLAG_READ_ONLY : 0;
+				flags |= op->base.remap.unmap->va->flags &
+					DRM_GPUVA_SPARSE ?
+					VMA_CREATE_FLAG_IS_NULL : 0;
+				flags |= op->base.remap.unmap->va->flags &
+					XE_VMA_DUMPABLE ?
+					VMA_CREATE_FLAG_DUMPABLE : 0;
+
+				vma = new_vma(vm, op->base.remap.prev,
+					      old->pat_index, flags);
+				if (IS_ERR(vma))
+					return PTR_ERR(vma);
+
+				op->remap.prev = vma;
+
+				/*
+				 * Userptr creates a new SG mapping so
+				 * we must also rebind.
+				 */
+				op->remap.skip_prev = !xe_vma_is_userptr(old) &&
+					IS_ALIGNED(xe_vma_end(vma),
+						   xe_vma_max_pte_size(old));
+				if (op->remap.skip_prev) {
+					xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
+					op->remap.range -=
+						xe_vma_end(vma) -
+						xe_vma_start(old);
+					op->remap.start = xe_vma_end(vma);
+					vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx",
+					       (ULL)op->remap.start,
+					       (ULL)op->remap.range);
+				} else {
+					xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
+				}
+			}
+
+			if (op->base.remap.next) {	/* flags may already hold the bits set for prev */
+				flags |= op->base.remap.unmap->va->flags &
+					XE_VMA_READ_ONLY ?
+					VMA_CREATE_FLAG_READ_ONLY : 0;
+				flags |= op->base.remap.unmap->va->flags &
+					DRM_GPUVA_SPARSE ?
+					VMA_CREATE_FLAG_IS_NULL : 0;
+				flags |= op->base.remap.unmap->va->flags &
+					XE_VMA_DUMPABLE ?
+					VMA_CREATE_FLAG_DUMPABLE : 0;
+
+				vma = new_vma(vm, op->base.remap.next,
+					      old->pat_index, flags);
+				if (IS_ERR(vma))
+					return PTR_ERR(vma);
+
+				op->remap.next = vma;
+
+				/*
+				 * Userptr creates a new SG mapping so
+				 * we must also rebind.
+				 */
+				op->remap.skip_next = !xe_vma_is_userptr(old) &&
+					IS_ALIGNED(xe_vma_start(vma),
+						   xe_vma_max_pte_size(old));
+				if (op->remap.skip_next) {
+					xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
+					op->remap.range -=
+						xe_vma_end(old) -
+						xe_vma_start(vma);
+					vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx",
+					       (ULL)op->remap.start,
+					       (ULL)op->remap.range);
+				} else {
+					xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
+				}
+			}
+			xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);	/* presumably for the unmap of old */
+			break;
+		}
+		case DRM_GPUVA_OP_UNMAP:
+		case DRM_GPUVA_OP_PREFETCH:
+			/* FIXME: Need to skip some prefetch ops */
+			xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
+			break;
+		default:
+			drm_warn(&vm->xe->drm, "NOT POSSIBLE");
+		}
+
+		err = xe_vma_op_commit(vm, op);	/* each op is committed as soon as it is parsed */
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
+			     bool post_commit, bool prev_post_commit,
+			     bool next_post_commit)
+{	/* Undo @op; the *_post_commit flags say which of its parts had been committed. */
+	lockdep_assert_held_write(&vm->lock);
+
+	switch (op->base.op) {
+	case DRM_GPUVA_OP_MAP:
+		if (op->map.vma) {	/* NULL if the op failed before its VMA was created */
+			prep_vma_destroy(vm, op->map.vma, post_commit);
+			xe_vma_destroy_unlocked(op->map.vma);
+		}
+		break;
+	case DRM_GPUVA_OP_UNMAP:
+	{
+		struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);
+
+		if (vma) {
+			down_read(&vm->userptr.notifier_lock);
+			vma->gpuva.flags &= ~XE_VMA_DESTROYED;	/* clear the destroyed mark */
+			up_read(&vm->userptr.notifier_lock);
+			if (post_commit)
+				xe_vm_insert_vma(vm, vma);	/* return value is not checked */
+		}
+		break;
+	}
+	case DRM_GPUVA_OP_REMAP:
+	{
+		struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va);
+
+		if (op->remap.prev) {
+			prep_vma_destroy(vm, op->remap.prev, prev_post_commit);
+			xe_vma_destroy_unlocked(op->remap.prev);
+		}
+		if (op->remap.next) {
+			prep_vma_destroy(vm, op->remap.next, next_post_commit);
+			xe_vma_destroy_unlocked(op->remap.next);
+		}
+		if (vma) {	/* same restore sequence as the UNMAP case */
+			down_read(&vm->userptr.notifier_lock);
+			vma->gpuva.flags &= ~XE_VMA_DESTROYED;
+			up_read(&vm->userptr.notifier_lock);
+			if (post_commit)
+				xe_vm_insert_vma(vm, vma);
+		}
+		break;
+	}
+	case DRM_GPUVA_OP_PREFETCH:
+		/* Nothing to do */
+		break;
+	default:
+		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
+	}
+}
+
+/* Unwind every op of every list in @ops, walking both in reverse order. */
+static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
+				     struct drm_gpuva_ops **ops,
+				     int num_ops_list)
+{
+	int idx = num_ops_list;
+
+	while (--idx >= 0) {
+		struct drm_gpuva_op *base_op;
+
+		if (!ops[idx])	/* no ops list in this slot */
+			continue;
+
+		drm_gpuva_for_each_op_reverse(base_op, ops[idx]) {
+			struct xe_vma_op *vop = gpuva_op_to_vma_op(base_op);
+			bool committed = vop->flags & XE_VMA_OP_COMMITTED;
+			bool prev_done = vop->flags & XE_VMA_OP_PREV_COMMITTED;
+			bool next_done = vop->flags & XE_VMA_OP_NEXT_COMMITTED;
+
+			xe_vma_op_unwind(vm, vop, committed, prev_done, next_done);
+		}
+	}
+}
+
+/* Lock @vma's BO unless bo->vm is set, then validate it if @validate. */
+static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
+				 bool validate)
+{
+	struct xe_bo *bo = xe_vma_bo(vma);
+	int ret = 0;
+
+	if (!bo)	/* nothing to lock or validate */
+		return 0;
+	if (!bo->vm)
+		ret = drm_exec_lock_obj(exec, &bo->ttm.base);
+	if (ret || !validate)
+		return ret;
+	return xe_bo_validate(bo, xe_vma_vm(vma), true);
+}
+
+/* Drop @vma's user fence if its status is non-zero; -EBUSY while it is zero. */
+static int check_ufence(struct xe_vma *vma)
+{
+	struct xe_user_fence *ufence = vma->ufence;
+
+	if (!ufence)
+		return 0;
+	if (!xe_sync_ufence_get_status(ufence))
+		return -EBUSY;
+
+	vma->ufence = NULL;
+	xe_sync_ufence_put(ufence);
+	return 0;
+}
+
+/* Lock (and validate or migrate, as needed) the BOs that @op touches. */
+static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
+			    struct xe_vma_op *op)
+{
+	int err = 0;
+
+	switch (op->base.op) {
+	case DRM_GPUVA_OP_MAP:
+		err = vma_lock_and_validate(exec, op->map.vma,
+					    !xe_vm_in_fault_mode(vm) ||
+					    op->map.immediate);
+		break;
+	case DRM_GPUVA_OP_REMAP:
+		err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va));
+		if (err)
+			break;
+
+		err = vma_lock_and_validate(exec,
+					    gpuva_to_vma(op->base.remap.unmap->va),
+					    false);
+		if (!err && op->remap.prev)
+			err = vma_lock_and_validate(exec, op->remap.prev, true);
+		if (!err && op->remap.next)
+			err = vma_lock_and_validate(exec, op->remap.next, true);
+		break;
+	case DRM_GPUVA_OP_UNMAP:
+		err = check_ufence(gpuva_to_vma(op->base.unmap.va));
+		if (err)
+			break;
+
+		err = vma_lock_and_validate(exec,
+					    gpuva_to_vma(op->base.unmap.va),
+					    false);
+		break;
+	case DRM_GPUVA_OP_PREFETCH:
+	{
+		struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
+		u32 region = op->prefetch.region;
+
+		/* region indexes region_to_mem_type[]: it must be < ARRAY_SIZE */
+		xe_assert(vm->xe, region < ARRAY_SIZE(region_to_mem_type));
+
+		err = vma_lock_and_validate(exec, vma, false);
+		if (!err && !xe_vma_has_no_bo(vma))
+			err = xe_bo_migrate(xe_vma_bo(vma),
+					    region_to_mem_type[region]);
+		break;
+	}
+	default:
+		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
+	}
+
+	return err;
+}
+
+/* Lock the VM's own object, then lock/prepare each op queued on @vops. */
+static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
+					   struct xe_vm *vm,
+					   struct xe_vma_ops *vops)
+{
+	int ret = drm_exec_lock_obj(exec, xe_vm_obj(vm));
+	struct xe_vma_op *cur;
+
+	if (ret)
+		return ret;
+
+	list_for_each_entry(cur, &vops->list, link) {
+		ret = op_lock_and_prep(exec, vm, cur);
+		if (ret)
+			return ret;
+	}
+
+#ifdef TEST_VM_OPS_ERROR
+	if (vops->inject_error &&
+	    vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK)
+		return -ENOSPC;
+#endif
+
+	return 0;
+}
+
+/*
+ * Emit the bind/unbind tracepoints that correspond to @op. A remap traces
+ * the unbind of the old VMA plus a bind for each replacement VMA that is
+ * still attached to the op.
+ */
+static void op_trace(struct xe_vma_op *op)
+{
+	if (op->base.op == DRM_GPUVA_OP_MAP) {
+		trace_xe_vma_bind(op->map.vma);
+	} else if (op->base.op == DRM_GPUVA_OP_REMAP) {
+		trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va));
+		if (op->remap.prev)
+			trace_xe_vma_bind(op->remap.prev);
+		if (op->remap.next)
+			trace_xe_vma_bind(op->remap.next);
+	} else if (op->base.op == DRM_GPUVA_OP_UNMAP) {
+		trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va));
+	} else if (op->base.op == DRM_GPUVA_OP_PREFETCH) {
+		trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va));
+	} else {
+		XE_WARN_ON("NOT POSSIBLE");
+	}
+}
+
+static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops)
+{
+	struct xe_vma_op *cur;	/* cursor over vops->list */
+
+	list_for_each_entry(cur, &vops->list, link)
+		op_trace(cur);	/* one trace call per queued op */
+}
+
+static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops)
+{	/* Choose an exec queue per tile; returns how many tiles have PT update ops. */
+	struct xe_exec_queue *q = vops->q;
+	struct xe_tile *tile;
+	int number_tiles = 0;
+	u8 id;
+
+	for_each_tile(tile, vm->xe, id) {
+		if (vops->pt_update_ops[id].num_ops)
+			++number_tiles;
+
+		if (vops->pt_update_ops[id].q)
+			continue;	/* a queue is already set for this tile */
+
+		if (q) {
+			vops->pt_update_ops[id].q = q;
+			if (vm->pt_root[id] && !list_empty(&q->multi_gt_list))
+				q = list_next_entry(q, multi_gt_list);	/* next queue on the list for the next tile */
+		} else {
+			vops->pt_update_ops[id].q = vm->q[id];	/* no queue in vops: use the VM's per-tile one */
+		}
+	}
+
+	return number_tiles;
+}
+
+static struct dma_fence *ops_execute(struct xe_vm *vm,
+				     struct xe_vma_ops *vops)
+{	/* Prepare and run the PT updates of every tile with ops; returns a fence or an ERR_PTR. */
+	struct xe_tile *tile;
+	struct dma_fence *fence = NULL;
+	struct dma_fence **fences = NULL;
+	struct dma_fence_array *cf = NULL;
+	int number_tiles = 0, current_fence = 0, err;
+	u8 id;
+
+	number_tiles = vm_ops_setup_tile_args(vm, vops);
+	if (number_tiles == 0)
+		return ERR_PTR(-ENODATA);	/* no tile has PT update ops */
+
+	if (number_tiles > 1) {	/* per-tile fences are combined into one array below */
+		fences = kmalloc_array(number_tiles, sizeof(*fences),
+				       GFP_KERNEL);
+		if (!fences) {
+			fence = ERR_PTR(-ENOMEM);
+			goto err_trace;
+		}
+	}
+
+	for_each_tile(tile, vm->xe, id) {
+		if (!vops->pt_update_ops[id].num_ops)
+			continue;
+
+		err = xe_pt_update_ops_prepare(tile, vops);
+		if (err) {
+			fence = ERR_PTR(err);
+			goto err_out;
+		}
+	}
+
+	trace_xe_vm_ops_execute(vops);
+
+	for_each_tile(tile, vm->xe, id) {
+		if (!vops->pt_update_ops[id].num_ops)
+			continue;
+
+		fence = xe_pt_update_ops_run(tile, vops);
+		if (IS_ERR(fence))
+			goto err_out;	/* fence already holds the ERR_PTR to return */
+
+		if (fences)
+			fences[current_fence++] = fence;
+	}
+
+	if (fences) {
+		cf = dma_fence_array_create(number_tiles, fences,
+					    vm->composite_fence_ctx,
+					    vm->composite_fence_seqno++,
+					    false);
+		if (!cf) {
+			--vm->composite_fence_seqno;	/* undo the post-increment above */
+			fence = ERR_PTR(-ENOMEM);
+			goto err_out;
+		}
+		fence = &cf->base;	/* return the array fence instead of the last tile's */
+	}
+
+	for_each_tile(tile, vm->xe, id) {
+		if (!vops->pt_update_ops[id].num_ops)
+			continue;
+
+		xe_pt_update_ops_fini(tile, vops);
+	}
+
+	return fence;
+
+err_out:
+	for_each_tile(tile, vm->xe, id) {
+		if (!vops->pt_update_ops[id].num_ops)
+			continue;
+
+		xe_pt_update_ops_abort(tile, vops);
+	}
+	while (current_fence)
+		dma_fence_put(fences[--current_fence]);	/* put the fences collected so far */
+	kfree(fences);
+	kfree(cf);	/* cf is NULL on every path that reaches err_out */
+
+err_trace:
+	trace_xe_vm_ops_fail(vm);
+	return fence;
+}
+
+static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence)
+{	/* Point @vma at a new reference to @ufence. */
+	if (vma->ufence != NULL)
+		xe_sync_ufence_put(vma->ufence);	/* put the fence it held */
+	vma->ufence = __xe_sync_ufence_get(ufence);
+}
+
+/*
+ * Attach @ufence to the VMAs that @op maps or prefetches: the new VMA of a
+ * map, whichever replacement VMAs of a remap are still attached to the op,
+ * and the target VMA of a prefetch.
+ */
+static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op,
+			  struct xe_user_fence *ufence)
+{
+	if (op->base.op == DRM_GPUVA_OP_MAP) {
+		vma_add_ufence(op->map.vma, ufence);
+	} else if (op->base.op == DRM_GPUVA_OP_REMAP) {
+		if (op->remap.prev)
+			vma_add_ufence(op->remap.prev, ufence);
+		if (op->remap.next)
+			vma_add_ufence(op->remap.next, ufence);
+	} else if (op->base.op == DRM_GPUVA_OP_PREFETCH) {
+		vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence);
+	} else if (op->base.op != DRM_GPUVA_OP_UNMAP) {
+		/* UNMAP attaches nothing; any other type is unexpected */
+		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
+	}
+}
+
+static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
+				   struct dma_fence *fence)
+{	/* After execution: tag VMAs with the user fence, destroy unmapped VMAs, signal syncs. */
+	struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q);
+	struct xe_user_fence *ufence;
+	struct xe_vma_op *op;
+	int i;
+
+	ufence = find_ufence_get(vops->syncs, vops->num_syncs);
+	list_for_each_entry(op, &vops->list, link) {
+		if (ufence)
+			op_add_ufence(vm, op, ufence);
+
+		if (op->base.op == DRM_GPUVA_OP_UNMAP)
+			xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence);
+		else if (op->base.op == DRM_GPUVA_OP_REMAP)
+			xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va),
+				       fence);
+	}
+	if (ufence)
+		xe_sync_ufence_put(ufence);	/* pairs with find_ufence_get() above */
+	for (i = 0; i < vops->num_syncs; i++)
+		xe_sync_entry_signal(vops->syncs + i, fence);
+	xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
+	dma_fence_put(fence);	/* puts the reference passed in by the caller */
+}
+
+static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
+				     struct xe_vma_ops *vops)
+{	/* Lock what @vops needs, execute the ops and finish them; 0 or a negative errno. */
+	struct drm_exec exec;
+	struct dma_fence *fence;
+	int err;
+
+	lockdep_assert_held_write(&vm->lock);
+
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
+		      DRM_EXEC_IGNORE_DUPLICATES, 0);
+	drm_exec_until_all_locked(&exec) {
+		err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops);
+		drm_exec_retry_on_contention(&exec);	/* restart the locking loop on contention */
+		if (err)
+			goto unlock;
+
+		fence = ops_execute(vm, vops);
+		if (IS_ERR(fence)) {
+			err = PTR_ERR(fence);
+			goto unlock;
+		}
+
+		vm_bind_ioctl_ops_fini(vm, vops, fence);	/* puts @fence itself */
+	}
+
+unlock:
+	drm_exec_fini(&exec);
+	return err;
+}
+
+#define SUPPORTED_FLAGS_STUB  \
+	(DRM_XE_VM_BIND_FLAG_READONLY | \
+	 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \
+	 DRM_XE_VM_BIND_FLAG_NULL | \
+	 DRM_XE_VM_BIND_FLAG_DUMPABLE)	/* bind-op flags accepted in every build */
+
+#ifdef TEST_VM_OPS_ERROR
+#define SUPPORTED_FLAGS	(SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR)	/* plus the error-injection flag */
+#else
+#define SUPPORTED_FLAGS	SUPPORTED_FLAGS_STUB
+#endif
+
+#define XE_64K_PAGE_MASK 0xffffull	/* low 16 bits: the offset within a 64K page */
+#define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)	/* NOTE(review): no user visible in this chunk */
+
+/*
+ * Validate the VM_BIND ioctl arguments in @args.
+ *
+ * With more than one bind the user's op array is copied into a kvmalloc'ed
+ * buffer returned in *@bind_ops, which the caller must kvfree(); otherwise
+ * *@bind_ops points at &args->bind. Returns 0 on success or a negative
+ * errno, in which case any copied array has already been freed.
+ */
+static int vm_bind_ioctl_check_args(struct xe_device *xe,
+				    struct drm_xe_vm_bind *args,
+				    struct drm_xe_vm_bind_op **bind_ops)
+{
+	int err;
+	int i;
+
+	if (XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
+	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, args->extensions))
+		return -EINVAL;
+
+	if (args->num_binds > 1) {
+		u64 __user *bind_user =
+			u64_to_user_ptr(args->vector_of_binds);
+
+		*bind_ops = kvmalloc_array(args->num_binds,
+					   sizeof(struct drm_xe_vm_bind_op),
+					   GFP_KERNEL | __GFP_ACCOUNT);
+		if (!*bind_ops)
+			return args->num_binds > 1 ? -ENOBUFS : -ENOMEM;
+
+		/* copy_from_user() also checks the user range (access_ok()) */
+		err = copy_from_user(*bind_ops, bind_user,
+				     sizeof(struct drm_xe_vm_bind_op) *
+				     args->num_binds);
+		if (XE_IOCTL_DBG(xe, err)) {
+			err = -EFAULT;
+			goto free_bind_ops;
+		}
+	} else {
+		*bind_ops = &args->bind;
+	}
+
+	for (i = 0; i < args->num_binds; ++i) {
+		u64 range = (*bind_ops)[i].range;
+		u64 addr = (*bind_ops)[i].addr;
+		u32 op = (*bind_ops)[i].op;
+		u32 flags = (*bind_ops)[i].flags;
+		u32 obj = (*bind_ops)[i].obj;
+		u64 obj_offset = (*bind_ops)[i].obj_offset;
+		u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance;
+		bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
+		u16 pat_index = (*bind_ops)[i].pat_index;
+		u16 coh_mode;
+
+		if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) {
+			err = -EINVAL;
+			goto free_bind_ops;
+		}
+
+		pat_index = array_index_nospec(pat_index, xe->pat.n_entries);
+		(*bind_ops)[i].pat_index = pat_index;
+		coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
+		if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */
+			err = -EINVAL;
+			goto free_bind_ops;
+		}
+
+		if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) {
+			err = -EINVAL;
+			goto free_bind_ops;
+		}
+
+		if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) ||
+		    XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) ||
+		    XE_IOCTL_DBG(xe, obj && is_null) ||
+		    XE_IOCTL_DBG(xe, obj_offset && is_null) ||
+		    XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP && is_null) ||
+		    XE_IOCTL_DBG(xe, !obj && op == DRM_XE_VM_BIND_OP_MAP && !is_null) ||
+		    XE_IOCTL_DBG(xe, !obj && op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
+		    XE_IOCTL_DBG(xe, addr && op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
+		    XE_IOCTL_DBG(xe, range && op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
+		    XE_IOCTL_DBG(xe, obj && op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
+		    XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
+				 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
+		    XE_IOCTL_DBG(xe, obj && op == DRM_XE_VM_BIND_OP_PREFETCH) ||
+		    XE_IOCTL_DBG(xe, prefetch_region && op != DRM_XE_VM_BIND_OP_PREFETCH) ||
+		    /* BIT() with a shift count >= BITS_PER_LONG is undefined */
+		    XE_IOCTL_DBG(xe, prefetch_region >= BITS_PER_LONG ||
+				 !(BIT(prefetch_region) & xe->info.mem_region_mask)) ||
+		    XE_IOCTL_DBG(xe, obj && op == DRM_XE_VM_BIND_OP_UNMAP)) {
+			err = -EINVAL;
+			goto free_bind_ops;
+		}
+
+		if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) ||
+		    XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) ||
+		    XE_IOCTL_DBG(xe, range & ~PAGE_MASK) ||
+		    XE_IOCTL_DBG(xe, !range &&
+				 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) {
+			err = -EINVAL;
+			goto free_bind_ops;
+		}
+	}
+
+	return 0;
+
+free_bind_ops:
+	if (args->num_binds > 1)
+		kvfree(*bind_ops);
+	return err;
+}
+
+/* Signal every entry of @syncs with the fence from xe_sync_in_fence_get(). */
+static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
+				       struct xe_exec_queue *q,
+				       struct xe_sync_entry *syncs,
+				       int num_syncs)
+{
+	struct dma_fence *in_fence;
+	int idx;
+
+	in_fence = xe_sync_in_fence_get(syncs, num_syncs,
+					to_wait_exec_queue(vm, q), vm);
+	if (IS_ERR(in_fence))
+		return PTR_ERR(in_fence);
+
+	for (idx = 0; idx < num_syncs; idx++)
+		xe_sync_entry_signal(syncs + idx, in_fence);
+
+	/* Also make it the wait queue's last fence, then put our reference */
+	xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm, in_fence);
+	dma_fence_put(in_fence);
+	return 0;
+}
+
+static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
+			    struct xe_exec_queue *q,
+			    struct xe_sync_entry *syncs, u32 num_syncs)
+{
+	memset(vops, 0, sizeof(*vops));	/* start from an all-zero state */
+	vops->vm = vm;
+	vops->q = q;
+	vops->syncs = syncs;
+	vops->num_syncs = num_syncs;
+	INIT_LIST_HEAD(&vops->list);	/* ops are queued here by the parse step */
+}
+
+/*
+ * Validate binding @range bytes of @bo at @obj_offset to @addr: the range must
+ * fit in the BO, 64K alignment and coherency rules must hold, else -EINVAL.
+ */
+static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
+					u64 addr, u64 range, u64 obj_offset,
+					u16 pat_index)
+{
+	bool need_64k;
+	u16 coh_mode;
+
+	if (XE_IOCTL_DBG(xe, range > bo->size) ||
+	    XE_IOCTL_DBG(xe, obj_offset >
+			 bo->size - range))
+		return -EINVAL;
+
+	/*
+	 * Only platforms flagged XE_VRAM_FLAGS_NEED64K demand 64k
+	 * VM_BIND alignment.
+	 *
+	 * Elsewhere a BO may be placed at 64k physical granularity
+	 * and still be mapped at 4k offsets, so the alignment check
+	 * is applied to the first kind of platform only.
+	 */
+	need_64k = (bo->flags & XE_BO_FLAG_INTERNAL_64K) &&
+		   (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K);
+	if (need_64k &&
+	    (XE_IOCTL_DBG(xe, obj_offset &
+			  XE_64K_PAGE_MASK) ||
+	     XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
+	     XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)))
+		return -EINVAL;
+
+	coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
+	if (!bo->cpu_caching) {
+		/*
+		 * No caching mode: an imported dma-buf whose CPU mapping is
+		 * unknown, so assume it may be cached and require coherency.
+		 */
+		if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE))
+			return -EINVAL;
+	} else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
+				bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{	/* VM bind ioctl: validate args, build and commit the ops, execute them, unwind on error. */
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct drm_xe_vm_bind *args = data;
+	struct drm_xe_sync __user *syncs_user;
+	struct xe_bo **bos = NULL;
+	struct drm_gpuva_ops **ops = NULL;
+	struct xe_vm *vm;
+	struct xe_exec_queue *q = NULL;
+	u32 num_syncs, num_ufence = 0;
+	struct xe_sync_entry *syncs = NULL;
+	struct drm_xe_vm_bind_op *bind_ops;
+	struct xe_vma_ops vops;
+	int err;
+	int i;
+
+	err = vm_bind_ioctl_check_args(xe, args, &bind_ops);
+	if (err)
+		return err;	/* check_args already freed any bind_ops copy */
+
+	if (args->exec_queue_id) {
+		q = xe_exec_queue_lookup(xef, args->exec_queue_id);
+		if (XE_IOCTL_DBG(xe, !q)) {
+			err = -ENOENT;
+			goto free_objs;
+		}
+
+		if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
+			err = -EINVAL;
+			goto put_exec_queue;
+		}
+	}
+
+	vm = xe_vm_lookup(xef, args->vm_id);
+	if (XE_IOCTL_DBG(xe, !vm)) {
+		err = -EINVAL;
+		goto put_exec_queue;
+	}
+
+	err = down_write_killable(&vm->lock);
+	if (err)
+		goto put_vm;
+
+	if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
+		err = -ENOENT;
+		goto release_vm_lock;
+	}
+
+	for (i = 0; i < args->num_binds; ++i) {
+		u64 range = bind_ops[i].range;
+		u64 addr = bind_ops[i].addr;
+
+		if (XE_IOCTL_DBG(xe, range > vm->size) ||
+		    XE_IOCTL_DBG(xe, addr > vm->size - range)) {	/* cannot wrap: range <= vm->size here */
+			err = -EINVAL;
+			goto release_vm_lock;
+		}
+	}
+
+	if (args->num_binds) {
+		bos = kvcalloc(args->num_binds, sizeof(*bos),
+			       GFP_KERNEL | __GFP_ACCOUNT);
+		if (!bos) {
+			err = -ENOMEM;
+			goto release_vm_lock;
+		}
+
+		ops = kvcalloc(args->num_binds, sizeof(*ops),
+			       GFP_KERNEL | __GFP_ACCOUNT);
+		if (!ops) {
+			err = -ENOMEM;
+			goto release_vm_lock;	/* bos is freed at free_objs */
+		}
+	}
+
+	for (i = 0; i < args->num_binds; ++i) {
+		struct drm_gem_object *gem_obj;
+		u64 range = bind_ops[i].range;
+		u64 addr = bind_ops[i].addr;
+		u32 obj = bind_ops[i].obj;
+		u64 obj_offset = bind_ops[i].obj_offset;
+		u16 pat_index = bind_ops[i].pat_index;
+
+		if (!obj)
+			continue;	/* op has no GEM object: bos[i] stays NULL */
+
+		gem_obj = drm_gem_object_lookup(file, obj);
+		if (XE_IOCTL_DBG(xe, !gem_obj)) {
+			err = -ENOENT;
+			goto put_obj;
+		}
+		bos[i] = gem_to_xe_bo(gem_obj);
+
+		err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range,
+						   obj_offset, pat_index);
+		if (err)
+			goto put_obj;
+	}
+
+	if (args->num_syncs) {
+		syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
+		if (!syncs) {
+			err = -ENOMEM;
+			goto put_obj;
+		}
+	}
+
+	syncs_user = u64_to_user_ptr(args->syncs);
+	for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
+		err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
+					  &syncs_user[num_syncs],
+					  (xe_vm_in_lr_mode(vm) ?
+					   SYNC_PARSE_FLAG_LR_MODE : 0) |
+					  (!args->num_binds ?
+					   SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0));
+		if (err)
+			goto free_syncs;	/* num_syncs counts only the entries parsed so far */
+
+		if (xe_sync_is_ufence(&syncs[num_syncs]))
+			num_ufence++;
+	}
+
+	if (XE_IOCTL_DBG(xe, num_ufence > 1)) {
+		err = -EINVAL;
+		goto free_syncs;
+	}
+
+	if (!args->num_binds) {
+		err = -ENODATA;	/* no binds: free_syncs only signals the syncs */
+		goto free_syncs;
+	}
+
+	xe_vma_ops_init(&vops, vm, q, syncs, num_syncs);
+	for (i = 0; i < args->num_binds; ++i) {
+		u64 range = bind_ops[i].range;
+		u64 addr = bind_ops[i].addr;
+		u32 op = bind_ops[i].op;
+		u32 flags = bind_ops[i].flags;
+		u64 obj_offset = bind_ops[i].obj_offset;
+		u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
+		u16 pat_index = bind_ops[i].pat_index;
+
+		ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset,
+						  addr, range, op, flags,
+						  prefetch_region, pat_index);
+		if (IS_ERR(ops[i])) {
+			err = PTR_ERR(ops[i]);
+			ops[i] = NULL;	/* keep the ERR_PTR out of the cleanup loops */
+			goto unwind_ops;
+		}
+
+		err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops);
+		if (err)
+			goto unwind_ops;
+
+#ifdef TEST_VM_OPS_ERROR
+		if (flags & FORCE_OP_ERROR) {
+			vops.inject_error = true;
+			vm->xe->vm_inject_error_position =
+				(vm->xe->vm_inject_error_position + 1) %
+				FORCE_OP_ERROR_COUNT;
+		}
+#endif
+	}
+
+	/* Nothing to do */
+	if (list_empty(&vops.list)) {
+		err = -ENODATA;
+		goto unwind_ops;
+	}
+
+	err = xe_vma_ops_alloc(&vops, args->num_binds > 1);
+	if (err)
+		goto unwind_ops;
+
+	err = vm_bind_ioctl_ops_execute(vm, &vops);
+
+unwind_ops:
+	if (err && err != -ENODATA)	/* -ENODATA is not treated as a failure to unwind */
+		vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
+	xe_vma_ops_fini(&vops);
+	for (i = args->num_binds - 1; i >= 0; --i)
+		if (ops[i])
+			drm_gpuva_ops_free(&vm->gpuvm, ops[i]);
+free_syncs:
+	if (err == -ENODATA)	/* nothing was executed: signal the syncs here instead */
+		err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);
+	while (num_syncs--)
+		xe_sync_entry_cleanup(&syncs[num_syncs]);
+
+	kfree(syncs);
+put_obj:
+	for (i = 0; i < args->num_binds; ++i)
+		xe_bo_put(bos[i]);	/* entries for ops without an object are NULL */
+release_vm_lock:
+	up_write(&vm->lock);
+put_vm:
+	xe_vm_put(vm);
+put_exec_queue:
+	if (q)
+		xe_exec_queue_put(q);
+free_objs:
+	kvfree(bos);
+	kvfree(ops);
+	if (args->num_binds > 1)	/* otherwise bind_ops is &args->bind, not an allocation */
+		kvfree(bind_ops);
+	return err;
+}
+
+/**
+ * xe_vm_lock() - Take the lock of the vm's dma_resv object
+ * @vm: The struct xe_vm whose dma_resv object is to be locked
+ * @intr: Whether a wait for a contended lock may be interrupted
+ *
+ * Return: 0 once the lock is held. With @intr set, -EINTR if the wait
+ * for a contended lock was interrupted; with @intr clear the function
+ * always returns 0.
+ */
+int xe_vm_lock(struct xe_vm *vm, bool intr)
+{
+	struct dma_resv *resv = xe_vm_resv(vm);
+
+	return intr ? dma_resv_lock_interruptible(resv, NULL) :
+		      dma_resv_lock(resv, NULL);
+}
+
+/**
+ * xe_vm_unlock() - Unlock the vm's dma_resv object
+ * @vm: The struct xe_vm whose dma_resv lock, taken by xe_vm_lock(), to drop.
+ */
+void xe_vm_unlock(struct xe_vm *vm)
+{
+	struct dma_resv *resv = xe_vm_resv(vm);
+
+	dma_resv_unlock(resv);
+}
+
+/**
+ * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
+ * @vma: VMA to invalidate
+ *
+ * Walks the page-table leaves, zeroing the entries owned by this VMA,
+ * invalidates the TLBs, and blocks until the TLB invalidation is
+ * complete.
+ *
+ * Returns 0 for success, negative error code otherwise.
+ */
+int xe_vm_invalidate_vma(struct xe_vma *vma)
+{
+	struct xe_device *xe = xe_vma_vm(vma)->xe;
+	struct xe_tile *tile;
+	struct xe_gt_tlb_invalidation_fence
+		fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];	/* one slot per possible GT */
+	u8 id;
+	u32 fence_id = 0;	/* number of fences issued so far */
+	int ret = 0;
+
+	xe_assert(xe, !xe_vma_is_null(vma));
+	trace_xe_vma_invalidate(vma);
+
+	vm_dbg(&xe_vma_vm(vma)->xe->drm,
+	       "INVALIDATE: addr=0x%016llx, range=0x%016llx",
+		xe_vma_start(vma), xe_vma_size(vma));
+
+	/* Check that we don't race with page-table updates */
+	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
+		if (xe_vma_is_userptr(vma)) {
+			WARN_ON_ONCE(!mmu_interval_check_retry
+				     (&to_userptr_vma(vma)->userptr.notifier,
+				      to_userptr_vma(vma)->userptr.notifier_seq));
+			WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)),
+							     DMA_RESV_USAGE_BOOKKEEP));
+
+		} else {
+			xe_bo_assert_held(xe_vma_bo(vma));
+		}
+	}
+
+	for_each_tile(tile, xe, id) {
+		if (xe_pt_zap_ptes(tile, vma)) {	/* invalidate only tiles where the zap returned non-zero */
+			xe_device_wmb(xe);
+			xe_gt_tlb_invalidation_fence_init(tile->primary_gt,
+							  &fence[fence_id],
+							  true);
+
+			ret = xe_gt_tlb_invalidation_vma(tile->primary_gt,
+							 &fence[fence_id], vma);
+			if (ret < 0) {
+				xe_gt_tlb_invalidation_fence_fini(&fence[fence_id]);
+				goto wait;	/* still wait for the invalidations already issued */
+			}
+			++fence_id;
+
+			if (!tile->media_gt)
+				continue;	/* tile has no media GT */
+
+			xe_gt_tlb_invalidation_fence_init(tile->media_gt,
+							  &fence[fence_id],
+							  true);
+
+			ret = xe_gt_tlb_invalidation_vma(tile->media_gt,
+							 &fence[fence_id], vma);
+			if (ret < 0) {
+				xe_gt_tlb_invalidation_fence_fini(&fence[fence_id]);
+				goto wait;
+			}
+			++fence_id;
+		}
+	}
+
+wait:
+	for (id = 0; id < fence_id; ++id)
+		xe_gt_tlb_invalidation_fence_wait(&fence[id]);
+
+	vma->tile_invalidated = vma->tile_mask;	/* set on the error path too */
+
+	return ret;
+}
+
+struct xe_vm_snapshot {
+	unsigned long num_snaps;	/* number of entries in snap[] */
+	struct {
+		u64 ofs, bo_ofs;	/* VMA start; offset into the BO, or the userptr address */
+		unsigned long len;	/* VMA size, also the size of the data copy */
+		struct xe_bo *bo;	/* referenced BO, NULL when the VMA has no BO */
+		void *data;	/* copied contents, or an ERR_PTR */
+		struct mm_struct *mm;	/* referenced mm, set for userptr entries only */
+	} snap[];
+};
+
+struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
+{	/* Record location, size and backing of every dumpable VMA; contents are copied later. */
+	unsigned long num_snaps = 0, i;
+	struct xe_vm_snapshot *snap = NULL;
+	struct drm_gpuva *gpuva;
+
+	if (!vm)
+		return NULL;
+
+	mutex_lock(&vm->snap_mutex);
+	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {	/* first pass: count the entries */
+		if (gpuva->flags & XE_VMA_DUMPABLE)
+			num_snaps++;
+	}
+
+	if (num_snaps)
+		snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT);
+	if (!snap) {
+		snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV);	/* -ENODEV: nothing dumpable */
+		goto out_unlock;
+	}
+
+	snap->num_snaps = num_snaps;
+	i = 0;
+	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {	/* second pass: fill the entries */
+		struct xe_vma *vma = gpuva_to_vma(gpuva);
+		struct xe_bo *bo = vma->gpuva.gem.obj ?
+			gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;
+
+		if (!(gpuva->flags & XE_VMA_DUMPABLE))
+			continue;
+
+		snap->snap[i].ofs = xe_vma_start(vma);
+		snap->snap[i].len = xe_vma_size(vma);
+		if (bo) {
+			snap->snap[i].bo = xe_bo_get(bo);
+			snap->snap[i].bo_ofs = xe_vma_bo_offset(vma);
+		} else if (xe_vma_is_userptr(vma)) {
+			struct mm_struct *mm =
+				to_userptr_vma(vma)->userptr.notifier.mm;
+
+			if (mmget_not_zero(mm))
+				snap->snap[i].mm = mm;
+			else
+				snap->snap[i].data = ERR_PTR(-EFAULT);	/* could not take an mm reference */
+
+			snap->snap[i].bo_ofs = xe_vma_userptr(vma);
+		} else {
+			snap->snap[i].data = ERR_PTR(-ENOENT);	/* neither BO nor userptr */
+		}
+		i++;
+	}
+
+out_unlock:
+	mutex_unlock(&vm->snap_mutex);
+	return snap;
+}
+
+void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
+{	/* Copy the contents of every captured range into snap[i].data, or store an ERR_PTR there. */
+	if (IS_ERR_OR_NULL(snap))
+		return;
+
+	for (int i = 0; i < snap->num_snaps; i++) {
+		struct xe_bo *bo = snap->snap[i].bo;
+		struct iosys_map src;
+		int err;
+
+		if (IS_ERR(snap->snap[i].data))
+			continue;	/* an error is already recorded for this entry */
+
+		snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER);
+		if (!snap->snap[i].data) {
+			snap->snap[i].data = ERR_PTR(-ENOMEM);
+			goto cleanup_bo;
+		}
+
+		if (bo) {
+			xe_bo_lock(bo, false);
+			err = ttm_bo_vmap(&bo->ttm, &src);
+			if (!err) {
+				xe_map_memcpy_from(xe_bo_device(bo),
+						   snap->snap[i].data,
+						   &src, snap->snap[i].bo_ofs,
+						   snap->snap[i].len);
+				ttm_bo_vunmap(&bo->ttm, &src);
+			}
+			xe_bo_unlock(bo);
+		} else {	/* userptr entry: bo_ofs holds the user address */
+			void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs;
+
+			kthread_use_mm(snap->snap[i].mm);
+			if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len))
+				err = 0;
+			else
+				err = -EFAULT;
+			kthread_unuse_mm(snap->snap[i].mm);
+
+			mmput(snap->snap[i].mm);
+			snap->snap[i].mm = NULL;	/* so xe_vm_snapshot_free() does not put it again */
+		}
+
+		if (err) {
+			kvfree(snap->snap[i].data);
+			snap->snap[i].data = ERR_PTR(err);
+		}
+
+cleanup_bo:
+		xe_bo_put(bo);	/* bo is NULL for userptr entries */
+		snap->snap[i].bo = NULL;
+	}
+}
+
+void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
+{	/* Print each entry's length and its ascii85-encoded data (or its error) to @p. */
+	unsigned long i, j;
+
+	if (IS_ERR_OR_NULL(snap)) {
+		drm_printf(p, "[0].error: %li\n", PTR_ERR(snap));	/* prints 0 for a NULL snapshot */
+		return;
+	}
+
+	for (i = 0; i < snap->num_snaps; i++) {
+		drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len);
+
+		if (IS_ERR(snap->snap[i].data)) {
+			drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs,
+				   PTR_ERR(snap->snap[i].data));
+			continue;
+		}
+
+		drm_printf(p, "[%llx].data: ", snap->snap[i].ofs);
+
+		for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) {	/* one u32 per step */
+			u32 *val = snap->snap[i].data + j;
+			char dumped[ASCII85_BUFSZ];
+
+			drm_puts(p, ascii85_encode(*val, dumped));
+		}
+
+		drm_puts(p, "\n");
+	}
+}
+
+void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
+{
+	unsigned long idx;
+
+	if (IS_ERR_OR_NULL(snap))
+		return;
+	/* Release what each entry still holds, then the snapshot itself */
+	for (idx = 0; idx < snap->num_snaps; idx++) {
+		if (!IS_ERR(snap->snap[idx].data))
+			kvfree(snap->snap[idx].data);
+		xe_bo_put(snap->snap[idx].bo);
+		if (snap->snap[idx].mm)
+			mmput(snap->snap[idx].mm);
+	}
+	kvfree(snap);
+}
diff --git a/rr-cache/c8f6c6d4bf8c0fcdd8c474baad7e3e678ea6c58d/preimage b/rr-cache/c8f6c6d4bf8c0fcdd8c474baad7e3e678ea6c58d/preimage
new file mode 100644
index 000000000000..40f5a256a79d
--- /dev/null
+++ b/rr-cache/c8f6c6d4bf8c0fcdd8c474baad7e3e678ea6c58d/preimage
@@ -0,0 +1,2215 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+<<<<<<<
+#include <linux/dma-fence-array.h>
+=======
+#include <linux/dma-fence-chain.h>
+>>>>>>>
+
+#include "xe_pt.h"
+
+#include "regs/xe_gtt_defs.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_drm_client.h"
+#include "xe_exec_queue.h"
+#include "xe_gt.h"
+#include "xe_gt_tlb_invalidation.h"
+#include "xe_migrate.h"
+#include "xe_pt_types.h"
+#include "xe_pt_walk.h"
+#include "xe_res_cursor.h"
+#include "xe_sched_job.h"
+#include "xe_sync.h"
+#include "xe_trace.h"
+#include "xe_ttm_stolen_mgr.h"
+#include "xe_vm.h"
+
+struct xe_pt_dir {
+	struct xe_pt pt;	/* embedded page-table; see as_xe_pt_dir() */
+	/** @children: Array of page-table child nodes, XE_PDES entries */
+	struct xe_ptw *children[XE_PDES];
+};
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
+#define xe_pt_set_addr(__xe_pt, __addr) ((__xe_pt)->addr = (__addr))
+#define xe_pt_addr(__xe_pt) ((__xe_pt)->addr)
+#else	/* !CONFIG_DRM_XE_DEBUG_VM: the setter expands to nothing, the getter to 0 */
+#define xe_pt_set_addr(__xe_pt, __addr)
+#define xe_pt_addr(__xe_pt) 0ull
+#endif	/* CONFIG_DRM_XE_DEBUG_VM */
+
+static const u64 xe_normal_pt_shifts[] = {12, 21, 30, 39, 48};	/* per-level address shifts */
+static const u64 xe_compact_pt_shifts[] = {16, 21, 30, 39, 48};	/* same, but level 0 uses 16 */
+
+#define XE_PT_HIGHEST_LEVEL (ARRAY_SIZE(xe_normal_pt_shifts) - 1)
+
+static struct xe_pt_dir *as_xe_pt_dir(struct xe_pt *pt)
+{	/* @pt must be the pt member of a struct xe_pt_dir (level > 0 in xe_pt_create()) */
+	return container_of(pt, struct xe_pt_dir, pt);
+}
+
+static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index)
+{	/* NOTE(review): callers NULL-test the result; presumably base is first in xe_pt - confirm */
+	return container_of(pt_dir->children[index], struct xe_pt, base);
+}
+
+static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm,
+			     unsigned int level)
+{
+	struct xe_device *xe = tile_to_xe(tile);
+	u16 wb_pat = xe->pat.idx[XE_CACHE_WB];
+	u8 id = tile->id;
+
+	if (!xe_vm_has_scratch(vm))
+		return 0;
+
+	/* Up to MAX_HUGEPTE_LEVEL: a NULL pte. Above: the scratch pt below. */
+	if (level <= MAX_HUGEPTE_LEVEL)
+		return vm->pt_ops->pte_encode_addr(xe, 0, wb_pat, level,
+						   IS_DGFX(xe), 0) | XE_PTE_NULL;
+	return vm->pt_ops->pde_encode_bo(vm->scratch_pt[id][level - 1]->bo,
+					 0, wb_pat);
+}
+
+static void xe_pt_free(struct xe_pt *pt)
+{
+	/* Directories were allocated as the enclosing struct xe_pt_dir. */
+	void *alloc = pt->level ? (void *)as_xe_pt_dir(pt) : (void *)pt;
+
+	kfree(alloc);
+}
+
+/**
+ * xe_pt_create() - Create a page-table.
+ * @vm: The vm to create for.
+ * @tile: The tile to create for.
+ * @level: The page-table level.
+ *
+ * Allocate and zero-initialize the metadata of a single page-table and
+ * create its page-table bo, but don't initialize the bo contents. If the
+ * level is greater than zero, the metadata is a page-directory whose child
+ * array consequently starts out as all NULL pointers. The bo is also added
+ * to the drm client of vm->xef, if set.
+ *
+ * Return: A valid struct xe_pt pointer on success, Pointer error code on
+ * error.
+ */
+struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
+			   unsigned int level)
+{
+	struct xe_pt_dir *dir = NULL;
+	struct xe_pt *pt;
+	struct xe_bo *bo;
+
+	/* Directories carry a child array, so they need the larger struct. */
+	if (!level) {
+		pt = kzalloc(sizeof(*pt), GFP_KERNEL);
+	} else {
+		dir = kzalloc(sizeof(*dir), GFP_KERNEL);
+		pt = dir ? &dir->pt : NULL;
+	}
+	if (!pt)
+		return ERR_PTR(-ENOMEM);
+
+	/* xe_pt_free() keys off the level, so set it before anything can fail. */
+	pt->level = level;
+
+	bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K,
+				  ttm_bo_type_kernel,
+				  XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+				  XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE |
+				  XE_BO_FLAG_PINNED |
+				  XE_BO_FLAG_NO_RESV_EVICT |
+				  XE_BO_FLAG_PAGETABLE);
+	if (IS_ERR(bo)) {
+		int err = PTR_ERR(bo);
+
+		xe_pt_free(pt);
+		return ERR_PTR(err);
+	}
+	pt->bo = bo;
+	pt->base.children = dir ? dir->children : NULL;
+
+	if (vm->xef)
+		xe_drm_client_add_bo(vm->xef->client, pt->bo);
+	xe_tile_assert(tile, level <= XE_VM_MAX_LEVEL);
+
+	return pt;
+}
+
+/**
+ * xe_pt_populate_empty() - Populate a page-table bo with scratch- or zero
+ * entries.
+ * @tile: The tile whose scratch page-table to use.
+ * @vm: The vm we populate for.
+ * @pt: The page-table whose bo to initialize.
+ *
+ * Fill the page-table bo of @pt with entries pointing into the tile's
+ * scratch page-table tree if the vm has one. Otherwise fill it with zeros.
+ */
+void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm,
+			  struct xe_pt *pt)
+{
+	struct iosys_map *map = &pt->bo->vmap;
+	u64 scratch_pte;
+	int idx;
+
+	if (!xe_vm_has_scratch(vm)) {
+		/*
+		 * FIXME: Some memory may already be zeroed when allocated?
+		 * Find out which memory that is and avoid this memset...
+		 */
+		xe_map_memset(vm->xe, map, 0, 0, SZ_4K);
+		return;
+	}
+	scratch_pte = __xe_pt_empty_pte(tile, vm, pt->level);
+	for (idx = 0; idx < XE_PDES; idx++)
+		xe_pt_write(vm->xe, map, idx, scratch_pte);
+}
+
+/**
+ * xe_pt_shift() - Return the ilog2 value of the size of the address range
+ * associated with a page-table level.
+ * @level: The level.
+ *
+ * Return: XE_PTE_SHIFT for level 0, plus XE_PDE_SHIFT for each level
+ * above that.
+ */
+unsigned int xe_pt_shift(unsigned int level)
+{
+	return XE_PDE_SHIFT * level + XE_PTE_SHIFT;
+}
+
+/**
+ * xe_pt_destroy() - Destroy a page-table tree.
+ * @pt: The root of the page-table tree to destroy. May be NULL.
+ * @flags: vm flags. Only passed on to the recursive calls.
+ * @deferred: List head of lockless list for deferred putting. NULL for
+ *            immediate putting.
+ *
+ * Unpins and puts the page-table bo, recursively destroys all children of
+ * a directory and finally frees @pt. TODO: Can we remove the @flags argument?
+ */
+void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred)
+{
+	int idx;
+
+	if (!pt)
+		return;
+
+	XE_WARN_ON(!list_empty(&pt->bo->ttm.base.gpuva.list));
+	xe_bo_unpin(pt->bo);
+	xe_bo_put_deferred(pt->bo, deferred);
+	if (pt->level > 0 && pt->num_live) {
+		struct xe_pt_dir *dir = as_xe_pt_dir(pt);
+
+		for (idx = 0; idx < XE_PDES; idx++) {
+			struct xe_pt *child = xe_pt_entry(dir, idx);
+
+			if (child)
+				xe_pt_destroy(child, flags, deferred);
+		}
+	}
+	xe_pt_free(pt);
+}
+
+/**
+ * DOC: Pagetable building
+ *
+ * Below we use the term "page-table" for both page-directories, containing
+ * pointers to lower level page-directories or page-tables, and level 0
+ * page-tables that contain only page-table-entries pointing to memory pages.
+ *
+ * When inserting an address range in an already existing page-table tree
+ * there will typically be a set of page-tables that are shared with other
+ * address ranges, and a set that are private to this address range.
+ * The set of shared page-tables can be at most two per level,
+ * and those can't be updated immediately because the entries of those
+ * page-tables may still be in use by the gpu for other mappings. Therefore
+ * when inserting entries into those, we instead stage those insertions by
+ * adding insertion data into struct xe_vm_pgtable_update structures. This
+ * data, (subtrees for the cpu and page-table-entries for the gpu) is then
+ * added in a separate commit step. CPU-data is committed while still under the
+ * vm lock, the object lock and for userptr, the notifier lock in read mode.
+ * The GPU async data is committed either by the GPU or CPU after fulfilling
+ * relevant dependencies.
+ * For non-shared page-tables (and, in fact, for shared ones that aren't
+ * existing at the time of staging), we add the data in-place without the
+ * special update structures. This private part of the page-table tree will
+ * remain disconnected from the vm page-table tree until data is committed to
+ * the shared page tables of the vm tree in the commit phase.
+ */
+
+struct xe_pt_update {
+	/** @update: The staged update for this parent. NULL for a new parent. */
+	struct xe_vm_pgtable_update *update;
+	/** @parent: The parent. Used to detect a parent change. */
+	struct xe_pt *parent;
+	/** @preexisting: True if the parent pre-existed, false if allocated */
+	bool preexisting;
+};
+
+struct xe_pt_stage_bind_walk {
+	/** @base: The base class. */
+	struct xe_pt_walk base;
+
+	/* Input parameters for the walk */
+	/** @vm: The vm we're building for. */
+	struct xe_vm *vm;
+	/** @tile: The tile we're building for. */
+	struct xe_tile *tile;
+	/** @default_pte: PTE flag only template. No address is associated */
+	u64 default_pte;
+	/** @dma_offset: DMA offset to add to the PTE. */
+	u64 dma_offset;
+	/**
+	 * @needs_64K: This address range enforces 64K alignment and
+	 * granularity.
+	 */
+	bool needs_64K;
+	/**
+	 * @vma: VMA being mapped
+	 */
+	struct xe_vma *vma;
+
+	/* Also input, but is updated during the walk */
+	/** @curs: The DMA address cursor. */
+	struct xe_res_cursor *curs;
+	/** @va_curs_start: The Virtual address corresponding to @curs->start */
+	u64 va_curs_start;
+
+	/* Output */
+	struct xe_walk_update {
+		/** @wupd.entries: Caller provided storage. */
+		struct xe_vm_pgtable_update *entries;
+		/** @wupd.num_used_entries: Number of update @entries used. */
+		unsigned int num_used_entries;
+		/** @wupd.updates: Tracks the update entry at a given level */
+		struct xe_pt_update updates[XE_VM_MAX_LEVEL + 1];
+	} wupd;
+
+	/* Walk state */
+	/**
+	 * @l0_end_addr: The end address of the current l0 leaf. Used for
+	 * 64K granularity detection.
+	 */
+	u64 l0_end_addr;
+	/** @addr_64K: The start address of the current 64K chunk. */
+	u64 addr_64K;
+	/** @found_64K: Whether @addr_64K actually points to a 64K chunk. */
+	bool found_64K;
+};
+
+static int
+xe_pt_new_shared(struct xe_walk_update *wupd, struct xe_pt *parent,
+		 pgoff_t offset, bool alloc_entries)
+{
+	struct xe_pt_update *level_upd = &wupd->updates[parent->level];
+	struct xe_vm_pgtable_update *slot;
+
+	/*
+	 * For *each level*, there can only be one active
+	 * struct xe_pt_update at any one time. Once we move on to a
+	 * new parent and page-directory, the old one is complete, and
+	 * updates are either already stored in the build tree or in
+	 * @wupd->entries
+	 */
+	if (likely(level_upd->parent == parent))
+		return 0;
+
+	level_upd->parent = parent;
+	level_upd->preexisting = true;
+
+	/* Never hand out more than XE_VM_MAX_LEVEL * 2 + 1 update entries. */
+	if (wupd->num_used_entries == XE_VM_MAX_LEVEL * 2 + 1)
+		return -EINVAL;
+
+	slot = &wupd->entries[wupd->num_used_entries++];
+	level_upd->update = slot;
+	slot->pt = parent;
+	slot->pt_bo = parent->bo;
+	slot->pt_bo->update_index = -1;
+	slot->ofs = offset;
+	slot->qwords = 0;
+	slot->flags = 0;
+
+	if (!alloc_entries)
+		return 0;
+
+	/* Room to stage any of the XE_PDES entries of @parent. */
+	slot->pt_entries = kmalloc_array(XE_PDES, sizeof(*slot->pt_entries),
+					 GFP_KERNEL);
+
+	return slot->pt_entries ? 0 : -ENOMEM;
+}
+
+/*
+ * Insert @pte (and @xe_child, if any) at @offset of @parent: in place for a
+ * parent built by this walk, staged for a pre-existing shared one. Called very
+ * frequently, so branch hints favour the new-pagetable, leaf-pte fastpath.
+ */
+static int
+xe_pt_insert_entry(struct xe_pt_stage_bind_walk *xe_walk, struct xe_pt *parent,
+		   pgoff_t offset, struct xe_pt *xe_child, u64 pte)
+{
+	struct xe_pt_update *upd = &xe_walk->wupd.updates[parent->level];
+	struct xe_pt_update *child_upd = xe_child ?
+		&xe_walk->wupd.updates[xe_child->level] : NULL;
+	int ret;
+	/* Returns early unless @parent differs from the one last seen at its level. */
+	ret = xe_pt_new_shared(&xe_walk->wupd, parent, offset, true);
+	if (unlikely(ret))
+		return ret;
+
+	/*
+	 * Register this new pagetable so that it won't be recognized as
+	 * a shared pagetable by a subsequent insertion.
+	 */
+	if (unlikely(child_upd)) {
+		child_upd->update = NULL;
+		child_upd->parent = xe_child;
+		child_upd->preexisting = false;
+	}
+
+	if (likely(!upd->preexisting)) {
+		/* Continue building a non-connected subtree. */
+		struct iosys_map *map = &parent->bo->vmap;
+
+		if (unlikely(xe_child))
+			parent->base.children[offset] = &xe_child->base;
+
+		xe_pt_write(xe_walk->vm->xe, map, offset, pte);
+		parent->num_live++;
+	} else {
+		/* Shared pt. Stage update. */
+		unsigned int idx;
+		struct xe_vm_pgtable_update *entry = upd->update;
+		/* Staged slots are indexed relative to the first offset staged. */
+		idx = offset - entry->ofs;
+		entry->pt_entries[idx].pt = xe_child;
+		entry->pt_entries[idx].pte = pte;
+		entry->qwords++;
+	}
+
+	return 0;
+}
+
+static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level,
+				   struct xe_pt_stage_bind_walk *xe_walk)
+{
+	u64 pte_span = next - addr;
+	u64 dma_addr;
+
+	/*
+	 * A huge pte needs a level that allows one, a virtual range that
+	 * covers the whole entry and a DMA segment that reaches up to @next.
+	 */
+	if (level > MAX_HUGEPTE_LEVEL ||
+	    !xe_pt_covers(addr, next, level, &xe_walk->base) ||
+	    next - xe_walk->va_curs_start > xe_walk->curs->size)
+		return false;
+
+	/* null VMA's do not have dma addresses */
+	if (xe_vma_is_null(xe_walk->vma))
+		return true;
+
+	/*
+	 * The DMA address backing @addr must be huge PTE size aligned.
+	 */
+	dma_addr = addr - xe_walk->va_curs_start + xe_res_dma(xe_walk->curs);
+
+	return IS_ALIGNED(dma_addr, pte_span);
+}
+
+/*
+ * Scan the requested mapping to check whether it can be done entirely
+ * with 64K PTEs. Works on a private copy of the walk's DMA cursor.
+ */
+static bool
+xe_pt_scan_64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk)
+{
+	struct xe_res_cursor scan = *xe_walk->curs;
+	u64 va = addr;
+
+	/* Must start 64K aligned and end within the current l0 leaf. */
+	if (!IS_ALIGNED(va, SZ_64K) || next > xe_walk->l0_end_addr)
+		return false;
+
+	/* null VMA's do not have dma addresses */
+	if (xe_vma_is_null(xe_walk->vma))
+		return true;
+
+	xe_res_next(&scan, va - xe_walk->va_curs_start);
+	while (va < next) {
+		/* Each chunk needs a 64K aligned DMA address and 64K of segment. */
+		if (!IS_ALIGNED(xe_res_dma(&scan), SZ_64K) || scan.size < SZ_64K)
+			return false;
+		xe_res_next(&scan, SZ_64K);
+		va += SZ_64K;
+	}
+
+	return va == next;
+}
+
+/*
+ * For non-compact "normal" 4K level-0 pagetables, we want to try to group
+ * addresses together in 64K-contiguous regions to add a 64K TLB hint for the
+ * device to the PTE.
+ * This function determines whether the address is part of such a
+ * segment. @next is not used. For VRAM in normal pagetables, this is
+ * strictly necessary on some devices.
+ */
+static bool
+xe_pt_is_pte_ps64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk)
+{
+	bool cached = xe_walk->found_64K && addr - xe_walk->addr_64K < SZ_64K;
+
+	/* Outside the last 64K chunk found: rescan, starting at @addr. */
+	if (!cached) {
+		xe_walk->found_64K = xe_pt_scan_64K(addr, addr + SZ_64K, xe_walk);
+		xe_walk->addr_64K = addr;
+	}
+	return xe_walk->found_64K;
+}
+
+static int
+xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
+		       unsigned int level, u64 addr, u64 next,
+		       struct xe_ptw **child,
+		       enum page_walk_action *action,
+		       struct xe_pt_walk *walk)
+{
+	struct xe_pt_stage_bind_walk *xe_walk =
+		container_of(walk, typeof(*xe_walk), base);
+	u16 pat_index = xe_walk->vma->pat_index;
+	struct xe_pt *xe_parent = container_of(parent, typeof(*xe_parent), base);
+	struct xe_vm *vm = xe_walk->vm;
+	struct xe_pt *xe_child;
+	bool covers;
+	int ret = 0;
+	u64 pte;
+
+	/* Leaf entry: level 0, or a huge pte can map [addr, next) directly. */
+	if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) {
+		struct xe_res_cursor *curs = xe_walk->curs;
+		bool is_null = xe_vma_is_null(xe_walk->vma);
+
+		XE_WARN_ON(xe_walk->va_curs_start != addr);
+		/* null vmas have no DMA address; address 0 is encoded for them. */
+		pte = vm->pt_ops->pte_encode_vma(is_null ? 0 :
+						 xe_res_dma(curs) + xe_walk->dma_offset,
+						 xe_walk->vma, pat_index, level);
+		pte |= xe_walk->default_pte;
+
+		/*
+		 * Set the XE_PTE_PS64 hint if possible, otherwise if
+		 * this device *requires* 64K PTE size for VRAM, fail.
+		 */
+		if (level == 0 && !xe_parent->is_compact) {
+			if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) {
+				xe_walk->vma->gpuva.flags |= XE_VMA_PTE_64K;
+				pte |= XE_PTE_PS64;
+			} else if (XE_WARN_ON(xe_walk->needs_64K)) {
+				return -EINVAL;
+			}
+		}
+
+		ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, NULL, pte);
+		if (unlikely(ret))
+			return ret;
+		/* Advance the DMA cursor (non-null vmas only) and the va it tracks. */
+		if (!is_null)
+			xe_res_next(curs, next - addr);
+		xe_walk->va_curs_start = next;
+		xe_walk->vma->gpuva.flags |= (XE_VMA_PTE_4K << level);
+		*action = ACTION_CONTINUE;
+
+		return ret;
+	}
+
+	/*
+	 * Descending to lower level. Determine if we need to allocate a
+	 * new page table or -directory, which we do if there is no
+	 * previous one or there is one we can completely replace.
+	 */
+	if (level == 1) {
+		walk->shifts = xe_normal_pt_shifts;
+		xe_walk->l0_end_addr = next;
+	}
+
+	covers = xe_pt_covers(addr, next, level, &xe_walk->base);
+	if (covers || !*child) {
+		u64 flags = 0;
+
+		xe_child = xe_pt_create(xe_walk->vm, xe_walk->tile, level - 1);
+		if (IS_ERR(xe_child))
+			return PTR_ERR(xe_child);
+
+		xe_pt_set_addr(xe_child,
+			       round_down(addr, 1ull << walk->shifts[level]));
+		/* A new table that is only partially covered starts out empty. */
+		if (!covers)
+			xe_pt_populate_empty(xe_walk->tile, xe_walk->vm, xe_child);
+
+		*child = &xe_child->base;
+
+		/*
+		 * Prefer the compact pagetable layout for L0 if possible. Only
+		 * possible if VMA covers entire 2MB region as compact 64k and
+		 * 4k pages cannot be mixed within a 2MB region.
+		 * TODO: Suballocate the pt bo to avoid wasting a lot of
+		 * memory.
+		 */
+		if (GRAPHICS_VERx100(tile_to_xe(xe_walk->tile)) >= 1250 && level == 1 &&
+		    covers && xe_pt_scan_64K(addr, next, xe_walk)) {
+			walk->shifts = xe_compact_pt_shifts;
+			xe_walk->vma->gpuva.flags |= XE_VMA_PTE_COMPACT;
+			flags |= XE_PDE_64K;
+			xe_child->is_compact = true;
+		}
+		/* Point the parent entry at the new child, in place or staged. */
+		pte = vm->pt_ops->pde_encode_bo(xe_child->bo, 0, pat_index) | flags;
+		ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, xe_child,
+					 pte);
+	}
+
+	*action = ACTION_SUBTREE;
+	return ret;
+}
+
+static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = {	/* used by xe_pt_stage_bind() */
+	.pt_entry = xe_pt_stage_bind_entry,
+};
+
+/**
+ * xe_pt_stage_bind() - Build a disconnected page-table tree for a given address
+ * range.
+ * @tile: The tile we're building for.
+ * @vma: The vma indicating the address range.
+ * @entries: Storage for the update entries used for connecting the tree to
+ * the main tree at commit time.
+ * @num_entries: On output contains the number of @entries used, also on error.
+ *
+ * This function builds a disconnected page-table tree for a given address
+ * range. The tree is connected to the main vm tree for the gpu using
+ * xe_migrate_update_pgtables() and for the cpu using xe_pt_commit_bind().
+ * The function builds xe_vm_pgtable_update structures for already existing
+ * shared page-tables, and non-existing shared and non-shared page-tables
+ * are built and populated directly.
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+static int
+xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
+		 struct xe_vm_pgtable_update *entries, u32 *num_entries)
+{
+	struct xe_device *xe = tile_to_xe(tile);
+	struct xe_bo *bo = xe_vma_bo(vma);
+	bool is_devmem = !xe_vma_is_userptr(vma) && bo &&
+		(xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo));
+	struct xe_res_cursor curs;
+	struct xe_pt_stage_bind_walk xe_walk = {
+		.base = {
+			.ops = &xe_pt_stage_bind_ops,
+			.shifts = xe_normal_pt_shifts,
+			.max_level = XE_PT_HIGHEST_LEVEL,
+		},
+		.vm = xe_vma_vm(vma),
+		.tile = tile,
+		.curs = &curs,
+		.va_curs_start = xe_vma_start(vma),
+		.vma = vma,
+		.wupd.entries = entries,
+		.needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAG_64K) && is_devmem,
+	};
+	struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id];
+	int ret;
+
+	/*
+	 * Default atomic expectations for different allocation scenarios are as follows:
+	 *
+	 * 1. Traditional API: When the VM is not in LR mode:
+	 *    - Device atomics are expected to function with all allocations.
+	 *
+	 * 2. Compute/SVM API: When the VM is in LR mode:
+	 *    - Device atomics are the default behavior when the bo is placed in a single region.
+	 *    - In all other cases device atomics will be disabled with AE=0 until an application
+	 *      requests differently using an ioctl like madvise.
+	 */
+	if (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) {
+		if (xe_vm_in_lr_mode(xe_vma_vm(vma))) {
+			if (bo && xe_bo_has_single_placement(bo))
+				xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
+			/*
+			 * If a SMEM+LMEM allocation is backed by SMEM, a device
+			 * atomics will cause a gpu page fault and which then
+			 * gets migrated to LMEM, bind such allocations with
+			 * device atomics enabled.
+			 */
+			else if (is_devmem && !xe_bo_has_single_placement(bo))
+				xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
+		} else {
+			xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
+		}
+
+		/*
+		 * Unset AE if the platform(PVC) doesn't support it on an
+		 * allocation
+		 */
+		if (!xe->info.has_device_atomics_on_smem && !is_devmem)
+			xe_walk.default_pte &= ~XE_USM_PPGTT_PTE_AE;
+	}
+
+	if (is_devmem) {
+		xe_walk.default_pte |= XE_PPGTT_PTE_DM;
+		xe_walk.dma_offset = vram_region_gpu_offset(bo->ttm.resource);
+	}
+	/* For a stolen bo the stolen offset replaces any offset set above. */
+	if (!xe_vma_has_no_bo(vma) && xe_bo_is_stolen(bo))
+		xe_walk.dma_offset = xe_ttm_stolen_gpu_offset(xe_bo_device(bo));
+
+	xe_bo_assert_held(bo);
+	/* Start the DMA cursor; a null vma only needs its size set. */
+	if (!xe_vma_is_null(vma)) {
+		if (xe_vma_is_userptr(vma))
+			xe_res_first_sg(to_userptr_vma(vma)->userptr.sg, 0,
+					xe_vma_size(vma), &curs);
+		else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo))
+			xe_res_first(bo->ttm.resource, xe_vma_bo_offset(vma),
+				     xe_vma_size(vma), &curs);
+		else
+			xe_res_first_sg(xe_bo_sg(bo), xe_vma_bo_offset(vma),
+					xe_vma_size(vma), &curs);
+	} else {
+		curs.size = xe_vma_size(vma);
+	}
+	/* Walk the vma's range, starting at this tile's vm root page-table. */
+	ret = xe_pt_walk_range(&pt->base, pt->level, xe_vma_start(vma),
+			       xe_vma_end(vma), &xe_walk.base);
+
+	*num_entries = xe_walk.wupd.num_used_entries;
+	return ret;
+}
+
+/**
+ * xe_pt_nonshared_offsets() - Determine the non-shared entry offsets of a
+ * shared pagetable.
+ * @addr: The start address within the shared pagetable.
+ * @end: The end address within the shared pagetable.
+ * @level: The level of the shared pagetable.
+ * @walk: Walk info. Supplies the per-level shifts.
+ * @action: Set to the next walk action for levels above 0; untouched at
+ * level 0 (see enum page_walk_action).
+ * @offset: Ignored on input, First non-shared entry on output.
+ * @end_offset: Ignored on input, Last non-shared entry + 1 on output.
+ *
+ * A shared page-table has some entries that belong to the address range
+ * and others that don't. This function determines the entries that belong
+ * fully to the address range. Above level 0, the first and last entry may
+ * only partially belong to the range; those are excluded from the offsets
+ * returned. Iff partial entries are detected, there must be shared page
+ * tables also at lower levels, so @action is set to ACTION_SUBTREE,
+ * otherwise to ACTION_CONTINUE.
+ *
+ * Return: true if there were non-shared entries, false otherwise.
+ */
+static bool xe_pt_nonshared_offsets(u64 addr, u64 end, unsigned int level,
+				    struct xe_pt_walk *walk,
+				    enum page_walk_action *action,
+				    pgoff_t *offset, pgoff_t *end_offset)
+{
+	u64 entry_size = 1ull << walk->shifts[level];
+	pgoff_t first = xe_pt_offset(addr, level, walk);
+	pgoff_t last = xe_pt_num_entries(addr, end, level, walk) + first;
+	bool partial_head = !IS_ALIGNED(addr, entry_size);
+	bool partial_tail = !IS_ALIGNED(end, entry_size);
+
+	/* Level 0 entries can't be partially covered; @action is left alone. */
+	if (!level) {
+		*offset = first;
+		*end_offset = last;
+		return true;
+	}
+
+	/*
+	 * If addr or end are not size aligned, there are shared pts at lower
+	 * level, so in that case traverse down the subtree
+	 */
+	*action = ACTION_CONTINUE;
+	if (partial_head || partial_tail)
+		*action = ACTION_SUBTREE;
+	*offset = first + partial_head;
+	*end_offset = last - partial_tail;
+
+	return *end_offset > *offset;
+}
+
+struct xe_pt_zap_ptes_walk {
+	/** @base: The walk base-class */
+	struct xe_pt_walk base;
+
+	/* Input parameters for the walk */
+	/** @tile: The tile we're zapping for */
+	struct xe_tile *tile;
+
+	/* Output */
+	/** @needs_invalidate: Whether we need to invalidate TLB */
+	bool needs_invalidate;
+};
+
+static int xe_pt_zap_ptes_entry(struct xe_ptw *parent, pgoff_t offset,
+				unsigned int level, u64 addr, u64 next,
+				struct xe_ptw **child,
+				enum page_walk_action *action,
+				struct xe_pt_walk *walk)
+{
+	struct xe_pt_zap_ptes_walk *zap =
+		container_of(walk, typeof(*zap), base);
+	struct xe_pt *pt = container_of(*child, typeof(*pt), base);
+	pgoff_t first, last;
+
+	XE_WARN_ON(!*child);
+	XE_WARN_ON(!level);
+
+	/*
+	 * Note that we're called from an entry callback, and we're dealing
+	 * with the child of that entry rather than the parent, so the
+	 * offsets are computed for the level below.
+	 */
+	if (!xe_pt_nonshared_offsets(addr, next, level - 1, walk, action,
+				     &first, &last))
+		return 0;
+
+	xe_map_memset(tile_to_xe(zap->tile), &pt->bo->vmap,
+		      first * sizeof(u64), 0, (last - first) * sizeof(u64));
+	zap->needs_invalidate = true;
+
+	return 0;
+}
+
+static const struct xe_pt_walk_ops xe_pt_zap_ptes_ops = {	/* used by xe_pt_zap_ptes() */
+	.pt_entry = xe_pt_zap_ptes_entry,
+};
+
+/**
+ * xe_pt_zap_ptes() - Zap (zero) gpu ptes of an address range
+ * @tile: The tile we're zapping for.
+ * @vma: GPU VMA detailing address range.
+ *
+ * Eviction and Userptr invalidation needs to be able to zap the
+ * gpu ptes of a given address range in pagefaulting mode.
+ * In order to be able to do that, that function needs access to the shared
+ * page-table entries so it can either clear the leaf PTEs or
+ * clear the pointers to lower-level page-tables. The caller is required
+ * to hold the necessary locks to ensure neither the page-table connectivity
+ * nor the page-table entries of the range is updated from under us.
+ *
+ * Return: Whether ptes were actually updated and a TLB invalidation is
+ * required.
+ */
+bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma)
+{
+	struct xe_pt *root = xe_vma_vm(vma)->pt_root[tile->id];
+	u8 live_tiles = vma->tile_present & ~vma->tile_invalidated;
+	struct xe_pt_zap_ptes_walk zap = {
+		.base = {
+			.ops = &xe_pt_zap_ptes_ops,
+			.shifts = xe_normal_pt_shifts,
+			.max_level = XE_PT_HIGHEST_LEVEL,
+		},
+		.tile = tile,
+	};
+
+	/* Tile not in tile_present & ~tile_invalidated: nothing to zap. */
+	if (!(live_tiles & BIT(tile->id)))
+		return false;
+
+	(void)xe_pt_walk_shared(&root->base, root->level, xe_vma_start(vma),
+				xe_vma_end(vma), &zap.base);
+	return zap.needs_invalidate;
+}
+
+static void
+xe_vm_populate_pgtable(struct xe_migrate_pt_update *pt_update, struct xe_tile *tile,
+		       struct iosys_map *map, void *data,
+		       u32 qword_ofs, u32 num_qwords,
+		       const struct xe_vm_pgtable_update *update)
+{
+	struct xe_pt_entry *staged = update->pt_entries;
+	u64 *dst = data;	/* only written when @map is NULL */
+	u32 n;
+
+	for (n = 0; n < num_qwords; n++) {
+		if (!map)
+			dst[n] = staged[n].pte;
+		else
+			xe_map_wr(tile_to_xe(tile), map,
+				  (qword_ofs + n) * sizeof(u64), u64, staged[n].pte);
+	}
+}
+
+static void xe_pt_cancel_bind(struct xe_vma *vma,
+			      struct xe_vm_pgtable_update *entries,
+			      u32 num_entries)
+{
+	u32 i, j;
+
+	for (i = 0; i < num_entries; i++) {
+		struct xe_vm_pgtable_update *entry = &entries[i];
+
+		if (!entry->pt)
+			continue;
+
+		/* Level 0 is skipped; for directories destroy each staged pt. */
+		for (j = 0; entry->pt->level && j < entry->qwords; j++)
+			xe_pt_destroy(entry->pt_entries[j].pt,
+				      xe_vma_vm(vma)->flags, NULL);
+
+		/* Release the staging array and mark the entry as empty. */
+		kfree(entry->pt_entries);
+		entry->pt_entries = NULL;
+		entry->qwords = 0;
+	}
+}
+
+static void xe_pt_commit_locks_assert(struct xe_vma *vma)
+{
+	struct xe_vm *vm = xe_vma_vm(vma);
+	bool check_bo = !(xe_vma_is_userptr(vma) || xe_vma_is_null(vma));
+
+	lockdep_assert_held(&vm->lock);
+	/* The bo resv is only looked at for non-userptr, non-null vmas. */
+	if (check_bo)
+		dma_resv_assert_held(xe_vma_bo(vma)->ttm.base.resv);
+	xe_vm_assert_held(vm);
+}
+
+static void xe_pt_commit(struct xe_vma *vma,
+			 struct xe_vm_pgtable_update *entries,
+			 u32 num_entries, struct llist_head *deferred)
+{
+	u32 i, j;
+
+	xe_pt_commit_locks_assert(vma);
+
+	for (i = 0; i < num_entries; i++) {
+		struct xe_vm_pgtable_update *entry = &entries[i];
+
+		/* Entries for level-0 page-tables are skipped. */
+		if (entry->pt->level == 0)
+			continue;
+
+		/* Destroy every staged pt; xe_pt_destroy() ignores NULL. */
+		for (j = 0; j < entry->qwords; j++)
+			xe_pt_destroy(entry->pt_entries[j].pt,
+				      xe_vma_vm(vma)->flags, deferred);
+	}
+}
+
+static void xe_pt_abort_bind(struct xe_vma *vma,
+			     struct xe_vm_pgtable_update *entries,
+			     u32 num_entries, bool rebind)
+{
+	int i, j;
+
+	xe_pt_commit_locks_assert(vma);
+
+	/* Undo xe_pt_commit_prepare_bind(), walking the entries in reverse. */
+	for (i = num_entries - 1; i >= 0; --i) {
+		struct xe_pt *pt = entries[i].pt;
+		struct xe_pt_dir *pt_dir;
+
+		if (!rebind)
+			pt->num_live -= entries[i].qwords;
+		if (!pt->level)
+			continue;
+
+		pt_dir = as_xe_pt_dir(pt);
+		for (j = 0; j < entries[i].qwords; j++) {
+			u32 j_ = j + entries[i].ofs;
+			struct xe_pt *newpte = xe_pt_entry(pt_dir, j_);
+			struct xe_pt *oldpte = entries[i].pt_entries[j].pt;
+
+			pt_dir->children[j_] = oldpte ? &oldpte->base : NULL;
+			xe_pt_destroy(newpte, xe_vma_vm(vma)->flags, NULL);
+		}
+	}
+}
+
+static void xe_pt_commit_prepare_bind(struct xe_vma *vma,
+				      struct xe_vm_pgtable_update *entries,
+				      u32 num_entries, bool rebind)
+{
+	u32 i, j;
+
+	xe_pt_commit_locks_assert(vma);
+
+	/* Connect the staged children, remembering what each one replaces. */
+	for (i = 0; i < num_entries; i++) {
+		struct xe_pt *pt = entries[i].pt;
+		struct xe_pt_dir *pt_dir;
+
+		if (!rebind)
+			pt->num_live += entries[i].qwords;
+
+		if (!pt->level)
+			continue;
+
+		pt_dir = as_xe_pt_dir(pt);
+		for (j = 0; j < entries[i].qwords; j++) {
+			u32 j_ = j + entries[i].ofs;
+			struct xe_pt *newpte = entries[i].pt_entries[j].pt;
+			struct xe_pt *oldpte = xe_pt_entry(pt_dir, j_);
+
+			/* A staged leaf pte has no pt: store NULL explicitly. */
+			pt_dir->children[j_] = newpte ? &newpte->base : NULL;
+			/* Keep the old child for xe_pt_commit() or an abort. */
+			entries[i].pt_entries[j].pt = oldpte;
+		}
+	}
+}
+
+static void xe_pt_free_bind(struct xe_vm_pgtable_update *entries,
+			    u32 num_entries)
+{
+	struct xe_vm_pgtable_update *entry = entries;
+
+	for (; entry < entries + num_entries; entry++)
+		kfree(entry->pt_entries);	/* the staging array only */
+}
+
+static int
+xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma,
+		   struct xe_vm_pgtable_update *entries, u32 *num_entries)
+{
+	int ret;
+
+	*num_entries = 0;
+	ret = xe_pt_stage_bind(tile, vma, entries, num_entries);
+	if (ret)
+		return ret;
+	xe_tile_assert(tile, *num_entries);	/* success must use >= 1 entry */
+	return 0;
+}
+
+static void xe_vm_dbg_print_entries(struct xe_device *xe,
+				    const struct xe_vm_pgtable_update *entries,
+				    unsigned int num_entries, bool bind)
+#if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM))
+{
+	unsigned int i;
+	/* One summary line, then one line per update entry. */
+	vm_dbg(&xe->drm, "%s: %u entries to update\n", bind ? "bind" : "unbind",
+	       num_entries);
+	for (i = 0; i < num_entries; i++) {
+		const struct xe_vm_pgtable_update *entry = &entries[i];
+		struct xe_pt *xe_pt = entry->pt;
+		u64 page_size = 1ull << xe_pt_shift(xe_pt->level);
+		u64 end;
+		u64 start;
+		/* The range below is computed with xe_pt_shift(); compact pts are asserted against. */
+		xe_assert(xe, !entry->pt->is_compact);
+		start = entry->ofs * page_size;
+		end = start + page_size * entry->qwords;
+		vm_dbg(&xe->drm,
+		       "\t%u: Update level %u at (%u + %u) [%llx...%llx) f:%x\n",
+		       i, xe_pt->level, entry->ofs, entry->qwords,
+		       xe_pt_addr(xe_pt) + start, xe_pt_addr(xe_pt) + end, 0);
+	}
+}
+#else	/* !CONFIG_DRM_XE_DEBUG_VM: empty body */
+{}
+#endif
+
+static bool no_in_syncs(struct xe_sync_entry *syncs, u32 num_syncs)
+{
+	u32 i;	/* same type as @num_syncs: no signed/unsigned comparison */
+
+	/* False as soon as one sync has a fence not yet flagged as signaled. */
+	for (i = 0; i < num_syncs; i++) {
+		struct dma_fence *fence = syncs[i].fence;
+
+		if (fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+				       &fence->flags))
+			return false;
+	}
+	return true;
+}
+
+static int job_test_add_deps(struct xe_sched_job *job,
+			     struct dma_resv *resv,
+			     enum dma_resv_usage usage)
+{
+	/* With a job, leave @resv to xe_sched_job_add_deps(). */
+	if (job)
+		return xe_sched_job_add_deps(job, resv, usage);
+
+	/*
+	 * Without a job, @resv must test as signaled already.
+	 */
+	return dma_resv_test_signaled(resv, usage) ? 0 : -ETIME;
+}
+
+static int vma_add_deps(struct xe_vma *vma, struct xe_sched_job *job)
+{
+	struct xe_bo *bo = xe_vma_bo(vma);
+
+	xe_bo_assert_held(bo);
+
+	/* Only a bo with no vm set has its own resv tested or added here. */
+	if (!bo || bo->vm)
+		return 0;
+
+	return job_test_add_deps(job, bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL);
+}
+
+static int op_add_deps(struct xe_vm *vm, struct xe_vma_op *op,
+		       struct xe_sched_job *job)
+{
+	int err = 0;
+
+	switch (op->base.op) {
+	case DRM_GPUVA_OP_MAP:
+		/* Non-immediate maps in fault mode add no dependencies here. */
+		if (!op->map.immediate && xe_vm_in_fault_mode(vm))
+			break;
+		err = vma_add_deps(op->map.vma, job);
+		break;
+	case DRM_GPUVA_OP_REMAP:
+		if (op->remap.prev)
+			err = vma_add_deps(op->remap.prev, job);
+		if (!err && op->remap.next)
+			err = vma_add_deps(op->remap.next, job);
+		break;
+	case DRM_GPUVA_OP_UNMAP:
+		break;
+	case DRM_GPUVA_OP_PREFETCH:
+		err = vma_add_deps(gpuva_to_vma(op->base.prefetch.va), job);
+		break;
+	default:
+		/* Newline-terminate so the message isn't left as a partial line. */
+		drm_warn(&vm->xe->drm, "NOT POSSIBLE\n");
+	}
+	return err;
+}
+
+static int xe_pt_vm_dependencies(struct xe_sched_job *job,
+				 struct xe_vm *vm,
+				 struct xe_vma_ops *vops,
+				 struct xe_vm_pgtable_update_ops *pt_update_ops,
+				 struct xe_range_fence_tree *rftree)
+{
+	struct xe_range_fence *rtfence;
+	struct dma_fence *fence;
+	struct xe_vma_op *op;
+	int err = 0, i;
+
+	xe_vm_assert_held(vm);
+	/* No job means a CPU update: unsignaled in-syncs or a busy queue give -ETIME. */
+	if (!job && !no_in_syncs(vops->syncs, vops->num_syncs))
+		return -ETIME;
+
+	if (!job && !xe_exec_queue_is_idle(pt_update_ops->q))
+		return -ETIME;
+
+	if (pt_update_ops->wait_vm_bookkeep || pt_update_ops->wait_vm_kernel) {
+		err = job_test_add_deps(job, xe_vm_resv(vm),
+					pt_update_ops->wait_vm_bookkeep ?
+					DMA_RESV_USAGE_BOOKKEEP :
+					DMA_RESV_USAGE_KERNEL);
+		if (err)
+			return err;
+	}
+	/* Range fences found in @rftree for [start, last] of this update. */
+	rtfence = xe_range_fence_tree_first(rftree, pt_update_ops->start,
+					    pt_update_ops->last);
+	while (rtfence) {
+		fence = rtfence->fence;
+
+		if (!dma_fence_is_signaled(fence)) {
+			/*
+			 * Is this a CPU update? GPU is busy updating, so return
+			 * an error
+			 */
+			if (!job)
+				return -ETIME;
+			/* NOTE(review): this reference is presumably consumed by the call below - confirm. */
+			dma_fence_get(fence);
+			err = drm_sched_job_add_dependency(&job->drm, fence);
+			if (err)
+				return err;
+		}
+
+		rtfence = xe_range_fence_tree_next(rtfence,
+						   pt_update_ops->start,
+						   pt_update_ops->last);
+	}
+	/* Per-op dependencies, see op_add_deps(). */
+	list_for_each_entry(op, &vops->list, link) {
+		err = op_add_deps(vm, op, job);
+		if (err)
+			return err;
+	}
+	/* Queues without EXEC_QUEUE_FLAG_KERNEL also add or test the last fence. */
+	if (!(pt_update_ops->q->flags & EXEC_QUEUE_FLAG_KERNEL)) {
+		if (job)
+			err = xe_sched_job_last_fence_add_dep(job, vm);
+		else
+			err = xe_exec_queue_last_fence_test_dep(pt_update_ops->q, vm);
+	}
+	/* Finally, with a job and no error so far, add the sync entries. */
+	for (i = 0; job && !err && i < vops->num_syncs; i++)
+		err = xe_sync_entry_add_deps(&vops->syncs[i], job);
+
+	return err;
+}
+
+/*
+ * pre_commit hook for non-userptr updates: resolve the per-tile update state
+ * and range-fence tree from @pt_update and collect the dependencies.
+ */
+static int xe_pt_pre_commit(struct xe_migrate_pt_update *pt_update)
+{
+	struct xe_vma_ops *vops = pt_update->vops;
+	struct xe_vm *vm = vops->vm;
+
+	return xe_pt_vm_dependencies(pt_update->job, vm, vops,
+				     &vops->pt_update_ops[pt_update->tile_id],
+				     &vm->rftree[pt_update->tile_id]);
+}
+
+#ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT
+
+/*
+ * Error injection: report a fake invalidation whenever the shared call
+ * counter hits the last slot of the current divisor, then double the
+ * per-VMA divisor. An unset (zero) divisor is treated as 2.
+ */
+static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma)
+{
+	static u32 count;
+	u32 divisor = uvma->userptr.divisor;
+
+	if (!divisor)
+		divisor = 2;
+
+	if (count++ % divisor != divisor - 1)
+		return false;
+
+	uvma->userptr.divisor = divisor << 1;
+	return true;
+}
+
+#else
+
+/* Injection compiled out: never fake an invalidation. */
+static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma)
+{
+	return false;
+}
+
+#endif
+
+/*
+ * Check, with the userptr notifier lock held for read, whether the pages of a
+ * userptr VMA were invalidated since its notifier sequence was sampled.
+ *
+ * Non-userptr VMAs, and already initially-bound userptr VMAs on VMs that are
+ * not in fault mode, are accepted as is. On a detected (or injected)
+ * invalidation a fault-mode VM gets -EAGAIN; otherwise the VMA is moved to
+ * the VM's invalidated list and, in preempt-fence mode, the VM's bookkeep
+ * fences are waited for.
+ *
+ * Return: 0 or -EAGAIN.
+ */
+static int vma_check_userptr(struct xe_vm *vm, struct xe_vma *vma,
+			     struct xe_vm_pgtable_update_ops *pt_update)
+{
+	struct xe_userptr_vma *uvma;
+	struct dma_resv_iter cursor;
+	struct dma_fence *fence;
+	unsigned long seq;
+	long remaining;
+
+	lockdep_assert_held_read(&vm->userptr.notifier_lock);
+
+	if (!xe_vma_is_userptr(vma))
+		return 0;
+
+	uvma = to_userptr_vma(vma);
+	seq = uvma->userptr.notifier_seq;
+
+	if (uvma->userptr.initial_bind && !xe_vm_in_fault_mode(vm))
+		return 0;
+
+	if (!mmu_interval_read_retry(&uvma->userptr.notifier, seq) &&
+	    !xe_pt_userptr_inject_eagain(uvma))
+		return 0;
+
+	if (xe_vm_in_fault_mode(vm))
+		return -EAGAIN;
+
+	spin_lock(&vm->userptr.invalidated_lock);
+	list_move_tail(&uvma->userptr.invalidate_link,
+		       &vm->userptr.invalidated);
+	spin_unlock(&vm->userptr.invalidated_lock);
+
+	if (!xe_vm_in_preempt_fence_mode(vm))
+		return 0;
+
+	/* Enable signaling on every bookkeep fence before blocking on them. */
+	dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
+			    DMA_RESV_USAGE_BOOKKEEP);
+	dma_resv_for_each_fence_unlocked(&cursor, fence)
+		dma_fence_enable_sw_signaling(fence);
+	dma_resv_iter_end(&cursor);
+
+	remaining = dma_resv_wait_timeout(xe_vm_resv(vm),
+					  DMA_RESV_USAGE_BOOKKEEP,
+					  false, MAX_SCHEDULE_TIMEOUT);
+	XE_WARN_ON(remaining <= 0);
+
+	return 0;
+}
+
+/*
+ * Run vma_check_userptr() on every VMA that the given GPUVA operation binds.
+ * Unmaps are not checked; a MAP that is not immediate on a fault-mode VM is
+ * skipped. Must be called with the userptr notifier lock held for read.
+ *
+ * Return: 0 on success or the first error from vma_check_userptr().
+ */
+static int op_check_userptr(struct xe_vm *vm, struct xe_vma_op *op,
+			    struct xe_vm_pgtable_update_ops *pt_update)
+{
+	int ret;
+
+	lockdep_assert_held_read(&vm->userptr.notifier_lock);
+
+	switch (op->base.op) {
+	case DRM_GPUVA_OP_MAP:
+		if (!op->map.immediate && xe_vm_in_fault_mode(vm))
+			return 0;
+
+		return vma_check_userptr(vm, op->map.vma, pt_update);
+	case DRM_GPUVA_OP_REMAP:
+		ret = op->remap.prev ?
+			vma_check_userptr(vm, op->remap.prev, pt_update) : 0;
+		if (ret || !op->remap.next)
+			return ret;
+
+		return vma_check_userptr(vm, op->remap.next, pt_update);
+	case DRM_GPUVA_OP_UNMAP:
+		return 0;
+	case DRM_GPUVA_OP_PREFETCH:
+		return vma_check_userptr(vm, gpuva_to_vma(op->base.prefetch.va),
+					 pt_update);
+	default:
+		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
+		return 0;
+	}
+}
+
+/*
+ * pre_commit hook used when the update touches userptr VMAs.
+ *
+ * Runs the regular dependency collection first, then takes the VM's userptr
+ * notifier lock for read and validates every operation's userptr VMAs.
+ *
+ * Locking: on success (return 0) the notifier lock is intentionally left held
+ * for read; xe_pt_update_ops_run() drops it once the update is committed. On
+ * any error the lock is not held on return.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update)
+{
+	struct xe_vm *vm = pt_update->vops->vm;
+	struct xe_vma_ops *vops = pt_update->vops;
+	struct xe_vm_pgtable_update_ops *pt_update_ops =
+		&vops->pt_update_ops[pt_update->tile_id];
+	struct xe_vma_op *op;
+	int err;
+
+	/* Lock not yet taken here, so a plain return is fine. */
+	err = xe_pt_pre_commit(pt_update);
+	if (err)
+		return err;
+
+	down_read(&vm->userptr.notifier_lock);
+
+	list_for_each_entry(op, &vops->list, link) {
+		err = op_check_userptr(vm, op, pt_update_ops);
+		if (err) {
+			/* Failure: release the lock before reporting the error. */
+			up_read(&vm->userptr.notifier_lock);
+			break;
+		}
+	}
+
+	/* err == 0 here means the read lock is still held by design. */
+	return err;
+}
+
+/*
+ * TLB invalidation fence chained behind another fence: once @fence signals
+ * without error, a worker issues a ranged TLB invalidation on @gt using
+ * @base; if @fence signals with an error, @base is signaled with that error
+ * instead.
+ */
+struct invalidation_fence {
+	/* TLB invalidation fence handed to xe_gt_tlb_invalidation_range() */
+	struct xe_gt_tlb_invalidation_fence base;
+	/* GT on which the invalidation is issued */
+	struct xe_gt *gt;
+	/* Fence the invalidation waits for; a reference is held until the callback ran */
+	struct dma_fence *fence;
+	/* Callback installed on @fence */
+	struct dma_fence_cb cb;
+	/* Work item that issues the invalidation */
+	struct work_struct work;
+	/* Start of the range passed to the invalidation */
+	u64 start;
+	/* End of the range passed to the invalidation */
+	u64 end;
+	/* ASID passed to the invalidation */
+	u32 asid;
+};
+
+/*
+ * dma_fence callback fired when the fence an invalidation waits on signals.
+ *
+ * If the dependency completed without error the actual TLB invalidation is
+ * deferred to a worker on system_wq. Otherwise the error is propagated to
+ * the invalidation fence, which is signaled and has one reference dropped
+ * here. In both cases the reference held on the dependency is released.
+ *
+ * The dependency pointer is sampled once up front: after queue_work() the
+ * worker may already be consuming @ifence, and after the dma_fence_put() in
+ * the error branch @ifence may have lost its last reference, so @ifence must
+ * not be dereferenced past those points.
+ */
+static void invalidation_fence_cb(struct dma_fence *fence,
+				  struct dma_fence_cb *cb)
+{
+	struct invalidation_fence *ifence =
+		container_of(cb, struct invalidation_fence, cb);
+	struct xe_device *xe = gt_to_xe(ifence->gt);
+	struct dma_fence *dep = ifence->fence;
+
+	trace_xe_gt_tlb_invalidation_fence_cb(xe, &ifence->base);
+	if (!dep->error) {
+		queue_work(system_wq, &ifence->work);
+	} else {
+		ifence->base.base.error = dep->error;
+		dma_fence_signal(&ifence->base.base);
+		dma_fence_put(&ifence->base.base);
+	}
+
+	/* @ifence is off limits from here on; only the cached pointer is used. */
+	dma_fence_put(dep);
+}
+
+/*
+ * Worker body: issue the ranged TLB invalidation described by the enclosing
+ * invalidation_fence on its GT.
+ */
+static void invalidation_fence_work_func(struct work_struct *w)
+{
+	struct invalidation_fence *ifence =
+		container_of(w, typeof(*ifence), work);
+	struct xe_gt *gt = ifence->gt;
+
+	trace_xe_gt_tlb_invalidation_fence_work_func(gt_to_xe(gt),
+						     &ifence->base);
+	xe_gt_tlb_invalidation_range(gt, &ifence->base,
+				     ifence->start, ifence->end,
+				     ifence->asid);
+}
+
+/*
+ * Initialize @ifence so that a ranged TLB invalidation on @gt is issued once
+ * @fence has signaled.
+ *
+ * @gt: GT to invalidate on.
+ * @ifence: Caller-allocated invalidation fence to set up.
+ * @fence: Fence to wait for. The reference passed in is dropped by this
+ *         function's machinery (in the callback, or right here when the
+ *         callback could not be installed with -ENOENT).
+ * @start: Start of the range to invalidate.
+ * @end: End of the range to invalidate.
+ * @asid: ASID to invalidate.
+ */
+static void invalidation_fence_init(struct xe_gt *gt,
+				    struct invalidation_fence *ifence,
+				    struct dma_fence *fence,
+				    u64 start, u64 end, u32 asid)
+{
+	int ret;
+
+	trace_xe_gt_tlb_invalidation_fence_create(gt_to_xe(gt), &ifence->base);
+
+	xe_gt_tlb_invalidation_fence_init(gt, &ifence->base, false);
+
+	ifence->fence = fence;
+	ifence->gt = gt;
+	ifence->start = start;
+	ifence->end = end;
+	ifence->asid = asid;
+
+	INIT_WORK(&ifence->work, invalidation_fence_work_func);
+	ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb);
+	if (ret == -ENOENT) {
+		/*
+		 * Callback not installed (dma_fence_add_callback() documents
+		 * -ENOENT as "already signaled"): do the callback's work
+		 * synchronously.
+		 */
+		dma_fence_put(ifence->fence);	/* Usually dropped in CB */
+		invalidation_fence_work_func(&ifence->work);
+	} else if (ret) {
+		/* Unexpected failure: tear the fence down by dropping both refs. */
+		dma_fence_put(&ifence->base.base);	/* Caller ref */
+		dma_fence_put(&ifence->base.base);	/* Creation ref */
+	}
+
+	/* Only success and -ENOENT are expected from the callback install. */
+	xe_gt_assert(gt, !ret || ret == -ENOENT);
+}
+
+/* State carried through the page-table walk that stages an unbind. */
+struct xe_pt_stage_unbind_walk {
+	/** @base: The pagewalk base-class. */
+	struct xe_pt_walk base;
+
+	/* Input parameters for the walk */
+	/** @tile: The tile we're unbinding from. */
+	struct xe_tile *tile;
+
+	/**
+	 * @modified_start: Walk range start, modified to include any
+	 * shared pagetables that we're the only user of and can thus
+	 * treat as private.
+	 */
+	u64 modified_start;
+	/** @modified_end: Walk range end, modified like @modified_start. */
+	u64 modified_end;
+
+	/* Output */
+	/** @wupd: Structure to track the page-table updates we're building */
+	struct xe_walk_update wupd;
+};
+
+/*
+ * Decide whether the range [addr, next) accounts for every live entry of
+ * @child at @level. If so, the child is not shared with anyone else: request
+ * ACTION_CONTINUE and widen the walk's modified range to the next level's
+ * granularity so that higher levels do not treat it as a shared pagetable.
+ *
+ * Return: true if the pagetable is exclusively ours, false otherwise.
+ */
+static bool xe_pt_check_kill(u64 addr, u64 next, unsigned int level,
+			     const struct xe_pt *child,
+			     enum page_walk_action *action,
+			     struct xe_pt_walk *walk)
+{
+	struct xe_pt_stage_unbind_walk *xe_walk =
+		container_of(walk, typeof(*xe_walk), base);
+	unsigned int shift = walk->shifts[level];
+	u64 size = 1ull << shift;
+	u64 parent_size;
+
+	if (!IS_ALIGNED(addr, size) || !IS_ALIGNED(next, size))
+		return false;
+
+	if (((next - addr) >> shift) != child->num_live)
+		return false;
+
+	parent_size = 1ull << walk->shifts[level + 1];
+
+	*action = ACTION_CONTINUE;
+
+	if (xe_walk->modified_start >= addr)
+		xe_walk->modified_start = round_down(addr, parent_size);
+	if (xe_walk->modified_end <= next)
+		xe_walk->modified_end = round_up(next, parent_size);
+
+	return true;
+}
+
+/*
+ * pt_entry walk callback for unbind staging: run the "only user" check on the
+ * child one level below @level. The result of the check is not needed here;
+ * only its side effects on @action and the walk state matter.
+ *
+ * Return: always 0.
+ */
+static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset,
+				    unsigned int level, u64 addr, u64 next,
+				    struct xe_ptw **child,
+				    enum page_walk_action *action,
+				    struct xe_pt_walk *walk)
+{
+	struct xe_pt *xe_child;
+
+	XE_WARN_ON(!*child);
+	XE_WARN_ON(!level);
+
+	xe_child = container_of(*child, typeof(*xe_child), base);
+	(void)xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk);
+
+	return 0;
+}
+
+/*
+ * pt_post_descend walk callback for unbind staging.
+ *
+ * After the children of *@child have been visited, decide whether the child
+ * itself can be dropped entirely (handled by the parent) or whether a range
+ * of its entries has to be recorded as a shared-pagetable update.
+ *
+ * Return: 0 on success, or the error from xe_pt_new_shared().
+ */
+static int
+xe_pt_stage_unbind_post_descend(struct xe_ptw *parent, pgoff_t offset,
+				unsigned int level, u64 addr, u64 next,
+				struct xe_ptw **child,
+				enum page_walk_action *action,
+				struct xe_pt_walk *walk)
+{
+	struct xe_pt_stage_unbind_walk *xe_walk =
+		container_of(walk, typeof(*xe_walk), base);
+	struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base);
+	pgoff_t end_offset;
+	/* Note: @level is decremented here and refers to the child from now on. */
+	u64 size = 1ull << walk->shifts[--level];
+	int err;
+
+	/* Unaligned boundaries are replaced by the (possibly widened) walk range. */
+	if (!IS_ALIGNED(addr, size))
+		addr = xe_walk->modified_start;
+	if (!IS_ALIGNED(next, size))
+		next = xe_walk->modified_end;
+
+	/* Parent == *child is the root pt. Don't kill it. */
+	if (parent != *child &&
+	    xe_pt_check_kill(addr, next, level, xe_child, action, walk))
+		return 0;
+
+	/* Nothing to record if the helper reports no non-shared offsets. */
+	if (!xe_pt_nonshared_offsets(addr, next, level, walk, action, &offset,
+				     &end_offset))
+		return 0;
+
+	err = xe_pt_new_shared(&xe_walk->wupd, xe_child, offset, true);
+	if (err)
+		return err;
+
+	/* Number of entries covered by the update just registered. */
+	xe_walk->wupd.updates[level].update->qwords = end_offset - offset;
+
+	return 0;
+}
+
+/* Walk callbacks used by xe_pt_stage_unbind(). */
+static const struct xe_pt_walk_ops xe_pt_stage_unbind_ops = {
+	.pt_entry = xe_pt_stage_unbind_entry,
+	.pt_post_descend = xe_pt_stage_unbind_post_descend,
+};
+
+/**
+ * xe_pt_stage_unbind() - Stage the page-table updates needed to unbind a vma
+ * @tile: The tile to unbind from.
+ * @vma: The vma being unbound.
+ * @entries: Caller-provided array receiving the update structures.
+ *
+ * Walks the page-table tree of @vma's VM on @tile over the vma's address
+ * range and fills @entries with the required updates. Page-tables that this
+ * vma is the sole user of are removed as a whole, which requires that the
+ * unbind is committed inside the same critical section that keeps racing
+ * binds away from the same page-table tree.
+ *
+ * Return: The number of entries used.
+ */
+static unsigned int xe_pt_stage_unbind(struct xe_tile *tile, struct xe_vma *vma,
+				       struct xe_vm_pgtable_update *entries)
+{
+	u64 range_start = xe_vma_start(vma);
+	u64 range_end = xe_vma_end(vma);
+	struct xe_pt *root = xe_vma_vm(vma)->pt_root[tile->id];
+	struct xe_pt_stage_unbind_walk xe_walk = {
+		.base = {
+			.ops = &xe_pt_stage_unbind_ops,
+			.shifts = xe_normal_pt_shifts,
+			.max_level = XE_PT_HIGHEST_LEVEL,
+		},
+		.tile = tile,
+		.modified_start = range_start,
+		.modified_end = range_end,
+		.wupd.entries = entries,
+	};
+
+	/* The walk's return value is deliberately ignored. */
+	(void)xe_pt_walk_shared(&root->base, root->level, range_start,
+				range_end, &xe_walk.base);
+
+	return xe_walk.wupd.num_used_entries;
+}
+
+/*
+ * "clear" hook of the migrate PT-update ops: fill @num_qwords page-table
+ * entries with the empty PTE value for the level of @update->pt.
+ *
+ * The destination is @ptr when no @map is given, otherwise @map at
+ * @qword_ofs, written either through xe_map_wr() (I/O memory) or directly.
+ */
+static void
+xe_migrate_clear_pgtable_callback(struct xe_migrate_pt_update *pt_update,
+				  struct xe_tile *tile, struct iosys_map *map,
+				  void *ptr, u32 qword_ofs, u32 num_qwords,
+				  const struct xe_vm_pgtable_update *update)
+{
+	struct xe_vm *vm = pt_update->vops->vm;
+	u64 empty = __xe_pt_empty_pte(tile, vm, update->pt->level);
+	int i;
+
+	if (!map) {
+		memset64(ptr, empty, num_qwords);
+		return;
+	}
+
+	if (!map->is_iomem) {
+		memset64(map->vaddr + qword_ofs * sizeof(u64), empty,
+			 num_qwords);
+		return;
+	}
+
+	for (i = 0; i < num_qwords; ++i)
+		xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) *
+			  sizeof(u64), u64, empty);
+}
+
+/*
+ * Undo xe_pt_commit_prepare_unbind(): walking the entries in reverse, give
+ * each page-table its live count back and, for directory levels, restore the
+ * child pointers that were stashed in the entry's pt_entries array.
+ */
+static void xe_pt_abort_unbind(struct xe_vma *vma,
+			       struct xe_vm_pgtable_update *entries,
+			       u32 num_entries)
+{
+	int i, j;
+
+	xe_pt_commit_locks_assert(vma);
+
+	i = num_entries - 1;
+	while (i >= 0) {
+		struct xe_vm_pgtable_update *entry = &entries[i--];
+		struct xe_pt *pt = entry->pt;
+		struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt);
+
+		pt->num_live += entry->qwords;
+
+		/* Leaf tables have no child pointers to restore. */
+		if (!pt->level)
+			continue;
+
+		for (j = entry->ofs; j < entry->ofs + entry->qwords; j++) {
+			struct xe_pt *saved =
+				entry->pt_entries[j - entry->ofs].pt;
+
+			pt_dir->children[j] = saved ? &saved->base : NULL;
+		}
+	}
+}
+
+/*
+ * Apply the staged unbind to the CPU-side page-table bookkeeping: drop the
+ * live count of each touched page-table and, for directory levels, stash
+ * every affected child in the entry's pt_entries array before clearing the
+ * child slot.
+ */
+static void
+xe_pt_commit_prepare_unbind(struct xe_vma *vma,
+			    struct xe_vm_pgtable_update *entries,
+			    u32 num_entries)
+{
+	int i, j;
+
+	xe_pt_commit_locks_assert(vma);
+
+	for (i = 0; i < num_entries; ++i) {
+		struct xe_vm_pgtable_update *entry = &entries[i];
+		struct xe_pt *pt = entry->pt;
+		struct xe_pt_dir *pt_dir;
+
+		pt->num_live -= entry->qwords;
+
+		if (pt->level) {
+			pt_dir = as_xe_pt_dir(pt);
+			j = entry->ofs;
+			while (j < entry->ofs + entry->qwords) {
+				entry->pt_entries[j - entry->ofs].pt =
+					xe_pt_entry(pt_dir, j);
+				pt_dir->children[j] = NULL;
+				j++;
+			}
+		}
+	}
+}
+
+/*
+ * Grow the [start, last] interval tracked in @pt_update_ops so that it covers
+ * @vma, aligned to the span of the highest page-table level touched by the
+ * current operation.
+ */
+static void
+xe_pt_update_ops_rfence_interval(struct xe_vm_pgtable_update_ops *pt_update_ops,
+				 struct xe_vma *vma)
+{
+	struct xe_vm_pgtable_update_op *pt_op =
+		&pt_update_ops->ops[pt_update_ops->current_op];
+	int idx, level = 0;
+	u64 span, start, last;
+
+	/* Find the highest level among this op's entries. */
+	for (idx = 0; idx < pt_op->num_entries; idx++) {
+		const struct xe_vm_pgtable_update *entry = &pt_op->entries[idx];
+
+		if (level < entry->pt->level)
+			level = entry->pt->level;
+	}
+
+	/* Greedy (non-optimal) calculation but simple */
+	span = 0x1ull << xe_pt_shift(level);
+	start = ALIGN_DOWN(xe_vma_start(vma), span);
+	last = ALIGN(xe_vma_end(vma), span) - 1;
+
+	if (pt_update_ops->start > start)
+		pt_update_ops->start = start;
+	if (pt_update_ops->last < last)
+		pt_update_ops->last = last;
+}
+
+/*
+ * Reserve fence slots on the reservation object of @vma's BO, but only for
+ * BOs that have no bo->vm set. The slot count is the tile count, doubled when
+ * the root tile has a media GT.
+ *
+ * Return: 0 if nothing had to be reserved, otherwise the result of
+ * dma_resv_reserve_fences().
+ */
+static int vma_reserve_fences(struct xe_device *xe, struct xe_vma *vma)
+{
+	int shift = xe_device_get_root_tile(xe)->media_gt ? 1 : 0;
+	struct xe_bo *bo;
+
+	if (xe_vma_has_no_bo(vma))
+		return 0;
+
+	bo = xe_vma_bo(vma);
+	if (bo->vm)
+		return 0;
+
+	return dma_resv_reserve_fences(bo->ttm.base.resv,
+				       xe->info.tile_count << shift);
+}
+
+/*
+ * Prepare a bind of @vma on @tile as the next operation in @pt_update_ops.
+ *
+ * Reserves fence slots, builds the page-table update entries and, on
+ * success, advances current_op, updates the range-fence interval and the
+ * userptr / invalidation flags, marks the tile as staged in the vma and
+ * applies the prepared state to the page-table tree. On failure of the
+ * prepare step the partially built entries are cancelled.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile,
+			   struct xe_vm_pgtable_update_ops *pt_update_ops,
+			   struct xe_vma *vma)
+{
+	u32 current_op = pt_update_ops->current_op;
+	struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op];
+	int err;
+
+	xe_bo_assert_held(xe_vma_bo(vma));
+
+	vm_dbg(&xe_vma_vm(vma)->xe->drm,
+	       "Preparing bind, with range [%llx...%llx)\n",
+	       xe_vma_start(vma), xe_vma_end(vma) - 1);
+
+	/* pt_op->vma stays NULL until the op is fully prepared (see below). */
+	pt_op->vma = NULL;
+	pt_op->bind = true;
+	/* A bind on a tile where the vma is already present is a rebind. */
+	pt_op->rebind = BIT(tile->id) & vma->tile_present;
+
+	err = vma_reserve_fences(tile_to_xe(tile), vma);
+	if (err)
+		return err;
+
+	err = xe_pt_prepare_bind(tile, vma, pt_op->entries,
+				 &pt_op->num_entries);
+	if (!err) {
+		xe_tile_assert(tile, pt_op->num_entries <=
+			       ARRAY_SIZE(pt_op->entries));
+		xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries,
+					pt_op->num_entries, true);
+
+		/* Must run before current_op is advanced: it reads ops[current_op]. */
+		xe_pt_update_ops_rfence_interval(pt_update_ops, vma);
+		++pt_update_ops->current_op;
+		pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma);
+
+		/*
+		 * If rebind, we have to invalidate TLB on !LR vms to invalidate
+		 * cached PTEs point to freed memory. On LR vms this is done
+		 * automatically when the context is re-enabled by the rebind worker,
+		 * or in fault mode it was invalidated on PTE zapping.
+		 *
+		 * If !rebind, and scratch enabled VMs, there is a chance the scratch
+		 * PTE is already cached in the TLB so it needs to be invalidated.
+		 * On !LR VMs this is done in the ring ops preceding a batch, but on
+		 * non-faulting LR, in particular on user-space batch buffer chaining,
+		 * it needs to be done here.
+		 */
+		if ((!pt_op->rebind && xe_vm_has_scratch(vm) &&
+		     xe_vm_in_preempt_fence_mode(vm)))
+			pt_update_ops->needs_invalidation = true;
+		else if (pt_op->rebind && !xe_vm_in_lr_mode(vm))
+			/* We bump also if batch_invalidate_tlb is true */
+			vm->tlb_flush_seqno++;
+
+		vma->tile_staged |= BIT(tile->id);
+		/* From here on xe_pt_update_ops_abort() will unwind this op. */
+		pt_op->vma = vma;
+		xe_pt_commit_prepare_bind(vma, pt_op->entries,
+					  pt_op->num_entries, pt_op->rebind);
+	} else {
+		xe_pt_cancel_bind(vma, pt_op->entries, pt_op->num_entries);
+	}
+
+	return err;
+}
+
+/*
+ * Prepare an unbind of @vma on @tile as the next operation in
+ * @pt_update_ops.
+ *
+ * Does nothing if the vma is neither present nor staged on this tile.
+ * Otherwise reserves fence slots, stages the page-table updates, advances
+ * current_op, flags that a TLB invalidation is needed and applies the staged
+ * state to the page-table tree.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+static int unbind_op_prepare(struct xe_tile *tile,
+			     struct xe_vm_pgtable_update_ops *pt_update_ops,
+			     struct xe_vma *vma)
+{
+	u32 current_op = pt_update_ops->current_op;
+	struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op];
+	int err;
+
+	/* Nothing mapped or staged on this tile: no op slot is consumed. */
+	if (!((vma->tile_present | vma->tile_staged) & BIT(tile->id)))
+		return 0;
+
+	xe_bo_assert_held(xe_vma_bo(vma));
+
+	vm_dbg(&xe_vma_vm(vma)->xe->drm,
+	       "Preparing unbind, with range [%llx...%llx)\n",
+	       xe_vma_start(vma), xe_vma_end(vma) - 1);
+
+	/*
+	 * Wait for invalidation to complete. Can corrupt internal page table
+	 * state if an invalidation is running while preparing an unbind.
+	 */
+	if (xe_vma_is_userptr(vma) && xe_vm_in_fault_mode(xe_vma_vm(vma)))
+		mmu_interval_read_begin(&to_userptr_vma(vma)->userptr.notifier);
+
+	pt_op->vma = vma;
+	pt_op->bind = false;
+	pt_op->rebind = false;
+
+	/* On failure current_op is not advanced, so abort skips this slot. */
+	err = vma_reserve_fences(tile_to_xe(tile), vma);
+	if (err)
+		return err;
+
+	pt_op->num_entries = xe_pt_stage_unbind(tile, vma, pt_op->entries);
+
+	xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries,
+				pt_op->num_entries, false);
+	/* Must run before current_op is advanced: it reads ops[current_op]. */
+	xe_pt_update_ops_rfence_interval(pt_update_ops, vma);
+	++pt_update_ops->current_op;
+	pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma);
+	/* Unbinds always request a TLB invalidation. */
+	pt_update_ops->needs_invalidation = true;
+
+	xe_pt_commit_prepare_unbind(vma, pt_op->entries, pt_op->num_entries);
+
+	return 0;
+}
+
+/*
+ * Prepare the page-table updates for one GPUVA operation on @tile.
+ *
+ * MAP and PREFETCH become a bind, UNMAP becomes an unbind, and REMAP becomes
+ * an unbind of the old vma followed by binds of the optional prev/next
+ * remainders. Also records in @pt_update_ops which VM fences the update has
+ * to wait for (wait_vm_kernel / wait_vm_bookkeep).
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+static int op_prepare(struct xe_vm *vm,
+		      struct xe_tile *tile,
+		      struct xe_vm_pgtable_update_ops *pt_update_ops,
+		      struct xe_vma_op *op)
+{
+	int err = 0;
+
+	xe_vm_assert_held(vm);
+
+	switch (op->base.op) {
+	case DRM_GPUVA_OP_MAP:
+		/* Non-immediate maps on fault-mode VMs are not bound here. */
+		if (!op->map.immediate && xe_vm_in_fault_mode(vm))
+			break;
+
+		err = bind_op_prepare(vm, tile, pt_update_ops, op->map.vma);
+		pt_update_ops->wait_vm_kernel = true;
+		break;
+	case DRM_GPUVA_OP_REMAP:
+		err = unbind_op_prepare(tile, pt_update_ops,
+					gpuva_to_vma(op->base.remap.unmap->va));
+
+		/* Each remainder is only prepared if everything before succeeded. */
+		if (!err && op->remap.prev) {
+			err = bind_op_prepare(vm, tile, pt_update_ops,
+					      op->remap.prev);
+			pt_update_ops->wait_vm_bookkeep = true;
+		}
+		if (!err && op->remap.next) {
+			err = bind_op_prepare(vm, tile, pt_update_ops,
+					      op->remap.next);
+			pt_update_ops->wait_vm_bookkeep = true;
+		}
+		break;
+	case DRM_GPUVA_OP_UNMAP:
+		err = unbind_op_prepare(tile, pt_update_ops,
+					gpuva_to_vma(op->base.unmap.va));
+		break;
+	case DRM_GPUVA_OP_PREFETCH:
+		err = bind_op_prepare(vm, tile, pt_update_ops,
+				      gpuva_to_vma(op->base.prefetch.va));
+		pt_update_ops->wait_vm_kernel = true;
+		break;
+	default:
+		/* Unknown op type: warn and report success. */
+		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
+	}
+
+	return err;
+}
+
+/*
+ * Reset the per-tile update state before preparing operations: an empty
+ * deferred list and an inverted (empty) [start, last] interval that the
+ * first operation will narrow down to its own range.
+ */
+static void
+xe_pt_update_ops_init(struct xe_vm_pgtable_update_ops *pt_update_ops)
+{
+	pt_update_ops->start = ~0x0ull;
+	pt_update_ops->last = 0x0ull;
+	init_llist_head(&pt_update_ops->deferred);
+}
+
+/**
+ * xe_pt_update_ops_prepare() - Prepare PT update operations
+ * @tile: Tile of PT update operations
+ * @vops: VMA operations
+ *
+ * Prepare PT update operations which includes updating internal PT state,
+ * allocate memory for page tables, populate page table being pruned in, and
+ * create PT update operations for leaf insertion / removal.
+ *
+ * The caller must hold the VM lock and the VM's reservation object.
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops)
+{
+	struct xe_vm_pgtable_update_ops *pt_update_ops =
+		&vops->pt_update_ops[tile->id];
+	struct xe_vma_op *op;
+	/* Double the number of reserved fence slots when the tile has a media GT. */
+	int shift = tile->media_gt ? 1 : 0;
+	int err;
+
+	lockdep_assert_held(&vops->vm->lock);
+	xe_vm_assert_held(vops->vm);
+
+	xe_pt_update_ops_init(pt_update_ops);
+
+	err = dma_resv_reserve_fences(xe_vm_resv(vops->vm),
+				      tile_to_xe(tile)->info.tile_count << shift);
+	if (err)
+		return err;
+
+	/* Stop at the first operation that fails to prepare. */
+	list_for_each_entry(op, &vops->list, link) {
+		err = op_prepare(vops->vm, tile, pt_update_ops, op);
+
+		if (err)
+			return err;
+	}
+
+	xe_tile_assert(tile, pt_update_ops->current_op <=
+		       pt_update_ops->num_ops);
+
+#ifdef TEST_VM_OPS_ERROR
+	/* Test-only error injection after a successful prepare. */
+	if (vops->inject_error &&
+	    vops->vm->xe->vm_inject_error_position == FORCE_OP_ERROR_PREPARE)
+		return -ENOSPC;
+#endif
+
+	return 0;
+}
+
+/*
+ * Finalize a bind of @vma on @tile once the update job has been created:
+ * install @fence (and @fence2, if any) on the BO's reservation object for BOs
+ * that have no bo->vm set, flip the tile from staged to present, mark userptr
+ * vmas as initially bound and, where required, kick the rebind worker.
+ */
+static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile,
+			   struct xe_vm_pgtable_update_ops *pt_update_ops,
+			   struct xe_vma *vma, struct dma_fence *fence,
+			   struct dma_fence *fence2)
+{
+	if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) {
+		struct dma_resv *resv = xe_vma_bo(vma)->ttm.base.resv;
+		enum dma_resv_usage usage = pt_update_ops->wait_vm_bookkeep ?
+			DMA_RESV_USAGE_KERNEL : DMA_RESV_USAGE_BOOKKEEP;
+
+		dma_resv_add_fence(resv, fence, usage);
+		if (fence2)
+			dma_resv_add_fence(resv, fence2, usage);
+	}
+
+	vma->tile_present |= BIT(tile->id);
+	vma->tile_staged &= ~BIT(tile->id);
+
+	if (xe_vma_is_userptr(vma)) {
+		lockdep_assert_held_read(&vm->userptr.notifier_lock);
+		to_userptr_vma(vma)->userptr.initial_bind = true;
+	}
+
+	/*
+	 * Kick rebind worker if this bind triggers preempt fences and not in
+	 * the rebind worker
+	 */
+	if (!pt_update_ops->wait_vm_bookkeep)
+		return;
+	if (!xe_vm_in_preempt_fence_mode(vm))
+		return;
+	if (!current->mm)
+		xe_vm_queue_rebind_worker(vm);
+}
+
+/*
+ * Finalize an unbind of @vma on @tile once the update job has been created:
+ * install @fence (and @fence2, if any) on the BO's reservation object for BOs
+ * that have no bo->vm set and clear the tile's present bit. When the vma is
+ * no longer present on any tile it is also taken off the rebind list and, for
+ * userptr vmas, off the invalidated list.
+ */
+static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile,
+			     struct xe_vm_pgtable_update_ops *pt_update_ops,
+			     struct xe_vma *vma, struct dma_fence *fence,
+			     struct dma_fence *fence2)
+{
+	if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) {
+		struct dma_resv *resv = xe_vma_bo(vma)->ttm.base.resv;
+		enum dma_resv_usage usage = pt_update_ops->wait_vm_bookkeep ?
+			DMA_RESV_USAGE_KERNEL : DMA_RESV_USAGE_BOOKKEEP;
+
+		dma_resv_add_fence(resv, fence, usage);
+		if (fence2)
+			dma_resv_add_fence(resv, fence2, usage);
+	}
+
+	vma->tile_present &= ~BIT(tile->id);
+	if (vma->tile_present)
+		return;
+
+	list_del_init(&vma->combined_links.rebind);
+	if (!xe_vma_is_userptr(vma))
+		return;
+
+	lockdep_assert_held_read(&vm->userptr.notifier_lock);
+
+	spin_lock(&vm->userptr.invalidated_lock);
+	list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link);
+	spin_unlock(&vm->userptr.invalidated_lock);
+}
+
+/*
+ * Commit one GPUVA operation on @tile after the update job exists, mirroring
+ * the dispatch done in op_prepare(): MAP and PREFETCH commit a bind, UNMAP
+ * commits an unbind, and REMAP commits the unbind of the old vma followed by
+ * the binds of the optional prev/next remainders.
+ *
+ * @fence and the optional @fence2 are the fences installed on the affected
+ * BOs' reservation objects.
+ */
+static void op_commit(struct xe_vm *vm,
+		      struct xe_tile *tile,
+		      struct xe_vm_pgtable_update_ops *pt_update_ops,
+		      struct xe_vma_op *op, struct dma_fence *fence,
+		      struct dma_fence *fence2)
+{
+	xe_vm_assert_held(vm);
+
+	switch (op->base.op) {
+	case DRM_GPUVA_OP_MAP:
+		/* Same skip condition as in op_prepare(). */
+		if (!op->map.immediate && xe_vm_in_fault_mode(vm))
+			break;
+
+		bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence,
+			       fence2);
+		break;
+	case DRM_GPUVA_OP_REMAP:
+		unbind_op_commit(vm, tile, pt_update_ops,
+				 gpuva_to_vma(op->base.remap.unmap->va), fence,
+				 fence2);
+
+		if (op->remap.prev)
+			bind_op_commit(vm, tile, pt_update_ops, op->remap.prev,
+				       fence, fence2);
+		if (op->remap.next)
+			bind_op_commit(vm, tile, pt_update_ops, op->remap.next,
+				       fence, fence2);
+		break;
+	case DRM_GPUVA_OP_UNMAP:
+		unbind_op_commit(vm, tile, pt_update_ops,
+				 gpuva_to_vma(op->base.unmap.va), fence, fence2);
+		break;
+	case DRM_GPUVA_OP_PREFETCH:
+		bind_op_commit(vm, tile, pt_update_ops,
+			       gpuva_to_vma(op->base.prefetch.va), fence, fence2);
+		break;
+	default:
+		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
+	}
+}
+
+/* PT-update callbacks used when no userptr vma is involved. */
+static const struct xe_migrate_pt_update_ops migrate_ops = {
+	.populate = xe_vm_populate_pgtable,
+	.clear = xe_migrate_clear_pgtable_callback,
+	.pre_commit = xe_pt_pre_commit,
+};
+
+/*
+ * PT-update callbacks used when a userptr vma is involved; differs from
+ * migrate_ops only in the pre_commit hook, which additionally takes the
+ * userptr notifier lock and validates the userptr vmas.
+ */
+static const struct xe_migrate_pt_update_ops userptr_migrate_ops = {
+	.populate = xe_vm_populate_pgtable,
+	.clear = xe_migrate_clear_pgtable_callback,
+	.pre_commit = xe_pt_userptr_pre_commit,
+};
+
+/**
+ * xe_pt_update_ops_run() - Run PT update operations
+ * @tile: Tile of PT update operations
+ * @vops: VMA operations
+ *
+ * Run PT update operations which includes committing internal PT state changes,
+ * creating job for PT update operations for leaf insertion / removal, and
+ * installing job fence in various places.
+ *
+ * The caller must hold the VM lock and the VM's reservation object. If the
+ * userptr pre_commit hook was used, the userptr notifier lock it left held
+ * is released here on the success path.
+ *
+ * Return: fence on success, negative ERR_PTR on error.
+ */
+struct dma_fence *
+xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
+{
+	struct xe_vm *vm = vops->vm;
+	struct xe_vm_pgtable_update_ops *pt_update_ops =
+		&vops->pt_update_ops[tile->id];
+	struct dma_fence *fence;
+	struct invalidation_fence *ifence = NULL, *mfence = NULL;
+<<<<<<<
+	struct dma_fence **fences = NULL;
+	struct dma_fence_array *cf = NULL;
+=======
+	struct dma_fence_chain *chain_fence = NULL;
+>>>>>>>
+	struct xe_range_fence *rfence;
+	struct xe_vma_op *op;
+	int err = 0, i;
+	/* Pick the userptr-aware callbacks if any prepared op touched a userptr. */
+	struct xe_migrate_pt_update update = {
+		.ops = pt_update_ops->needs_userptr_lock ?
+			&userptr_migrate_ops :
+			&migrate_ops,
+		.vops = vops,
+		.tile_id = tile->id,
+	};
+
+	lockdep_assert_held(&vm->lock);
+	xe_vm_assert_held(vm);
+
+	/* No op was prepared: only expected on fault-mode VMs. */
+	if (!pt_update_ops->current_op) {
+		xe_tile_assert(tile, xe_vm_in_fault_mode(vm));
+
+		return dma_fence_get_stub();
+	}
+
+#ifdef TEST_VM_OPS_ERROR
+	if (vops->inject_error &&
+	    vm->xe->vm_inject_error_position == FORCE_OP_ERROR_RUN)
+		return ERR_PTR(-ENOSPC);
+#endif
+
+	/* Allocate everything that can fail before the point of no return. */
+	if (pt_update_ops->needs_invalidation) {
+		ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
+		if (!ifence) {
+			err = -ENOMEM;
+			goto kill_vm_tile1;
+		}
+		if (tile->media_gt) {
+			/* NOTE(review): sizeof(*ifence) is the same type as *mfence. */
+			mfence = kzalloc(sizeof(*ifence), GFP_KERNEL);
+			if (!mfence) {
+				err = -ENOMEM;
+				goto free_ifence;
+			}
+<<<<<<<
+			chain_fence = dma_fence_chain_alloc();
+			if (!chain_fence) {
+=======
+			fences = kmalloc_array(2, sizeof(*fences), GFP_KERNEL);
+			if (!fences) {
+				err = -ENOMEM;
+				goto free_ifence;
+			}
+			cf = dma_fence_array_alloc(2);
+			if (!cf) {
+>>>>>>>
+				err = -ENOMEM;
+				goto free_ifence;
+			}
+		}
+	}
+
+	rfence = kzalloc(sizeof(*rfence), GFP_KERNEL);
+	if (!rfence) {
+		err = -ENOMEM;
+		goto free_ifence;
+	}
+
+	fence = xe_migrate_update_pgtables(tile->migrate, &update);
+	if (IS_ERR(fence)) {
+		err = PTR_ERR(fence);
+		goto free_rfence;
+	}
+
+	/* Point of no return - VM killed if failure after this */
+	for (i = 0; i < pt_update_ops->current_op; ++i) {
+		struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[i];
+
+		xe_pt_commit(pt_op->vma, pt_op->entries,
+			     pt_op->num_entries, &pt_update_ops->deferred);
+		pt_op->vma = NULL;	/* skip in xe_pt_update_ops_abort */
+	}
+
+	/* If the range fence cannot be inserted, wait for the update instead. */
+	if (xe_range_fence_insert(&vm->rftree[tile->id], rfence,
+				  &xe_range_fence_kfree_ops,
+				  pt_update_ops->start,
+				  pt_update_ops->last, fence))
+		dma_fence_wait(fence, false);
+
+	/* tlb invalidation must be done before signaling rebind */
+	if (ifence) {
+		/* Each invalidation fence consumes one reference on @fence. */
+		if (mfence)
+			dma_fence_get(fence);
+		invalidation_fence_init(tile->primary_gt, ifence, fence,
+					pt_update_ops->start,
+					pt_update_ops->last, vm->usm.asid);
+		if (mfence) {
+			invalidation_fence_init(tile->media_gt, mfence, fence,
+						pt_update_ops->start,
+						pt_update_ops->last, vm->usm.asid);
+<<<<<<<
+			dma_fence_chain_init(chain_fence, &ifence->base.base,
+					     &mfence->base.base, 0);
+			fence = &chain_fence->base;
+=======
+			fences[0] = &ifence->base.base;
+			fences[1] = &mfence->base.base;
+			dma_fence_array_init(cf, 2, fences,
+					     vm->composite_fence_ctx,
+					     vm->composite_fence_seqno++,
+					     false);
+			fence = &cf->base;
+>>>>>>>
+		} else {
+			fence = &ifence->base.base;
+		}
+	}
+
+	/*
+	 * Install the resulting fence(s) on the VM and the affected BOs. With
+	 * a media GT the two invalidation fences are installed individually
+	 * rather than the composite fence returned to the caller.
+	 */
+	if (!mfence) {
+		dma_resv_add_fence(xe_vm_resv(vm), fence,
+				   pt_update_ops->wait_vm_bookkeep ?
+				   DMA_RESV_USAGE_KERNEL :
+				   DMA_RESV_USAGE_BOOKKEEP);
+
+		list_for_each_entry(op, &vops->list, link)
+			op_commit(vops->vm, tile, pt_update_ops, op, fence, NULL);
+	} else {
+		dma_resv_add_fence(xe_vm_resv(vm), &ifence->base.base,
+				   pt_update_ops->wait_vm_bookkeep ?
+				   DMA_RESV_USAGE_KERNEL :
+				   DMA_RESV_USAGE_BOOKKEEP);
+
+		dma_resv_add_fence(xe_vm_resv(vm), &mfence->base.base,
+				   pt_update_ops->wait_vm_bookkeep ?
+				   DMA_RESV_USAGE_KERNEL :
+				   DMA_RESV_USAGE_BOOKKEEP);
+
+		list_for_each_entry(op, &vops->list, link)
+			op_commit(vops->vm, tile, pt_update_ops, op,
+				  &ifence->base.base, &mfence->base.base);
+	}
+
+	/* Drop the read lock that xe_pt_userptr_pre_commit() left held. */
+	if (pt_update_ops->needs_userptr_lock)
+		up_read(&vm->userptr.notifier_lock);
+
+	return fence;
+
+free_rfence:
+	kfree(rfence);
+free_ifence:
+<<<<<<<
+	dma_fence_chain_free(chain_fence);
+=======
+	kfree(cf);
+	kfree(fences);
+>>>>>>>
+	kfree(mfence);
+	kfree(ifence);
+kill_vm_tile1:
+	/* Any failure other than -EAGAIN on a tile with non-zero id kills the VM. */
+	if (err != -EAGAIN && tile->id)
+		xe_vm_kill(vops->vm, false);
+
+	return ERR_PTR(err);
+}
+
+/**
+ * xe_pt_update_ops_fini() - Finish PT update operations
+ * @tile: Tile of PT update operations
+ * @vops: VMA operations
+ *
+ * Finish PT update operations: release the bind entries of every prepared
+ * operation and commit the deferred destruction of page table memory.
+ */
+void xe_pt_update_ops_fini(struct xe_tile *tile, struct xe_vma_ops *vops)
+{
+	struct xe_vm_pgtable_update_ops *pt_update_ops =
+		&vops->pt_update_ops[tile->id];
+	struct xe_vm_pgtable_update_op *pt_op;
+	int idx = 0;
+
+	lockdep_assert_held(&vops->vm->lock);
+	xe_vm_assert_held(vops->vm);
+
+	while (idx < pt_update_ops->current_op) {
+		pt_op = &pt_update_ops->ops[idx++];
+		xe_pt_free_bind(pt_op->entries, pt_op->num_entries);
+	}
+
+	xe_bo_put_commit(&pt_update_ops->deferred);
+}
+
+/**
+ * xe_pt_update_ops_abort() - Abort PT update operations
+ * @tile: Tile of PT update operations
+ * @vops: VMA operations
+ *
+ * Abort PT update operations by unwinding internal PT state, newest
+ * operation first. Operations that were never prepared, or whose vma pointer
+ * has been cleared, are skipped.
+ */
+void xe_pt_update_ops_abort(struct xe_tile *tile, struct xe_vma_ops *vops)
+{
+	struct xe_vm_pgtable_update_ops *pt_update_ops =
+		&vops->pt_update_ops[tile->id];
+	int idx;
+
+	lockdep_assert_held(&vops->vm->lock);
+	xe_vm_assert_held(vops->vm);
+
+	for (idx = pt_update_ops->num_ops - 1; idx >= 0; --idx) {
+		struct xe_vm_pgtable_update_op *pt_op =
+			&pt_update_ops->ops[idx];
+
+		if (!pt_op->vma)
+			continue;
+		if (idx >= pt_update_ops->current_op)
+			continue;
+
+		if (!pt_op->bind) {
+			xe_pt_abort_unbind(pt_op->vma, pt_op->entries,
+					   pt_op->num_entries);
+			continue;
+		}
+
+		xe_pt_abort_bind(pt_op->vma, pt_op->entries,
+				 pt_op->num_entries,
+				 pt_op->rebind);
+	}
+
+	xe_bo_put_commit(&pt_update_ops->deferred);
+}