summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Daniel Vetter <daniel.vetter@ffwll.ch> 2014-02-14 14:01:21 +0100
committer: Daniel Vetter <daniel.vetter@ffwll.ch> 2014-02-14 14:18:38 +0100
commit: 8ea99c928787ba1712b7506b4c56c948c45d84b1 (patch)
tree: e491adb718426b723cdd29fe2239f29b0501ab4e
parent: 262de1453184f65e5ccfe45790f93d41f7339d49 (diff)
drm/i915: Only bind each object once rather than for every execbuffer
One side-effect of the introduction of ppgtt was that we needed to rebind the object into the appropriate vm (and global gtt in some peculiar cases). For simplicity this was done twice for every object on every call to execbuffer. However, that adds a tremendous amount of CPU overhead (rewriting all the PTEs for all objects into WC memory) per draw. The fix is to push all the decisions about which vm to bind into, and when, down into the low-level bind routines through hints rather than performing the bind unconditionally in the execbuffer routine.

Note that this is a regression introduced in the full ppgtt feature branch; before this we only re-bound objects when the relevant has_(aliasing_ppgtt|global_gtt)_mapping flag was clear. But since that flag is per-object and not per-vma, that optimization broke.

v2: Split out prep work and unrelated changes.
v3: Bring back functional change around PIN_GLOBAL that I've accidentally split out.
v4: Remove the temporary hack for the old binding logic to avoid bisection issues.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=72906
Tested-by: jianx.zhou@intel.com
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> (v1)
Cc: Ben Widawsky <benjamin.widawsky@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Acked-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
-rw-r--r-- drivers/gpu/drm/i915/i915_gem.c 20
-rw-r--r-- drivers/gpu/drm/i915/i915_gem_execbuffer.c 5
2 files changed, 12 insertions, 13 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ce7064d1ab30..3618bb0cda0a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3285,6 +3285,9 @@ search_free:
WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
trace_i915_vma_bind(vma, flags);
+ vma->bind_vma(vma, obj->cache_level,
+ flags & (PIN_MAPPABLE | PIN_GLOBAL) ? GLOBAL_BIND : 0);
+
i915_gem_verify_gtt(dev);
return vma;
@@ -3487,7 +3490,9 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
}
list_for_each_entry(vma, &obj->vma_list, vma_link)
- vma->bind_vma(vma, cache_level, 0);
+ if (drm_mm_node_allocated(&vma->node))
+ vma->bind_vma(vma, cache_level,
+ obj->has_global_gtt_mapping ? GLOBAL_BIND : 0);
}
list_for_each_entry(vma, &obj->vma_list, vma_link)
@@ -3838,22 +3843,21 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj,
ret = i915_vma_unbind(vma);
if (ret)
return ret;
+
+ vma = NULL;
}
}
- if (!i915_gem_obj_bound(obj, vm)) {
-
+ if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
vma = i915_gem_object_bind_to_vm(obj, vm, alignment, flags);
if (IS_ERR(vma))
return PTR_ERR(vma);
}
- vma = i915_gem_obj_to_vma(obj, vm);
-
- vma->bind_vma(vma, obj->cache_level,
- flags & PIN_GLOBAL ? GLOBAL_BIND : 0);
+ if (flags & PIN_GLOBAL && !obj->has_global_gtt_mapping)
+ vma->bind_vma(vma, obj->cache_level, GLOBAL_BIND);
- i915_gem_obj_to_vma(obj, vm)->pin_count++;
+ vma->pin_count++;
if (flags & PIN_MAPPABLE)
obj->pin_mappable |= true;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index b35849bbc367..d7229ad2bd22 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -589,11 +589,6 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
}
- /* Temporary hack while we rework the binding logic. */
- flags = (entry->flags & EXEC_OBJECT_NEEDS_GTT) &&
- !vma->obj->has_global_gtt_mapping ? GLOBAL_BIND : 0;
- vma->bind_vma(vma, obj->cache_level, flags);
-
return 0;
}