summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Anholt <eric@anholt.net>2008-07-26 19:28:14 -0700
committerEric Anholt <eric@anholt.net>2008-07-28 11:25:19 -0700
commitf85fd1b42dc2d77266007c02144d4f4f524e4157 (patch)
treeef45bb5f688fb345141c3e8a78902ccad8f84c57
parent04ae66db1c517264cddc786be962fdd393c9c8ac (diff)
intel-gem: Speed up tiled readpixels by tracking which pages have been flushed.
This is around a 3x speedup: previously we would read wide rows at a time and, as a result, clflush each tile 8 times. We'll want code related to this anyway when we do fault-based per-page clflushing for sw fallbacks.
-rw-r--r--linux-core/i915_gem.c99
-rw-r--r--shared-core/i915_drv.h6
2 files changed, 79 insertions, 26 deletions
diff --git a/linux-core/i915_gem.c b/linux-core/i915_gem.c
index 4087854c..eea2d488 100644
--- a/linux-core/i915_gem.c
+++ b/linux-core/i915_gem.c
@@ -36,6 +36,12 @@ static int
i915_gem_object_set_domain(struct drm_gem_object *obj,
uint32_t read_domains,
uint32_t write_domain);
+static int
+i915_gem_object_set_domain_range(struct drm_gem_object *obj,
+ uint64_t offset,
+ uint64_t size,
+ uint32_t read_domains,
+ uint32_t write_domain);
int
i915_gem_set_domain(struct drm_gem_object *obj,
struct drm_file *file_priv,
@@ -136,32 +142,11 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
mutex_lock(&dev->struct_mutex);
- /* Do a partial equivalent of i915_gem_set_domain(CPU, 0), as
- * we don't want to clflush whole objects to read a portion of them.
- *
- * The side effect of doing this is that repeated preads of the same
- * contents would take extra clflush overhead, since we don't track
- * flushedness on a page basis.
- */
- if (obj->write_domain & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)) {
- ret = i915_gem_object_wait_rendering(obj);
- if (ret) {
- drm_gem_object_unreference(obj);
- mutex_unlock(&dev->struct_mutex);
- return ret;
- }
- }
- if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
- int first_page = args->offset / PAGE_SIZE;
- int last_page = (args->offset + args->size - 1) / PAGE_SIZE;
-
- /* If we don't have the page list, the pages are unpinned
- * and swappable, and thus should already be in the CPU domain.
- */
- BUG_ON(obj_priv->page_list == NULL);
-
- drm_ttm_cache_flush(&obj_priv->page_list[first_page],
- last_page - first_page + 1);
+ ret = i915_gem_object_set_domain_range(obj, args->offset, args->size,
+ I915_GEM_DOMAIN_CPU, 0);
+ if (ret != 0) {
+ /* The object reference and struct_mutex are released here, so
+ * propagate the error instead of continuing with the read.
+ */
+ drm_gem_object_unreference(obj);
+ mutex_unlock(&dev->struct_mutex);
+ return ret;
}
offset = args->offset;
@@ -1383,7 +1368,17 @@ i915_gem_object_set_domain(struct drm_gem_object *obj,
if ((write_domain | flush_domains) != 0)
obj->write_domain = write_domain;
+
+ /* If we're invalidating the CPU domain, clear the per-page CPU
+ * domain list as well.
+ */
+ if (obj_priv->page_cpu_valid != NULL &&
+ (obj->read_domains & I915_GEM_DOMAIN_CPU) &&
+ ((read_domains & I915_GEM_DOMAIN_CPU) == 0)) {
+ memset(obj_priv->page_cpu_valid, 0, obj->size / PAGE_SIZE);
+ }
obj->read_domains = read_domains;
+
dev->invalidate_domains |= invalidate_domains;
dev->flush_domains |= flush_domains;
#if WATCH_BUF
@@ -1396,6 +1391,57 @@ i915_gem_object_set_domain(struct drm_gem_object *obj,
}
/**
+ * Set the read/write domain on a range of the object.
+ *
+ * Currently only implemented for CPU reads, otherwise drops to normal
+ * i915_gem_object_set_domain().
+ *
+ * For a CPU read, only the pages covering [offset, offset + size) that are
+ * not yet marked in page_cpu_valid are cache-flushed; each flushed page is
+ * then marked valid so that later reads of it skip the flush.
+ *
+ * Returns 0 on success (including when the object already has the CPU in
+ * its read domains), -ENOMEM if the per-page valid map cannot be allocated,
+ * or the error returned by i915_gem_object_wait_rendering() or
+ * i915_gem_object_set_domain().
+ */
+static int
+i915_gem_object_set_domain_range(struct drm_gem_object *obj,
+ uint64_t offset,
+ uint64_t size,
+ uint32_t read_domains,
+ uint32_t write_domain)
+{
+ struct drm_i915_gem_object *obj_priv = obj->driver_private;
+ int ret, i, last_page;
+
+ if (obj->read_domains & I915_GEM_DOMAIN_CPU)
+ return 0;
+
+ if (read_domains != I915_GEM_DOMAIN_CPU ||
+ write_domain != 0)
+ return i915_gem_object_set_domain(obj,
+ read_domains, write_domain);
+
+ /* Wait on any GPU rendering to the object to be flushed. */
+ if (obj->write_domain & ~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT)) {
+ ret = i915_gem_object_wait_rendering(obj);
+ if (ret)
+ return ret;
+ }
+
+ /* Nothing to flush for an empty range; this also keeps
+ * offset + size - 1 below from wrapping when offset is 0.
+ */
+ if (size == 0)
+ return 0;
+
+ /* One byte per page, allocated lazily on the first ranged CPU read. */
+ if (obj_priv->page_cpu_valid == NULL) {
+ obj_priv->page_cpu_valid = drm_calloc(1, obj->size / PAGE_SIZE,
+ DRM_MEM_DRIVER);
+ if (obj_priv->page_cpu_valid == NULL)
+ return -ENOMEM;
+ }
+
+ /* Flush the cache on any pages that are still invalid from the CPU's
+ * perspective. The page holding the last byte of the range is
+ * included, so the bound is inclusive.
+ */
+ last_page = (offset + size - 1) / PAGE_SIZE;
+ for (i = offset / PAGE_SIZE; i <= last_page; i++) {
+ if (obj_priv->page_cpu_valid[i])
+ continue;
+
+ drm_ttm_cache_flush(obj_priv->page_list + i, 1);
+
+ obj_priv->page_cpu_valid[i] = 1;
+ }
+
+ return 0;
+}
+
+/**
* Once all of the objects have been set in the proper domain,
* perform the necessary flush and invalidate operations.
*
@@ -2097,6 +2143,7 @@ void i915_gem_free_object(struct drm_gem_object *obj)
i915_gem_object_unbind(obj);
+ drm_free(obj_priv->page_cpu_valid, 1, DRM_MEM_DRIVER);
drm_free(obj->driver_private, 1, DRM_MEM_DRIVER);
}
diff --git a/shared-core/i915_drv.h b/shared-core/i915_drv.h
index 99416e8d..7bb9e5bb 100644
--- a/shared-core/i915_drv.h
+++ b/shared-core/i915_drv.h
@@ -393,6 +393,12 @@ struct drm_i915_gem_object {
/** Current tiling mode for the object. */
uint32_t tiling_mode;
+
+ /**
+ * Flagging of which individual pages are valid in GEM_DOMAIN_CPU when
+ * GEM_DOMAIN_CPU is not in the object's read domain.
+ */
+ uint8_t *page_cpu_valid;
};
/**