author    Eric Anholt <eric@anholt.net>  2008-07-23 10:07:16 -0700
committer Eric Anholt <eric@anholt.net>  2008-07-23 10:10:54 -0700
commit    439d7106832f2e9742deb900d96f1d3bc07162b1 (patch)
tree      f016507c4003135279ac53cb8c5521217306c8a0
parent    bddb952578d58c4dcfafe969c045a39d27666b56 (diff)
intel-gem: Add a quick hack to reduce clflushing on pread.
This increases overhead in the large-readpixels case due to repeated page cache accesses, but greatly reduces overhead in the small-readpixels case.
-rw-r--r--  linux-core/i915_gem.c | 57
1 file changed, 50 insertions(+), 7 deletions(-)
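The idea of the hack, before reading the diff: instead of clflushing the whole object to pull it into the CPU read domain, wait for any outstanding GPU rendering (the hunk's i915_gem_object_wait_rendering() call) and then flush only the pages the pread actually spans. A minimal standalone C sketch of that page-range arithmetic follows; flush_page(), the SSE2 _mm_clflush() loop, and the num_pages/page_list parameters are illustrative stand-ins for drm_ttm_cache_flush() and obj_priv->page_list, not the driver's real API.

    #include <stddef.h>
    #include <stdint.h>
    #include <emmintrin.h>  /* _mm_clflush(), SSE2 */

    #define PAGE_SIZE 4096  /* assumed x86 page size */
    #define CACHELINE 64    /* assumed x86 cache line size */

    /* Illustrative stand-in for drm_ttm_cache_flush(): clflush one
     * page, cache line by cache line.
     */
    static void flush_page(const void *page)
    {
            const char *p = page;
            size_t off;

            for (off = 0; off < PAGE_SIZE; off += CACHELINE)
                    _mm_clflush(p + off);
    }

    /* Flush only the pages touched by a pread of [offset, offset + size),
     * mirroring the patch's first_page/last_page arithmetic.  Assumes the
     * ioctl's bounds check has already run, so the range lies inside the
     * object and num_pages is nonzero.
     */
    static void flush_pread_range(void *const *page_list, size_t num_pages,
                                  uint64_t offset, uint64_t size)
    {
            size_t first_page = offset / PAGE_SIZE;
            /* Inclusive last page, as in the patch; when offset + size is
             * exactly page-aligned this lands one page past the data, so
             * clamp it to the list (the cost is at worst one extra flush).
             */
            size_t last_page = (offset + size) / PAGE_SIZE;
            size_t i;

            if (last_page >= num_pages)
                    last_page = num_pages - 1;

            for (i = first_page; i <= last_page; i++)
                    flush_page(page_list[i]);
    }

For a small readpixels this flushes one or two pages instead of the entire object, which is the win the commit message claims; a full-object read still clflushes everything and now also pays the page-list lookup on each call, which is the overhead it concedes.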
diff --git a/linux-core/i915_gem.c b/linux-core/i915_gem.c
index ca2dd19c..db068ce3 100644
--- a/linux-core/i915_gem.c
+++ b/linux-core/i915_gem.c
@@ -55,6 +55,9 @@ i915_gem_set_domain(struct drm_gem_object *obj,
struct drm_file *file_priv,
uint32_t read_domains,
uint32_t write_domain);
+static int i915_gem_object_get_page_list(struct drm_gem_object *obj);
+static void i915_gem_object_free_page_list(struct drm_gem_object *obj);
+static int i915_gem_object_wait_rendering(struct drm_gem_object *obj);
static void
i915_gem_clflush_object(struct drm_gem_object *obj);
@@ -128,6 +131,7 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
{
struct drm_i915_gem_pread *args = data;
struct drm_gem_object *obj;
+ struct drm_i915_gem_object *obj_priv;
ssize_t read;
loff_t offset;
int ret;
@@ -135,15 +139,52 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
obj = drm_gem_object_lookup(dev, file_priv, args->handle);
if (obj == NULL)
return -EINVAL;
+ obj_priv = obj->driver_private;
- mutex_lock(&dev->struct_mutex);
- ret = i915_gem_set_domain(obj, file_priv,
- I915_GEM_DOMAIN_CPU, 0);
- if (ret) {
+ /* Bounds check source.
+ *
+ * XXX: This could use review for overflow issues...
+ */
+ if (args->offset > obj->size || args->size > obj->size ||
+ args->offset + args->size > obj->size) {
drm_gem_object_unreference(obj);
- mutex_unlock(&dev->struct_mutex);
- return ret;
+ return -EFAULT;
+ }
+
+ mutex_lock(&dev->struct_mutex);
+
+ /* Do a partial equivalent of i915_gem_set_domain(CPU, 0), as
+ * we don't want to clflush whole objects to read a portion of them.
+ *
+ * The side effect of doing this is that repeated preads of the same
+ * contents would take extra clflush overhead, since we don't track
+ * flushedness on a page basis.
+ */
+ if (obj->write_domain & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)) {
+ ret = i915_gem_object_wait_rendering(obj);
+ if (ret) {
+ drm_gem_object_unreference(obj);
+ mutex_unlock(&dev->struct_mutex);
+ return ret;
+ }
}
+ if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
+ int got_page_list = 0;
+ int first_page = args->offset / PAGE_SIZE;
+ int last_page = (args->offset + args->size) / PAGE_SIZE;
+
+ if (obj_priv->page_list == NULL) {
+ i915_gem_object_get_page_list(obj);
+ got_page_list = 1;
+ }
+
+ drm_ttm_cache_flush(&obj_priv->page_list[first_page],
+ last_page - first_page + 1);
+
+ if (got_page_list)
+ i915_gem_object_free_page_list(obj);
+ }
+
offset = args->offset;
read = vfs_read(obj->filp, (char __user *)(uintptr_t)args->data_ptr,
		args->size, &offset);
@@ -329,8 +370,10 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
/* Bounds check destination.
*
* XXX: This could use review for overflow issues...
*/
if (args->offset > obj->size || args->size > obj->size ||
- args->offset + args->size > obj->size)
+ args->offset + args->size > obj->size) {
+ drm_gem_object_unreference(obj);
return -EFAULT;
+ }
/* We can only do the GTT pwrite on untiled buffers, as otherwise
* it would end up going through the fenced access, and we'll get
* different detiling behavior between reading and writing.
* pread/pwrite currently are reading and writing from the CPU
* perspective, requiring manual detiling by the client.
*/
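A note on the "XXX: This could use review for overflow issues" in both bounds checks: the worry is the classic one that args->offset + args->size can wrap around and compare as small. A common wrap-proof shape avoids the addition entirely by subtracting instead. A hedged sketch in plain C follows; range_in_object() is a hypothetical helper, not something this driver defines:

    #include <stdint.h>

    /* Nonzero when [offset, offset + size) fits inside an object of
     * object_size bytes.  No addition is performed, so no unsigned
     * wraparound is possible at any operand width.
     */
    static int range_in_object(uint64_t offset, uint64_t size,
                               uint64_t object_size)
    {
            if (offset > object_size)
                    return 0;
            /* object_size - offset cannot underflow after the test above. */
            return size <= object_size - offset;
    }

Written this way, the separate args->size > obj->size test also becomes redundant, since size <= object_size - offset already bounds it.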