summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTvrtko Ursulin <tvrtko.ursulin@intel.com>2018-02-23 13:56:16 +0000
committerTvrtko Ursulin <tvrtko.ursulin@intel.com>2018-03-02 13:49:00 +0000
commit2d4af43ebd4102a9efa11d62ec8a576f22248004 (patch)
treeb41ebc6ff286d969ddd053ac940c725ea0a80c9b
parent11f18f889b1b4e2470cf95331f9f74235da3e4ea (diff)
drm/i915: Multi-batch execbuffer2 [branch: multieb]
For contexts with width set to two or more, we add a mode to execbuf2 which implies there are N batch buffers in the buffer list, each of which will be sent to one of the engines from the engine map array (I915_CONTEXT_PARAM_ENGINES). Those N batches can be either the first N or the last N objects in the list, as controlled by the existing execbuffer2 flag. The N batches will be submitted to consecutive engines from the previously configured allowed engine array. Input and output fences are fully supported, with the latter getting signalled when all batch buffers have completed. Internally, the driver makes an effort to execute these batches simultaneously. The guarantee is that the first batch will not be submitted ahead of the subsequent ones. Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
-rw-r--r--drivers/gpu/drm/i915/i915_gem_execbuffer.c119
-rw-r--r--drivers/gpu/drm/i915/i915_request.c4
2 files changed, 111 insertions, 12 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 04867deb9017..cdb4ee1595e5 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -2212,6 +2212,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
int batch_index,
unsigned int num_batches,
struct dma_fence *in_fence,
+ struct dma_fence *exec_fence,
struct dma_fence **out_fence,
struct drm_syncobj **fences)
{
@@ -2386,6 +2387,12 @@ i915_gem_do_execbuffer(struct drm_device *dev,
goto err_request;
}
+ if (exec_fence) {
+ err = i915_request_await_execution(eb.request, exec_fence);
+ if (err < 0)
+ goto err_request;
+ }
+
if (fences) {
err = await_fence_array(&eb, fences);
if (err)
@@ -2581,7 +2588,7 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
}
err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list, -1, 1, NULL,
- NULL, NULL);
+ NULL, NULL, NULL);
__i915_gem_execbuf_update_offsets(&exec2, exec2_list);
@@ -2596,12 +2603,12 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
{
struct drm_i915_gem_execbuffer2 *args = data;
const size_t count = args->buffer_count;
- struct dma_fence **out_fence_p = NULL;
- struct dma_fence *out_fence = NULL;
+ struct dma_fence **out_fences = NULL;
struct drm_syncobj **fences = NULL;
struct dma_fence *in_fence = NULL;
struct drm_i915_gem_exec_object2 *exec2_list;
- bool fd_installed = false;
+ struct i915_gem_context *ctx;
+ unsigned int num_batches = 1, i;
int out_fence_fd = -1;
int err;
@@ -2621,10 +2628,38 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
}
}
+ ctx = i915_gem_context_lookup(file->driver_priv, args->rsvd1);
+ if (!ctx)
+ return -ENOENT;
+
+ if (ctx->width) {
+ if (args->flags & 0x3f) {
+ err = -EINVAL;
+ goto err_out_ctx;
+ }
+
+ num_batches = ctx->width;
+ if (num_batches > count) {
+ err = -EINVAL;
+ goto err_out_ctx;
+ }
+ }
+
if (args->flags & I915_EXEC_FENCE_IN) {
in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
- if (!in_fence)
- return -EINVAL;
+ if (!in_fence) {
+ err = -EINVAL;
+ goto err_out_ctx;
+ }
+ }
+
+ if ((args->flags & I915_EXEC_FENCE_OUT) || num_batches > 1) {
+ out_fences = kcalloc(num_batches, sizeof(*out_fences),
+ GFP_KERNEL);
+ if (!out_fences) {
+ err = -ENOMEM;
+ goto err_out_fences;
+ }
}
if (args->flags & I915_EXEC_FENCE_OUT) {
@@ -2633,7 +2668,6 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
err = out_fence_fd;
goto err_out_fence;
}
- out_fence_p = &out_fence;
}
/* Allocate an extra slot for use by the command parser */
@@ -2662,8 +2696,36 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
}
}
- err = i915_gem_do_execbuffer(dev, file, args, exec2_list, -1, 1,
- in_fence, out_fence_p, fences);
+ i = 0;
+
+ if (ctx->width) {
+ args->flags &= ~(0x3fULL);
+ args->flags |= i + 1;
+ }
+
+ err = i915_gem_do_execbuffer(dev, file, args, exec2_list,
+ args->flags & I915_EXEC_BATCH_FIRST ?
+ i : count - i - 1,
+ num_batches,
+ in_fence,
+ NULL,
+ out_fences ? &out_fences[0] : NULL,
+ fences);
+
+ for (i = 1; err == 0 && i < num_batches; i++) {
+ args->flags &= ~(0x3fULL);
+ args->flags |= i + 1;
+ args->batch_len = 0;
+
+ err = i915_gem_do_execbuffer(dev, file, args, exec2_list,
+ args->flags & I915_EXEC_BATCH_FIRST ?
+ i : count - i - 1,
+ num_batches,
+ NULL,
+ out_fences ? out_fences[0] : NULL,
+ out_fence_fd >= 0 ? &out_fences[i] : NULL,
+ NULL);
+ }
/*
* Now that we have begun execution of the batchbuffer, we ignore
@@ -2674,6 +2736,35 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
__i915_gem_execbuf_update_offsets(args, exec2_list);
if (out_fence_fd >= 0) {
+ struct dma_fence *out_fence = NULL;
+ bool fd_installed = false;
+
+ if (!err && num_batches > 1) {
+ struct dma_fence_array *fence_array;
+ u32 fence_seqno;
+
+ mutex_lock(&dev->struct_mutex);
+ fence_seqno = ++ctx->timeline->seqno;
+ mutex_unlock(&dev->struct_mutex);
+
+ fence_array = dma_fence_array_create(num_batches,
+ out_fences,
+ ctx->timeline->fence_context,
+ fence_seqno,
+ false);
+ if (fence_array) {
+ out_fences = NULL;
+ out_fence = &fence_array->base;
+ } else {
+ for (i = 0; i < num_batches; i++) {
+ if (out_fences[i])
+ dma_fence_put(out_fences[i]);
+ }
+ }
+ } else if (!err) {
+ out_fence = out_fences[0];
+ }
+
if (out_fence) {
struct sync_file *sync_fence;
@@ -2692,7 +2783,11 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
args->rsvd2 &= GENMASK_ULL(31, 0); /* keep in-fence */
args->rsvd2 |= (u64)out_fence_fd << 32;
- out_fence_fd = -1;
+
+ if (out_fence_fd >= 0)
+ out_fence_fd = -1;
+ } else if (out_fences) {
+ dma_fence_put(out_fences[0]);
}
args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS;
@@ -2706,8 +2801,12 @@ err_alloc:
if (out_fence_fd >= 0)
put_unused_fd(out_fence_fd);
err_out_fence:
+ kfree(out_fences);
+err_out_fences:
if (in_fence)
dma_fence_put(in_fence);
+err_out_ctx:
+ i915_gem_context_put(ctx);
return err;
}
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 6e4238474606..778686c8c6f4 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1114,11 +1114,11 @@ void __i915_request_add(struct i915_request *request, bool flush_caches)
i915_sw_fence_await_sw_fence(&request->submit,
&prev->submit,
&request->submitq);
- else
+ else if (!request->ctx->width /* Hack for parallel eb. */)
__i915_sw_fence_await_dma_fence(&request->submit,
&prev->fence,
&request->dmaq);
- if (engine->schedule)
+ if (engine->schedule && !request->ctx->width /* Hack for parallel eb. */)
__i915_priotree_add_dependency(&request->priotree,
&prev->priotree,
&request->dep,