author	Tvrtko Ursulin <tvrtko.ursulin@intel.com>	2017-09-21 10:29:41 +0100
committer	Tvrtko Ursulin <tvrtko.ursulin@intel.com>	2017-09-29 13:42:08 +0100
commit	b4d45c67b9c49cb294d979d1ae3b1be9abf7ec2c (patch)
tree	3b16f40e7f742c0f28c44edc66e63f454d1835dc
parent	ae2df5eb2fd3b62a957b7330b85a87d91de961be (diff)
drm/i915: Per batch buffer VCS balancing (context-bb-balancing)
We add an interface to allow userspace to mark contexts as concurrent, which means two things:

1. Batch buffers sent to the same engine are not implicitly serialized.
2. Context state cannot be relied on.

Legacy execbuf, with contexts marked as concurrent, can now load balance between individual batches instead of only statically per client. This also means two batches submitted one after another, both to I915_EXEC_BSD, can potentially be running in parallel on VCS0 and VCS1 respectively. For normal (non-concurrent) contexts the behaviour is unchanged.

In both cases a simple round-robin approach is used to load balance. If an execbuf requires a particular engine feature, like for example HEVC, it needs to mark its execbuf calls appropriately.

For the class/instance based execbuf we add I915_EXEC_INSTANCE_ANY to accomplish the same behaviour. This is only allowed on concurrent contexts, otherwise an error is returned.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
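A minimal userspace sketch (not part of the patch) of how the new interface might be used: it assumes the updated include/uapi/drm/i915_drm.h from this series, an already-open i915 DRM fd, and a pre-built batch object handle; the helper name is illustrative and error handling is omitted.

#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static void submit_two_bsd_batches(int fd, uint32_t batch_handle)
{
	struct drm_i915_gem_context_create create = {};
	struct drm_i915_gem_context_param param = {};
	struct drm_i915_gem_exec_object2 obj = { .handle = batch_handle };
	struct drm_i915_gem_execbuffer2 execbuf = {};

	/* Create a context and mark it as concurrent via the new param. */
	ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);

	param.ctx_id = create.ctx_id;
	param.param = I915_CONTEXT_PARAM_CONCURRENT;
	param.value = 1;
	ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &param);

	/*
	 * Legacy BSD submission: on a concurrent context the kernel picks a
	 * VCS engine per batch, so these two batches may end up running in
	 * parallel on VCS0 and VCS1.
	 */
	execbuf.buffers_ptr = (uint64_t)(uintptr_t)&obj;
	execbuf.buffer_count = 1;
	execbuf.batch_len = 4096;
	execbuf.flags = I915_EXEC_BSD;
	i915_execbuffer2_set_context_id(execbuf, create.ctx_id);

	ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
	ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);

	/*
	 * With the class/instance execbuf flags from earlier patches in this
	 * series, the same effect is requested by passing
	 * I915_EXEC_INSTANCE_ANY in the instance field of execbuf.flags.
	 */
}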
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h7
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c2
-rw-r--r--drivers/gpu/drm/i915/i915_gem_context.c13
-rw-r--r--drivers/gpu/drm/i915/i915_gem_context.h20
-rw-r--r--drivers/gpu/drm/i915/i915_gem_execbuffer.c75
-rw-r--r--include/uapi/drm/i915_drm.h7
6 files changed, 106 insertions, 18 deletions
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d5d0a8dbcf67..cd7d633b1980 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1528,9 +1528,6 @@ struct i915_gem_mm {
u64 unordered_timeline;
- /* the indicator for dispatch video commands on two BSD rings */
- atomic_t bsd_engine_dispatch_index;
-
/** Bit 6 swizzling required for X tiling */
uint32_t bit_6_swizzle_x;
/** Bit 6 swizzling required for Y tiling */
@@ -2256,6 +2253,10 @@ struct drm_i915_private {
struct intel_engine_cs *engine[I915_NUM_ENGINES];
struct intel_engine_cs *engine_class[MAX_ENGINE_CLASS + 1]
[MAX_ENGINE_INSTANCE + 1];
+
+ /* the indicator for dispatch video commands on two BSD rings */
+ atomic_t vcs_dispatch_index;
+
struct i915_vma *semaphore;
struct drm_dma_handle *status_page_dmah;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 18f9f0b541b8..3594e8788a6c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4911,7 +4911,7 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
- atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
+ atomic_set(&dev_priv->vcs_dispatch_index, 0);
spin_lock_init(&dev_priv->fb_tracking.lock);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 921ee369c74d..60accc297193 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -273,6 +273,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
list_add_tail(&ctx->link, &dev_priv->contexts.list);
ctx->i915 = dev_priv;
ctx->priority = I915_PRIORITY_NORMAL;
+ atomic_set(&ctx->vcs_dispatch_index,
+ atomic_fetch_xor(1, &dev_priv->vcs_dispatch_index));
INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
INIT_LIST_HEAD(&ctx->handles_list);
@@ -1036,6 +1038,9 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
case I915_CONTEXT_PARAM_BANNABLE:
args->value = i915_gem_context_is_bannable(ctx);
break;
+ case I915_CONTEXT_PARAM_CONCURRENT:
+ args->value = i915_gem_context_is_concurrent(ctx);
+ break;
default:
ret = -EINVAL;
break;
@@ -1091,6 +1096,14 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
else
i915_gem_context_clear_bannable(ctx);
break;
+ case I915_CONTEXT_PARAM_CONCURRENT:
+ if (args->size)
+ ret = -EINVAL;
+ else if (args->value)
+ i915_gem_context_set_concurrent(ctx);
+ else
+ i915_gem_context_clear_concurrent(ctx);
+ break;
default:
ret = -EINVAL;
break;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 44688e22a5c2..a0662502d386 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -115,6 +115,7 @@ struct i915_gem_context {
#define CONTEXT_BANNABLE 3
#define CONTEXT_BANNED 4
#define CONTEXT_FORCE_SINGLE_SUBMISSION 5
+#define CONTEXT_CONCURRENT 6
/**
* @hw_id: - unique identifier for the context
@@ -192,6 +193,8 @@ struct i915_gem_context {
* context close.
*/
struct list_head handles_list;
+
+ atomic_t vcs_dispatch_index;
};
static inline bool i915_gem_context_is_closed(const struct i915_gem_context *ctx)
@@ -255,6 +258,23 @@ static inline void i915_gem_context_set_force_single_submission(struct i915_gem_
__set_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ctx->flags);
}
+static inline bool
+i915_gem_context_is_concurrent(const struct i915_gem_context *ctx)
+{
+ return test_bit(CONTEXT_CONCURRENT, &ctx->flags);
+}
+
+static inline void i915_gem_context_set_concurrent(struct i915_gem_context *ctx)
+{
+ __set_bit(CONTEXT_CONCURRENT, &ctx->flags);
+}
+
+static inline void
+i915_gem_context_clear_concurrent(struct i915_gem_context *ctx)
+{
+ __clear_bit(CONTEXT_CONCURRENT, &ctx->flags);
+}
+
static inline bool i915_gem_context_is_default(const struct i915_gem_context *c)
{
return c->user_handle == DEFAULT_CONTEXT_HANDLE;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index bc896c87f8fa..d5e3a67f7344 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -235,6 +235,7 @@ struct i915_execbuffer {
u64 invalid_flags; /** Set of execobj.flags that are invalid */
u32 context_flags; /** Set of execobj.flags to insert from the ctx */
+ bool ctx_concurrent;
u32 batch_start_offset; /** Location within object of batch */
u32 batch_len; /** Length of batch within object */
@@ -671,6 +672,8 @@ static int eb_select_context(struct i915_execbuffer *eb)
eb->ctx = ctx;
eb->vm = ctx->ppgtt ? &ctx->ppgtt->base : &eb->i915->ggtt.base;
+ eb->ctx_concurrent = i915_gem_context_is_concurrent(ctx);
+
eb->context_flags = 0;
if (ctx->flags & CONTEXT_NO_ZEROMAP)
eb->context_flags |= __EXEC_OBJECT_NEEDS_BIAS;
@@ -1981,26 +1984,61 @@ static int eb_submit(struct i915_execbuffer *eb)
return 0;
}
+static unsigned int select_vcs_engine(struct i915_execbuffer *eb, u64 eb_flags)
+{
+ struct drm_i915_private *i915 = eb->i915;
+ u8 eb_caps = (eb_flags & I915_EXEC_ENGINE_CAP_MASK) >>
+ I915_EXEC_ENGINE_CAP_SHIFT;
+ unsigned int instance;
+
+ if (!HAS_BSD2(i915))
+ return 0;
+
+ instance = atomic_fetch_xor(1, &eb->ctx->vcs_dispatch_index);
+
+ if (instance == 1 &&
+ (eb_caps & i915->engine[_VCS(instance)]->caps) != eb_caps)
+ instance = 0;
+
+ return instance;
+}
+
/**
* Find one BSD ring to dispatch the corresponding BSD command.
* The engine index is returned.
*/
static unsigned int
-gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
- struct drm_file *file)
+gen8_dispatch_bsd_engine(struct i915_execbuffer *eb, struct drm_file *file,
+ u64 eb_flags)
{
struct drm_i915_file_private *file_priv = file->driver_priv;
+ unsigned int instance;
- /* Check whether the file_priv has already selected one ring. */
- if ((int)file_priv->bsd_engine < 0)
- file_priv->bsd_engine = atomic_fetch_xor(1,
- &dev_priv->mm.bsd_engine_dispatch_index);
+ if (eb->ctx_concurrent) {
+ /*
+ * For concurrent contexts do a round-robin engine assignment
+ * for each batch buffer.
+ */
+ instance = select_vcs_engine(eb, eb_flags);
+ } else if ((int)file_priv->bsd_engine < 0) {
+ /*
+ * For normal contexts check whether the file_priv has already
+ * selected one engine and if not select one.
+ */
+ instance = file_priv->bsd_engine =
+ atomic_fetch_xor(1, &eb->i915->vcs_dispatch_index);
+ } else {
+ /*
+ * Otherwise use the previously selected engine.
+ */
+ instance = file_priv->bsd_engine;
+ }
- return file_priv->bsd_engine;
+ return instance;
}
static struct intel_engine_cs *
-eb_select_engine_class_instance(struct drm_i915_private *i915, u64 eb_flags)
+eb_select_engine_class_instance(struct i915_execbuffer *eb, u64 eb_flags)
{
u8 class = eb_flags & I915_EXEC_RING_MASK;
u8 instance = (eb_flags & I915_EXEC_INSTANCE_MASK) >>
@@ -2009,7 +2047,14 @@ eb_select_engine_class_instance(struct drm_i915_private *i915, u64 eb_flags)
I915_EXEC_ENGINE_CAP_SHIFT;
struct intel_engine_cs *engine;
- engine = intel_engine_lookup_user(i915, class, instance);
+ if (instance == I915_EXEC_INSTANCE_ANY) {
+ if (!eb->ctx_concurrent)
+ return NULL;
+
+ instance = select_vcs_engine(eb, eb_flags);
+ }
+
+ engine = intel_engine_lookup_user(eb->i915, class, instance);
if (engine && ((caps & engine->caps) != caps))
return NULL;
@@ -2028,15 +2073,16 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
};
static struct intel_engine_cs *
-eb_select_engine(struct drm_i915_private *dev_priv,
+eb_select_engine(struct i915_execbuffer *eb,
struct drm_file *file,
struct drm_i915_gem_execbuffer2 *args)
{
+ struct drm_i915_private *dev_priv = eb->i915;
unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
struct intel_engine_cs *engine;
if (args->flags & I915_EXEC_CLASS_INSTANCE)
- return eb_select_engine_class_instance(dev_priv, args->flags);
+ return eb_select_engine_class_instance(eb, args->flags);
if (user_ring_id > I915_USER_RINGS) {
DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
@@ -2054,7 +2100,8 @@ eb_select_engine(struct drm_i915_private *dev_priv,
unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
- bsd_idx = gen8_dispatch_bsd_engine(dev_priv, file);
+ bsd_idx = gen8_dispatch_bsd_engine(eb, file,
+ args->flags);
} else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
bsd_idx <= I915_EXEC_BSD_RING2) {
bsd_idx >>= I915_EXEC_BSD_SHIFT;
@@ -2269,8 +2316,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
goto err_destroy;
err = -EINVAL;
- eb.engine = eb_select_engine(eb.i915, file, args);
- if (!eb.engine)
+ eb.engine = eb_select_engine(&eb, file, args);
+ if (unlikely(!eb.engine))
goto err_engine;
if (args->flags & I915_EXEC_RESOURCE_STREAMER) {
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index ed12c0ccb4b6..88ba5934b793 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1018,6 +1018,12 @@ struct drm_i915_gem_execbuffer2 {
#define I915_EXEC_INSTANCE_MASK (0xff << I915_EXEC_INSTANCE_SHIFT)
/*
+ * Batches sent with instance set to any can be load balanced by the driver
+ * if the concurrent context flag is also enabled.
+ */
+#define I915_EXEC_INSTANCE_ANY (0xff)
+
+/*
* Inform the kernel of what engine capabilities this batch buffer
* requires. For example only the first VCS engine has the HEVC block.
*
@@ -1439,6 +1445,7 @@ struct drm_i915_gem_context_param {
#define I915_CONTEXT_PARAM_GTT_SIZE 0x3
#define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE 0x4
#define I915_CONTEXT_PARAM_BANNABLE 0x5
+#define I915_CONTEXT_PARAM_CONCURRENT 0x6
__u64 value;
};