summaryrefslogtreecommitdiff
path: root/benchmarks
diff options
context:
space:
mode:
authorTvrtko Ursulin <tvrtko.ursulin@intel.com>2017-05-16 10:27:45 +0100
committerTvrtko Ursulin <tvrtko.ursulin@intel.com>2017-05-16 10:27:45 +0100
commita47419f95c67665ae67b22de70346ec0a99c7b84 (patch)
tree70406cafe91868124646fef81e363e8b00b482a5 /benchmarks
parent26a5e655883f2b0a2ff903cb24e40d6e2570dbe3 (diff)
gem_wsim: Fence support
Add sync fence dependency support to workload steps. Only one sync fence dependency per step is supported at the moment. Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Diffstat (limited to 'benchmarks')
-rw-r--r--benchmarks/gem_wsim.c161
-rw-r--r--benchmarks/wsim/README28
2 files changed, 154 insertions, 35 deletions
diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c
index 5cf59a68..840353e5 100644
--- a/benchmarks/gem_wsim.c
+++ b/benchmarks/gem_wsim.c
@@ -76,6 +76,12 @@ enum w_type
QD_THROTTLE
};
+struct deps
+{
+ int nr;
+ int *list;
+};
+
struct w_step
{
/* Workload step metadata */
@@ -83,8 +89,9 @@ struct w_step
unsigned int context;
unsigned int engine;
struct duration duration;
- int nr_deps;
- int *dep;
+ struct deps data_deps;
+ struct deps fence_deps;
+ int emit_fence;
int wait;
/* Implementation details */
@@ -179,28 +186,41 @@ parse_dependencies(unsigned int nr_steps, struct w_step *w, char *_desc)
{
char *desc = strdup(_desc);
char *token, *tctx = NULL, *tstart = desc;
- int dep;
igt_assert(desc);
-
- w->nr_deps = 0;
- w->dep = NULL;
+ igt_assert(!w->data_deps.nr && w->data_deps.nr == w->fence_deps.nr);
+ igt_assert(!w->data_deps.list &&
+ w->data_deps.list == w->fence_deps.list);
while ((token = strtok_r(tstart, "/", &tctx)) != NULL) {
+ char *str = token;
+ struct deps *deps;
+ int dep;
+
tstart = NULL;
- dep = atoi(token);
+ if (strlen(token) > 1 && token[0] == 'f') {
+ deps = &w->fence_deps;
+ str++;
+ } else {
+ deps = &w->data_deps;
+ }
+
+ dep = atoi(str);
if (dep > 0 || ((int)nr_steps + dep) < 0) {
- if (w->dep)
- free(w-dep);
+ if (deps->list)
+ free(deps->list);
return -1;
}
if (dep < 0) {
- w->nr_deps++;
- w->dep = realloc(w->dep, sizeof(*w->dep) * w->nr_deps);
- igt_assert(w->dep);
- w->dep[w->nr_deps - 1] = dep;
+ deps->nr++;
+ /* Multiple fences not yet supported. */
+ igt_assert(deps->nr == 1 || deps != &w->fence_deps);
+ deps->list = realloc(deps->list,
+ sizeof(*deps->list) * deps->nr);
+ igt_assert(deps->list);
+ deps->list[deps->nr - 1] = dep;
}
}
@@ -220,7 +240,7 @@ parse_workload(char *_desc, unsigned int flags, struct workload *app_w)
struct w_step step, *steps = NULL;
bool bcs_used = false;
unsigned int valid;
- int tmp;
+ int i, j, tmp;
igt_assert(desc);
@@ -332,7 +352,7 @@ parse_workload(char *_desc, unsigned int flags, struct workload *app_w)
}
if ((field = strtok_r(fstart, ".", &fctx)) != NULL) {
- unsigned int i, old_valid = valid;
+ unsigned int old_valid = valid;
fstart = NULL;
@@ -441,8 +461,6 @@ add_step:
}
if (app_w) {
- unsigned int i;
-
steps = realloc(steps, sizeof(step) *
(nr_steps + app_w->nr_steps));
igt_assert(steps);
@@ -464,6 +482,19 @@ add_step:
free(desc);
+ /*
+ * Tag all steps which need to emit a sync fence if another step is
+ * referencing them as a sync fence dependency.
+ */
+ for (i = 0; i < nr_steps; i++) {
+ for (j = 0; j < steps[i].fence_deps.nr; j++) {
+ tmp = steps[i].idx + steps[i].fence_deps.list[j];
+ igt_assert(tmp >= 0 && tmp < i);
+ igt_assert(steps[tmp].type == BATCH);
+ steps[tmp].emit_fence = -1;
+ }
+ }
+
if (bcs_used && verbose)
printf("BCS usage in workload with VCS2 remapping enabled!\n");
@@ -608,6 +639,10 @@ eb_update_flags(struct w_step *w, enum intel_engine_id engine,
w->eb.flags |= I915_EXEC_HANDLE_LUT;
w->eb.flags |= I915_EXEC_NO_RELOC;
+
+ igt_assert(w->emit_fence <= 0);
+ if (w->emit_fence)
+ w->eb.flags |= I915_EXEC_FENCE_OUT;
}
static void
@@ -615,7 +650,7 @@ alloc_step_batch(struct workload *wrk, struct w_step *w, unsigned int flags)
{
enum intel_engine_id engine = w->engine;
unsigned int j = 0;
- unsigned int nr_obj = 3 + w->nr_deps;
+ unsigned int nr_obj = 3 + w->data_deps.nr;
unsigned int i;
w->obj = calloc(nr_obj, sizeof(*w->obj));
@@ -631,10 +666,10 @@ alloc_step_batch(struct workload *wrk, struct w_step *w, unsigned int flags)
igt_assert(j < nr_obj);
}
- for (i = 0; i < w->nr_deps; i++) {
- igt_assert(w->dep[i] <= 0);
- if (w->dep[i]) {
- int dep_idx = w->idx + w->dep[i];
+ for (i = 0; i < w->data_deps.nr; i++) {
+ igt_assert(w->data_deps.list[i] <= 0);
+ if (w->data_deps.list[i]) {
+ int dep_idx = w->idx + w->data_deps.list[i];
igt_assert(dep_idx >= 0 && dep_idx < wrk->nr_steps);
igt_assert(wrk->steps[dep_idx].type == BATCH);
@@ -1231,6 +1266,65 @@ static void init_status_page(struct workload *wrk, unsigned int flags)
}
}
+#define LOCAL_IOCTL_I915_GEM_EXECBUFFER2_WR DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
+
+static int __gem_execbuf_wr(int gemfd, struct drm_i915_gem_execbuffer2 *execbuf)
+{
+ int err = 0;
+ if (igt_ioctl(gemfd, LOCAL_IOCTL_I915_GEM_EXECBUFFER2_WR, execbuf))
+ err = -errno;
+ errno = 0;
+ return err;
+}
+
+static void gem_execbuf_wr(int gemfd, struct drm_i915_gem_execbuffer2 *execbuf)
+{
+ igt_assert_eq(__gem_execbuf_wr(gemfd, execbuf), 0);
+}
+
+static void
+do_eb(struct workload *wrk, struct w_step *w, enum intel_engine_id engine,
+ unsigned int flags)
+{
+ unsigned int i;
+
+ eb_update_flags(w, engine, flags);
+
+ wrk->seqno[engine]++;
+
+ if (flags & SEQNO)
+ update_bb_seqno(w, engine, wrk->seqno[engine]);
+ if (flags & RT)
+ update_bb_rt(w, engine, wrk->seqno[engine]);
+
+ w->eb.batch_start_offset =
+ ALIGN(w->bb_sz - get_bb_sz(get_duration(w)),
+ 2 * sizeof(uint32_t));
+
+ for (i = 0; i < w->fence_deps.nr; i++) {
+ int tgt = w->idx + w->fence_deps.list[i];
+
+ /* TODO: fence merging needed to support multiple inputs */
+ igt_assert(i == 0);
+ igt_assert(tgt >= 0 && tgt < w->idx);
+ igt_assert(wrk->steps[tgt].emit_fence > 0);
+
+ w->eb.flags |= I915_EXEC_FENCE_IN;
+ w->eb.rsvd2 = wrk->steps[tgt].emit_fence;
+ }
+
+ if ((w->eb.flags & I915_EXEC_FENCE_IN) ||
+ (w->eb.flags & I915_EXEC_FENCE_OUT))
+ gem_execbuf_wr(fd, &w->eb);
+ else
+ gem_execbuf(fd, &w->eb);
+
+ if (w->eb.flags & I915_EXEC_FENCE_OUT) {
+ w->emit_fence = w->eb.rsvd2 >> 32;
+ igt_assert(w->emit_fence > 0);
+ }
+}
+
static void
run_workload(unsigned int id, struct workload *wrk,
bool background, int pipe_fd,
@@ -1300,22 +1394,12 @@ run_workload(unsigned int id, struct workload *wrk,
engine = balancer->balance(balancer, wrk, w);
wrk->nr_bb[engine]++;
}
- eb_update_flags(w, engine, flags);
-
- wrk->seqno[engine]++;
- if (flags & SEQNO)
- update_bb_seqno(w, engine, wrk->seqno[engine]);
- if (flags & RT)
- update_bb_rt(w, engine, wrk->seqno[engine]);
-
- w->eb.batch_start_offset =
- ALIGN(w->bb_sz - get_bb_sz(get_duration(w)),
- 2 * sizeof(uint32_t));
if (throttle > 0)
w_sync_to(wrk, w, i - throttle);
- gem_execbuf(fd, &w->eb);
+ do_eb(wrk, w, engine, flags);
+
if (w->request != -1) {
igt_list_del(&w->rq_link);
wrk->nrequest[w->request]--;
@@ -1356,6 +1440,15 @@ run_workload(unsigned int id, struct workload *wrk,
}
}
}
+
+ /* Cleanup all fences instantiated in this iteration. */
+ for (i = 0, w = wrk->steps; run && (i < wrk->nr_steps);
+ i++, w++) {
+ if (w->emit_fence > 0) {
+ close(w->emit_fence);
+ w->emit_fence = -1;
+ }
+ }
}
for (i = 0; i < NUM_ENGINES; i++) {
diff --git a/benchmarks/wsim/README b/benchmarks/wsim/README
index 5d2a90b2..617b1338 100644
--- a/benchmarks/wsim/README
+++ b/benchmarks/wsim/README
@@ -55,4 +55,30 @@ The above workload described in human language works like this:
When workload descriptors are provided on the command line, commas must be used
instead of new lines.
-Multiple dependencies can be given separated by forward slashes. \ No newline at end of file
+Multiple dependencies can be given separated by forward slashes.
+
+Example:
+
+ 1.VCS1.3000.0.1
+ 1.RCS.3700.0.0
+ 1.VCS2.2300.-1/-2.0
+
+I this case the last step has a data dependency on both first and second steps.
+
+Sync (fd) fences
+----------------
+
+Sync fences are also supported as dependencies.
+
+To use them put a "f<N>" token in the step dependecy list. N is this case the
+same relative step offset to the dependee batch, but instead of the data
+dependency an output fence will be emitted at the dependee step, and passed in
+as a dependency in the current step.
+
+Example:
+
+ 1.VCS1.3000.0.1
+ 1.RCS.500-1000.-1/f-1.0
+
+In this case the second step will have both a data dependency and a sync fence
+dependency on the previous step.