diff options
author | Francisco Jerez <currojerez@riseup.net> | 2016-06-10 16:41:59 -0700 |
---|---|---|
committer | Francisco Jerez <currojerez@riseup.net> | 2016-06-13 15:55:58 -0700 |
commit | a84b5d43e2e54dbebe3600111f4f35c29411f831 (patch) | |
tree | 54630d8e5f460373793422356e95cdee0ebdb578 | |
parent | d960284e447df9b1563deef0ce950617decfba63 (diff) |
i965: Fix cross-primitive scratch corruption when changing the per-thread allocation.
I haven't found any mention of this in the hardware docs, but
experimentally what seems to be going on is that when the per-thread
scratch slot size is changed between two pipelined draw calls, shader
invocations using the old and new scratch size setting may end up
being executed in parallel, causing their scratch offset calculations
to be based in a different partitioning of the scratch space, which
can cause their thread-local scratch space to overlap leading to
cross-thread scratch corruption.
I've been experimenting with alternative workarounds, like emitting a
PIPE_CONTROL with DC flush and CS stall between draw (or dispatch
compute) calls using different per-thread scratch allocation settings,
or avoiding reuse of the scratch BO if the per-thread scratch
allocation doesn't exactly match the original. Both seem to be as
effective as this workaround, but they have potential performance
implications, while this should be basically for free.
Fixes over 40 failures in our CI system with spilling forced on
(including CTS, dEQP and Piglit failures) on a number of different
platforms from Gen4 to Gen9. The 'glsl-max-varyings' piglit test
seems to be able to reproduce this bug consistently in the vertex
shader on at least Gen4, Gen8 and Gen9 with spilling forced on.
Cc: <mesa-stable@lists.freedesktop.org>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.h | 13 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vs_state.c | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_state.c | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen6_gs_state.c | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen6_vs_state.c | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen6_wm_state.c | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen7_cs_state.c | 6 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen7_ds_state.c | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen7_gs_state.c | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen7_hs_state.c | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen7_vs_state.c | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen7_wm_state.c | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen8_ds_state.c | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen8_gs_state.c | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen8_hs_state.c | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen8_ps_state.c | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen8_vs_state.c | 2 |
17 files changed, 31 insertions, 18 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 9618b4a818..6e84506e18 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -674,6 +674,19 @@ struct brw_stage_state /** * Optional scratch buffer used to store spilled register values and * variably-indexed GRF arrays. + * + * The contents of this buffer are short-lived so the same memory can be + * re-used at will for multiple shader programs (executed by the same fixed + * function). However reusing a scratch BO for which shader invocations + * are still in flight with a per-thread scratch slot size other than the + * original can cause threads with different scratch slot size and FFTID + * (which may be executed in parallel depending on the shader stage and + * hardware generation) to map to an overlapping region of the scratch + * space, which can potentially lead to mutual scratch space corruption. + * For that reason if you borrow this scratch buffer you should only be + * using the slot size given by the \c per_thread_scratch member below, + * unless you're taking additional measures to synchronize thread execution + * across slot size changes. */ drm_intel_bo *scratch_bo; diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index c728f09cac..331949aa1f 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -83,7 +83,7 @@ brw_upload_vs_unit(struct brw_context *brw) vs->thread2.scratch_space_base_pointer = stage_state->scratch_bo->offset64 >> 10; /* reloc */ vs->thread2.per_thread_scratch_space = - ffs(brw->vs.prog_data->base.base.total_scratch) - 11; + ffs(stage_state->per_thread_scratch) - 11; } else { vs->thread2.scratch_space_base_pointer = 0; vs->thread2.per_thread_scratch_space = 0; diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index bf1bdc9948..dda4f23e55 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -133,7 +133,7 @@ brw_upload_wm_unit(struct brw_context *brw) wm->thread2.scratch_space_base_pointer = brw->wm.base.scratch_bo->offset64 >> 10; /* reloc */ wm->thread2.per_thread_scratch_space = - ffs(prog_data->base.total_scratch) - 11; + ffs(brw->wm.base.per_thread_scratch) - 11; } else { wm->thread2.scratch_space_base_pointer = 0; wm->thread2.per_thread_scratch_space = 0; diff --git a/src/mesa/drivers/dri/i965/gen6_gs_state.c b/src/mesa/drivers/dri/i965/gen6_gs_state.c index 4e4b946346..da7322ebe9 100644 --- a/src/mesa/drivers/dri/i965/gen6_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_gs_state.c @@ -140,7 +140,7 @@ upload_gs_state(struct brw_context *brw) if (prog_data->base.total_scratch) { OUT_RELOC(stage_state->scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - ffs(prog_data->base.total_scratch) - 11); + ffs(stage_state->per_thread_scratch) - 11); } else { OUT_BATCH(0); /* no scratch space */ } diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index 3ae00ec29c..0ad74c4fd3 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -126,7 +126,7 @@ upload_vs_state(struct brw_context *brw) if (brw->vs.prog_data->base.base.total_scratch) { OUT_RELOC(stage_state->scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - ffs(brw->vs.prog_data->base.base.total_scratch) - 11); + ffs(stage_state->per_thread_scratch) - 11); } else { OUT_BATCH(0); } diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index 3e872af489..34aa1218b9 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -220,7 +220,7 @@ gen6_upload_wm_state(struct brw_context *brw, if (prog_data->base.total_scratch) { OUT_RELOC(stage_state->scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - ffs(prog_data->base.total_scratch) - 11); + ffs(stage_state->per_thread_scratch) - 11); } else { OUT_BATCH(0); } diff --git a/src/mesa/drivers/dri/i965/gen7_cs_state.c b/src/mesa/drivers/dri/i965/gen7_cs_state.c index ff308e6f79..5427fa5af1 100644 --- a/src/mesa/drivers/dri/i965/gen7_cs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_cs_state.c @@ -70,21 +70,21 @@ brw_upload_cs_state(struct brw_context *brw) */ OUT_RELOC64(stage_state->scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - ffs(prog_data->total_scratch) - 11); + ffs(stage_state->per_thread_scratch) - 11); } else if (brw->is_haswell) { /* Haswell's Per Thread Scratch Space is in the range [0, 10] * where 0 = 2k, 1 = 4k, 2 = 8k, ..., 10 = 2M. */ OUT_RELOC(stage_state->scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - ffs(prog_data->total_scratch) - 12); + ffs(stage_state->per_thread_scratch) - 12); } else { /* Earlier platforms use the range [0, 11] to mean [1kB, 12kB] * where 0 = 1kB, 1 = 2kB, 2 = 3kB, ..., 11 = 12kB. */ OUT_RELOC(stage_state->scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - prog_data->total_scratch / 1024 - 1); + stage_state->per_thread_scratch / 1024 - 1); } } else { OUT_BATCH(0); diff --git a/src/mesa/drivers/dri/i965/gen7_ds_state.c b/src/mesa/drivers/dri/i965/gen7_ds_state.c index 2fe0d88f11..3e1a03b230 100644 --- a/src/mesa/drivers/dri/i965/gen7_ds_state.c +++ b/src/mesa/drivers/dri/i965/gen7_ds_state.c @@ -82,7 +82,7 @@ gen7_upload_ds_state(struct brw_context *brw) if (prog_data->total_scratch) { OUT_RELOC(stage_state->scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - ffs(prog_data->total_scratch) - 11); + ffs(stage_state->per_thread_scratch) - 11); } else { OUT_BATCH(0); } diff --git a/src/mesa/drivers/dri/i965/gen7_gs_state.c b/src/mesa/drivers/dri/i965/gen7_gs_state.c index 6b126fe8fd..7d733933e2 100644 --- a/src/mesa/drivers/dri/i965/gen7_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_gs_state.c @@ -64,7 +64,7 @@ upload_gs_state(struct brw_context *brw) if (brw->gs.prog_data->base.base.total_scratch) { OUT_RELOC(stage_state->scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - ffs(brw->gs.prog_data->base.base.total_scratch) - 11); + ffs(stage_state->per_thread_scratch) - 11); } else { OUT_BATCH(0); } diff --git a/src/mesa/drivers/dri/i965/gen7_hs_state.c b/src/mesa/drivers/dri/i965/gen7_hs_state.c index 4f948dc22e..297d61b36f 100644 --- a/src/mesa/drivers/dri/i965/gen7_hs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_hs_state.c @@ -83,7 +83,7 @@ gen7_upload_hs_state(struct brw_context *brw) if (prog_data->base.total_scratch) { OUT_RELOC(stage_state->scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - ffs(prog_data->base.total_scratch) - 11); + ffs(stage_state->per_thread_scratch) - 11); } else { OUT_BATCH(0); } diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c index bb3179df2f..2715b37118 100644 --- a/src/mesa/drivers/dri/i965/gen7_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c @@ -56,7 +56,7 @@ upload_vs_state(struct brw_context *brw) if (prog_data->base.total_scratch) { OUT_RELOC(stage_state->scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - ffs(prog_data->base.total_scratch) - 11); + ffs(stage_state->per_thread_scratch) - 11); } else { OUT_BATCH(0); } diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index 8d4d4fc606..8243905a8d 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -235,7 +235,7 @@ gen7_upload_ps_state(struct brw_context *brw, if (prog_data->base.total_scratch) { OUT_RELOC(brw->wm.base.scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - ffs(prog_data->base.total_scratch) - 11); + ffs(stage_state->per_thread_scratch) - 11); } else { OUT_BATCH(0); } diff --git a/src/mesa/drivers/dri/i965/gen8_ds_state.c b/src/mesa/drivers/dri/i965/gen8_ds_state.c index 0219d072c7..6f01abb76f 100644 --- a/src/mesa/drivers/dri/i965/gen8_ds_state.c +++ b/src/mesa/drivers/dri/i965/gen8_ds_state.c @@ -52,7 +52,7 @@ gen8_upload_ds_state(struct brw_context *brw) if (prog_data->total_scratch) { OUT_RELOC64(stage_state->scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - ffs(prog_data->total_scratch) - 11); + ffs(stage_state->per_thread_scratch) - 11); } else { OUT_BATCH(0); OUT_BATCH(0); diff --git a/src/mesa/drivers/dri/i965/gen8_gs_state.c b/src/mesa/drivers/dri/i965/gen8_gs_state.c index 2741330106..eb0c3dff9c 100644 --- a/src/mesa/drivers/dri/i965/gen8_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen8_gs_state.c @@ -57,7 +57,7 @@ gen8_upload_gs_state(struct brw_context *brw) if (brw->gs.prog_data->base.base.total_scratch) { OUT_RELOC64(stage_state->scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - ffs(brw->gs.prog_data->base.base.total_scratch) - 11); + ffs(stage_state->per_thread_scratch) - 11); } else { OUT_BATCH(0); OUT_BATCH(0); diff --git a/src/mesa/drivers/dri/i965/gen8_hs_state.c b/src/mesa/drivers/dri/i965/gen8_hs_state.c index 4f8eba6cd5..e759a648af 100644 --- a/src/mesa/drivers/dri/i965/gen8_hs_state.c +++ b/src/mesa/drivers/dri/i965/gen8_hs_state.c @@ -52,7 +52,7 @@ gen8_upload_hs_state(struct brw_context *brw) if (prog_data->base.total_scratch) { OUT_RELOC64(stage_state->scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - ffs(prog_data->base.total_scratch) - 11); + ffs(stage_state->per_thread_scratch) - 11); } else { OUT_BATCH(0); OUT_BATCH(0); diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c index 51a3121c72..0e7d258e1a 100644 --- a/src/mesa/drivers/dri/i965/gen8_ps_state.c +++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c @@ -255,7 +255,7 @@ gen8_upload_ps_state(struct brw_context *brw, if (prog_data->base.total_scratch) { OUT_RELOC64(stage_state->scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - ffs(prog_data->base.total_scratch) - 11); + ffs(stage_state->per_thread_scratch) - 11); } else { OUT_BATCH(0); OUT_BATCH(0); diff --git a/src/mesa/drivers/dri/i965/gen8_vs_state.c b/src/mesa/drivers/dri/i965/gen8_vs_state.c index d4a345583d..b7682b553b 100644 --- a/src/mesa/drivers/dri/i965/gen8_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen8_vs_state.c @@ -58,7 +58,7 @@ upload_vs_state(struct brw_context *brw) if (prog_data->base.total_scratch) { OUT_RELOC64(stage_state->scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - ffs(prog_data->base.total_scratch) - 11); + ffs(stage_state->per_thread_scratch) - 11); } else { OUT_BATCH(0); OUT_BATCH(0); |