summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Nicolai Hähnle <nicolai.haehnle@amd.com> 2016-03-12 21:21:56 -0500
committer: Nicolai Hähnle <nicolai.haehnle@amd.com> 2016-04-14 16:42:13 -0500
commit: 25a8564bea9d58a58fb3cfad842a7045d4edb66a (patch)
tree: 6218ec960cb40a0ec8e9c7076b3c04e97fb37585
parent: 41567ddfb67b1a665617d239e22d2e1f5d56087d (diff)
arb_shader_image_load_store: add additional coherency test
The existing coherency test isn't a good match for the AMD GCN execution model.
-rw-r--r-- tests/spec/arb_shader_image_load_store/execution/coherency-extra.shader_test 90
1 files changed, 90 insertions, 0 deletions
diff --git a/tests/spec/arb_shader_image_load_store/execution/coherency-extra.shader_test b/tests/spec/arb_shader_image_load_store/execution/coherency-extra.shader_test
new file mode 100644
index 000000000..f718cd2a6
--- /dev/null
+++ b/tests/spec/arb_shader_image_load_store/execution/coherency-extra.shader_test
@@ -0,0 +1,90 @@
+# Additional coherency test that can demonstrate failures in an incorrect
+# coherency implementation for AMD GCN, unlike arb_shader_image_load_store-coherency.
+#
+# The real problem with coherency in AMD GCN is separate, non-coherent L1
+# caches, i.e. when a shader execution writes to an image in a CU that uses
+# one L1 cache, and a different shader execution reads from the image
+# in a CU with a different L1 cache.
+#
+# This test uses atomic accesses to a control texture to select the very first
+# fragment shader thread as a writer thread which keeps changing a data
+# texture in a tight loop. All other threads become reader threads which
+# report success if they see two different values of the same texture.
+#
+# This test can produce a false negative (false failure) in two cases:
+# 1) The timeout value ITERS is too low,
+# 2) There is no (or insufficient) parallelism in the implementation, and
+# therefore the writer thread must finish before most of the reader threads
+# get a chance to run.
+#
+
+[require]
+GL >= 3.3
+GLSL >= 3.30
+GL_ARB_shader_image_load_store
+SIZE 256 256
+
+[vertex shader passthrough]
+
+[fragment shader]
+#version 330
+#extension GL_ARB_shader_image_load_store: enable
+
+// Change this to 0 to get a control test that should fail on hardware
+// without coherent L1 caches.
+//
+// Need volatile instead of just coherent to prevent overly smart compilers
+// from moving the imageLoad/imageStore out of the loop.
+#if 1
+volatile
+#endif
+layout(r32i) uniform iimage2D tex; // data image: texel (0,0) is stored by the writer and polled by the readers
+volatile layout(r32i) uniform iimage2D ctrl; // control image: texel (0,0) hands out ids, texel (0,1) counts finished invocations
+out vec4 outcolor; // set to green when an invocation finishes successfully, red when it times out
+
+// Add a timeout so that an incorrect coherency implementation doesn't hang
+// the GPU. If this timeout is too low, you can get false negative results
+// because the writer thread quits before all reader threads have
+// executed.
+#define ITERS 100000
+
+void main() // One invocation becomes the writer; all others are readers polling tex for a change.
+{
+ int id = imageAtomicAdd(ctrl, ivec2(0, 0), 1); // returns the pre-increment counter; the invocation that gets 0 is the writer
+ int orig = imageLoad(tex, ivec2(0, 0)).x; // baseline value that reader loads are compared against
+ bool done = false;
+
+ outcolor = vec4(0.0, 0.0, 0.0, 1.0); // start opaque black; exactly one of red/green is set after the loop
+
+ for (int iter = 0; iter < ITERS && !done; ++iter) { // ITERS bounds the loop so a failing implementation cannot spin forever
+ if (id == 0) { // writer invocation
+ imageStore(tex, ivec2(0, 0), ivec4(iter)); // publish a new value every iteration
+ if (imageLoad(ctrl, ivec2(0, 1)).x >= 256 * 256) // 256 * 256 matches the SIZE 256 256 window: every invocation is accounted for
+ done = true;
+ } else { // reader invocation
+ int current = imageLoad(tex, ivec2(0, 0)).x;
+ if (current != orig) // a value different from the baseline became visible
+ done = true;
+ }
+
+ if (done || (id == 0 && iter == 0)) // writer registers itself on its first iteration, so reader completions alone can bring the count to 256 * 256
+ imageAtomicAdd(ctrl, ivec2(0, 1), 1);
+ }
+
+ if (done)
+ outcolor.y = 1.0; // green: finished before the timeout (the colour the test probes for)
+ else
+ outcolor.x = 1.0; // red: loop ran out of iterations
+}
+
+[test]
+texture integer 0 (1, 2) (0, 0) GL_R32I
+image texture 0 GL_R32I
+texture integer 1 (1, 1) (0, 0) GL_R32I
+image texture 1 GL_R32I
+
+uniform int ctrl 0
+uniform int tex 1
+draw rect -1 -1 2 2
+
+probe all rgba 0.0 1.0 0.0 1.0