summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore1
-rw-r--r--Makefile.am12
-rw-r--r--configure.ac7
-rw-r--r--misc/spirv_as.py119
-rw-r--r--src/tests/func/shader_ballot/amd_shader_ballot.c779
-rw-r--r--src/tests/func/shader_ballot/group_all.spv79
-rw-r--r--src/tests/func/shader_ballot/group_any.glsl23
-rw-r--r--src/tests/func/shader_ballot/group_any.spv79
-rw-r--r--src/tests/func/shader_ballot/group_broadcast.spv57
-rw-r--r--src/tests/func/shader_ballot/group_iadd.glsl16
-rw-r--r--src/tests/func/shader_ballot/group_iadd.spv57
-rw-r--r--src/tests/func/shader_ballot/group_iadd_excl.glsl16
-rw-r--r--src/tests/func/shader_ballot/group_iadd_excl.spv57
-rw-r--r--src/tests/func/shader_ballot/group_iadd_excl_nonuniform.spv70
-rw-r--r--src/tests/func/shader_ballot/group_iadd_incl.glsl16
-rw-r--r--src/tests/func/shader_ballot/group_iadd_incl.spv57
-rw-r--r--src/tests/func/shader_ballot/group_iadd_incl_nonuniform.glsl18
-rw-r--r--src/tests/func/shader_ballot/group_iadd_incl_nonuniform.spv70
-rw-r--r--src/tests/func/shader_ballot/group_iadd_nonuniform.glsl18
-rw-r--r--src/tests/func/shader_ballot/group_iadd_nonuniform.spv70
-rw-r--r--src/tests/func/shader_ballot/test.spv57
21 files changed, 1678 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
index 88dbb08..9b99024 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,6 +10,7 @@
# generated files
*-spirv.h
+*-spirv-as.h
/data/grass-*.jpg
/data/grass-*.png
/data/pink-leaves-*.jpg
diff --git a/Makefile.am b/Makefile.am
index 7e9be29..64da935 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -141,6 +141,15 @@ BUILT_SOURCES = \
src/tests/func/shader/pack_unpack-spirv.h \
src/tests/func/shader_ballot/ext_shader_ballot-spirv.h \
src/tests/func/shader_ballot/amd_shader_ballot-spirv.h \
+ src/tests/func/shader_ballot/group_all-spirv-as.h \
+ src/tests/func/shader_ballot/group_any-spirv-as.h \
+ src/tests/func/shader_ballot/group_broadcast-spirv-as.h \
+ src/tests/func/shader_ballot/group_iadd-spirv-as.h \
+ src/tests/func/shader_ballot/group_iadd_nonuniform-spirv-as.h \
+ src/tests/func/shader_ballot/group_iadd_excl-spirv-as.h \
+ src/tests/func/shader_ballot/group_iadd_excl_nonuniform-spirv-as.h \
+ src/tests/func/shader_ballot/group_iadd_incl-spirv-as.h \
+ src/tests/func/shader_ballot/group_iadd_incl_nonuniform-spirv-as.h \
src/tests/func/shader_group_vote/ext_shader_subgroup_vote-spirv.h \
src/tests/func/ssbo/interleave-spirv.h \
src/tests/func/sync/semaphore-fd-spirv.h \
@@ -152,6 +161,9 @@ bin_crucible_LDADD = $(MESA_LDFLAGS) -lm -lvulkan -lpthread $(libpng_LIBS) \
%-spirv.h: %.c misc/glsl_scraper.py
$(AM_V_GEN) $(PYTHON3) $(srcdir)/misc/glsl_scraper.py --with-glslc=$(GLSLC) -o $@ $<
+%-spirv-as.h: %.spv misc/spirv_as.py
+ $(AM_V_GEN) $(PYTHON3) $(srcdir)/misc/spirv_as.py --with-spirv-as=$(SPIRV_AS) -o $@ $<
+
%_gen.c: %_gen.py
$(AM_V_GEN) $(PYTHON3) $<
diff --git a/configure.ac b/configure.ac
index 9e137e7..74027d8 100644
--- a/configure.ac
+++ b/configure.ac
@@ -55,6 +55,13 @@ if test "x$GLSLC" = "x"; then
AC_MSG_ERROR([failed to find glslc])
fi
+AC_ARG_VAR([SPIRV_AS], [path to spirv-as executable])
+AC_SUBST([SPIRV_AS])
+AC_CHECK_PROGS([SPIRV_AS], [spirv-as])
+if test "x$SPIRV_AS" = "x"; then
+ AC_MSG_ERROR([failed to find spirv-as])
+fi
+
PKG_CHECK_MODULES([libpng16], [libpng16], [HAVE_LIBPNG16=1], [HAVE_LIBPNG16=0])
AM_CONDITIONAL([NO_PNG16], [test $HAVE_LIBPNG16 = 0])
AM_COND_IF([NO_PNG16],
diff --git a/misc/spirv_as.py b/misc/spirv_as.py
new file mode 100644
index 0000000..5e31157
--- /dev/null
+++ b/misc/spirv_as.py
@@ -0,0 +1,119 @@
+#! /usr/bin/env python3
+
+import argparse
+import io
+import os
+import re
+import shutil
+import struct
+import subprocess
+import sys
+import tempfile
+from textwrap import dedent
+
+class ShaderCompileError(RuntimeError):
+ def __init__(self, *args):
+ super(ShaderCompileError, self).__init__(*args)
+
+class Shader:
+ def __init__(self, in_file):
+ self.dwords = None
+ self.in_file = in_file
+ self.name = os.path.splitext(os.path.basename(in_file))[0]
+
+ def __run_spirv_as(self):
+ with subprocess.Popen([spirv_as] +
+ ['-o', '-', self.in_file],
+ stdout = subprocess.PIPE,
+ stderr = subprocess.PIPE) as proc:
+
+ out, err = proc.communicate(timeout=30)
+
+ if proc.returncode != 0:
+ # spirv-as may report errors on standard out or standard error.
+ # Since we don't really want to count on either one,
+ # we'll grab the output of both
+ message = out.decode('utf-8') + '\n' + err.decode('utf-8')
+ raise ShaderCompileError(message.strip())
+
+ return out
+
+ def compile(self):
+ def dwords(f):
+ while True:
+ dword_str = f.read(4)
+ if not dword_str:
+ return
+ assert len(dword_str) == 4
+ yield struct.unpack('I', dword_str)[0]
+
+ spirv = self.__run_spirv_as()
+ self.dwords = list(dwords(io.BytesIO(spirv)))
+
+ def dump_c_code(self, f):
+ f.write('static const uint32_t __{0}_spir_v_src[] = {{'.format(self.name))
+ line_start = 0
+ while line_start < len(self.dwords):
+ f.write('\n ')
+ for i in range(line_start, min(line_start + 6, len(self.dwords))):
+ f.write(' 0x{:08x},'.format(self.dwords[i]))
+ line_start += 6
+ f.write('\n};\n\n')
+
+ f.write(dedent("""\
+ #define {0}_info \
+ .spirvSize = sizeof(__{0}_spir_v_src), \
+ .pSpirv = __{0}_spir_v_src
+ """.format(self.name)))
+
+def open_file(name, mode):
+ if name == '-':
+ if mode == 'w':
+ return sys.stdout
+ elif mode == 'r':
+ return sys.stdin
+ else:
+ assert False
+ else:
+ return open(name, mode)
+
+def parse_args():
+ description = dedent("""\
+ This program assembles a SPIR-V shader and emits a header with the
+ resulting binary that can be included by a test.
+
+ If '-' is passed as the input file or output file, stdin or stdout
+ will be used instead of a file on disc.""")
+
+ p = argparse.ArgumentParser(
+ description=description,
+ formatter_class=argparse.RawDescriptionHelpFormatter)
+ p.add_argument('-o', '--outfile', default='-',
+ help='Output to the given file (default: stdout).')
+ p.add_argument('--with-spirv-as', metavar='PATH',
+ default='spirv-as',
+ dest='spirv_as',
+ help='Full path to the spirv-as assembler.')
+ p.add_argument('infile', metavar='INFILE')
+
+ return p.parse_args()
+
+
+args = parse_args()
+infname = args.infile
+outfname = args.outfile
+spirv_as = args.spirv_as
+
+shader = Shader(infname)
+shader.compile()
+
+with open_file(outfname, 'w') as outfile:
+ outfile.write(dedent("""\
+ /* ========================== DO NOT EDIT! ==========================
+ * This file is autogenerated by spirv_as.py.
+ */
+
+ #include <stdint.h>
+
+ """))
+ shader.dump_c_code(outfile)
diff --git a/src/tests/func/shader_ballot/amd_shader_ballot.c b/src/tests/func/shader_ballot/amd_shader_ballot.c
index 5f88ba9..c4e6ee8 100644
--- a/src/tests/func/shader_ballot/amd_shader_ballot.c
+++ b/src/tests/func/shader_ballot/amd_shader_ballot.c
@@ -24,6 +24,15 @@
#include <stdio.h>
#include "amd_shader_ballot-spirv.h"
+#include "group_iadd-spirv-as.h"
+#include "group_iadd_nonuniform-spirv-as.h"
+#include "group_iadd_incl-spirv-as.h"
+#include "group_iadd_incl_nonuniform-spirv-as.h"
+#include "group_iadd_excl-spirv-as.h"
+#include "group_iadd_excl_nonuniform-spirv-as.h"
+#include "group_all-spirv-as.h"
+#include "group_any-spirv-as.h"
+#include "group_broadcast-spirv-as.h"
static void
basic(void)
@@ -62,6 +71,336 @@ test_define {
};
static void
+fadd(void)
+{
+ t_require_ext("VK_EXT_shader_subgroup_ballot");
+ t_require_ext("VK_AMD_shader_ballot");
+ VkShaderModule fs = qoCreateShaderModuleGLSL(t_device, FRAGMENT,
+///#extension GL_ARB_gpu_shader_int64 : enable
+///#extension GL_ARB_shader_ballot : enable
+///#extension GL_AMD_shader_ballot : enable
+ layout(location = 0) out vec4 f_color;
+
+ uint bitCount64(uint64_t val) {
+ uvec2 split = unpackUint2x32(val);
+ return bitCount(split.x) + bitCount(split.y);
+ }
+
+ void main() {
+ float sum = addInvocationsNonUniformAMD(1.);
+
+ uint count = bitCount64(ballotARB(true));
+ if (sum != float(count)) {
+ f_color = vec4(1.0, float(sum) / 255., float(count) / 255., 1.0);
+ } else {
+ f_color = vec4(0.0, 1.0, 0.0, 1.0);
+ }
+ }
+ );
+ run_simple_pipeline(fs, NULL, 0);
+}
+
+test_define {
+ .name = "func.amd-shader-ballot.fadd",
+ .start = fadd,
+ .image_filename = "32x32-green.ref.png",
+};
+
+static void
+fadd_double(void)
+{
+ t_require_ext("VK_EXT_shader_subgroup_ballot");
+ t_require_ext("VK_AMD_shader_ballot");
+ VkShaderModule fs = qoCreateShaderModuleGLSL(t_device, FRAGMENT,
+///#extension GL_ARB_gpu_shader_int64 : enable
+///#extension GL_ARB_shader_ballot : enable
+///#extension GL_AMD_shader_ballot : enable
+ layout(location = 0) out vec4 f_color;
+
+ uint bitCount64(uint64_t val) {
+ uvec2 split = unpackUint2x32(val);
+ return bitCount(split.x) + bitCount(split.y);
+ }
+
+ void main() {
+ double sum = addInvocationsNonUniformAMD(1.0LF);
+
+ uint count = bitCount64(ballotARB(true));
+ if (sum != double(count)) {
+ f_color = vec4(1.0, float(sum) / 255., float(count) / 255., 1.0);
+ } else {
+ f_color = vec4(0.0, 1.0, 0.0, 1.0);
+ }
+ }
+ );
+ run_simple_pipeline(fs, NULL, 0);
+}
+
+test_define {
+ .name = "func.amd-shader-ballot.fadd-double",
+ .start = fadd_double,
+ .image_filename = "32x32-green.ref.png",
+};
+
+static void
+umin(void)
+{
+ t_require_ext("VK_EXT_shader_subgroup_ballot");
+ t_require_ext("VK_AMD_shader_ballot");
+ VkShaderModule fs = qoCreateShaderModuleGLSL(t_device, FRAGMENT,
+///#extension GL_ARB_gpu_shader_int64 : enable
+///#extension GL_ARB_shader_ballot : enable
+///#extension GL_AMD_shader_ballot : enable
+ layout(location = 0) out vec4 f_color;
+
+ int findLSB64(uint64_t v)
+ {
+ uvec2 split = unpackUint2x32(v);
+ int lsb = findLSB(split.x);
+ if (lsb >= 0)
+ return lsb;
+
+ lsb = findLSB(split.y);
+ if (lsb >= 0)
+ return 32 + lsb;
+
+ return -1;
+ }
+
+ void main() {
+ uint minInvocation = minInvocationsNonUniformAMD(gl_SubGroupInvocationARB);
+
+ uint minInvocation2 = findLSB64(ballotARB(true));
+ if (minInvocation != minInvocation2) {
+ f_color = vec4(1.0, float(minInvocation) / 255., float(minInvocation2) / 255., 1.0);
+ } else {
+ f_color = vec4(0.0, 1.0, 0.0, 1.0);
+ }
+ }
+ );
+ run_simple_pipeline(fs, NULL, 0);
+}
+
+test_define {
+ .name = "func.amd-shader-ballot.umin",
+ .start = umin,
+ .image_filename = "32x32-green.ref.png",
+};
+
+static void
+imin(void)
+{
+ t_require_ext("VK_EXT_shader_subgroup_ballot");
+ t_require_ext("VK_AMD_shader_ballot");
+ VkShaderModule fs = qoCreateShaderModuleGLSL(t_device, FRAGMENT,
+///#extension GL_ARB_gpu_shader_int64 : enable
+///#extension GL_ARB_shader_ballot : enable
+///#extension GL_AMD_shader_ballot : enable
+ layout(location = 0) out vec4 f_color;
+
+ int findLSB64(uint64_t v)
+ {
+ uvec2 split = unpackUint2x32(v);
+ int lsb = findLSB(split.x);
+ if (lsb >= 0)
+ return lsb;
+
+ lsb = findLSB(split.y);
+ if (lsb >= 0)
+ return 32 + lsb;
+
+ return -1;
+ }
+
+ void main() {
+ uint minInvocation = uint(minInvocationsNonUniformAMD(int(gl_SubGroupInvocationARB)));
+
+ uint minInvocation2 = findLSB64(ballotARB(true));
+ if (minInvocation != minInvocation2) {
+ f_color = vec4(1.0, float(minInvocation) / 255., float(minInvocation2) / 255., 1.0);
+ } else {
+ f_color = vec4(0.0, 1.0, 0.0, 1.0);
+ }
+ }
+ );
+ run_simple_pipeline(fs, NULL, 0);
+}
+
+test_define {
+ .name = "func.amd-shader-ballot.imin",
+ .start = imin,
+ .image_filename = "32x32-green.ref.png",
+};
+
+static void
+test_fmin(void)
+{
+ t_require_ext("VK_EXT_shader_subgroup_ballot");
+ t_require_ext("VK_AMD_shader_ballot");
+ VkShaderModule fs = qoCreateShaderModuleGLSL(t_device, FRAGMENT,
+///#extension GL_ARB_gpu_shader_int64 : enable
+///#extension GL_ARB_shader_ballot : enable
+///#extension GL_AMD_shader_ballot : enable
+ layout(location = 0) out vec4 f_color;
+
+ int findLSB64(uint64_t v)
+ {
+ uvec2 split = unpackUint2x32(v);
+ int lsb = findLSB(split.x);
+ if (lsb >= 0)
+ return lsb;
+
+ lsb = findLSB(split.y);
+ if (lsb >= 0)
+ return 32 + lsb;
+
+ return -1;
+ }
+
+ void main() {
+ uint minInvocation = uint(minInvocationsNonUniformAMD(float(gl_SubGroupInvocationARB)));
+
+ uint minInvocation2 = findLSB64(ballotARB(true));
+ if (minInvocation != minInvocation2) {
+ f_color = vec4(1.0, float(minInvocation) / 255., float(minInvocation2) / 255., 1.0);
+ } else {
+ f_color = vec4(0.0, 1.0, 0.0, 1.0);
+ }
+ }
+ );
+ run_simple_pipeline(fs, NULL, 0);
+}
+
+test_define {
+ .name = "func.amd-shader-ballot.fmin",
+ .start = test_fmin,
+ .image_filename = "32x32-green.ref.png",
+};
+
+static void
+umax(void)
+{
+ t_require_ext("VK_EXT_shader_subgroup_ballot");
+ t_require_ext("VK_AMD_shader_ballot");
+ VkShaderModule fs = qoCreateShaderModuleGLSL(t_device, FRAGMENT,
+///#extension GL_ARB_gpu_shader_int64 : enable
+///#extension GL_ARB_shader_ballot : enable
+///#extension GL_AMD_shader_ballot : enable
+ layout(location = 0) out vec4 f_color;
+
+ int findMSB64(uint64_t v)
+ {
+ uvec2 split = unpackUint2x32(v);
+ int msb = findMSB(split.y);
+ if (msb >= 0)
+ return 32 + msb;
+
+ return findMSB(split.x);
+ }
+
+ void main() {
+ uint maxInvocation = maxInvocationsNonUniformAMD(gl_SubGroupInvocationARB);
+
+ uint maxInvocation2 = findMSB64(ballotARB(true));
+ if (maxInvocation != maxInvocation2) {
+ f_color = vec4(1.0, float(maxInvocation) / 255., float(maxInvocation2) / 255., 1.0);
+ } else {
+ f_color = vec4(0.0, 1.0, 0.0, 1.0);
+ }
+ }
+ );
+ run_simple_pipeline(fs, NULL, 0);
+}
+
+test_define {
+ .name = "func.amd-shader-ballot.umax",
+ .start = umax,
+ .image_filename = "32x32-green.ref.png",
+};
+
+static void
+imax(void)
+{
+ t_require_ext("VK_EXT_shader_subgroup_ballot");
+ t_require_ext("VK_AMD_shader_ballot");
+ VkShaderModule fs = qoCreateShaderModuleGLSL(t_device, FRAGMENT,
+///#extension GL_ARB_gpu_shader_int64 : enable
+///#extension GL_ARB_shader_ballot : enable
+///#extension GL_AMD_shader_ballot : enable
+ layout(location = 0) out vec4 f_color;
+
+ int findMSB64(uint64_t v)
+ {
+ uvec2 split = unpackUint2x32(v);
+ int msb = findMSB(split.y);
+ if (msb >= 0)
+ return 32 + msb;
+
+ return findMSB(split.x);
+ }
+
+ void main() {
+ uint maxInvocation = uint(maxInvocationsNonUniformAMD(int(gl_SubGroupInvocationARB)));
+
+ uint maxInvocation2 = findMSB64(ballotARB(true));
+ if (maxInvocation != maxInvocation2) {
+ f_color = vec4(1.0, float(maxInvocation) / 255., float(maxInvocation2) / 255., 1.0);
+ } else {
+ f_color = vec4(0.0, 1.0, 0.0, 1.0);
+ }
+ }
+ );
+ run_simple_pipeline(fs, NULL, 0);
+}
+
+test_define {
+ .name = "func.amd-shader-ballot.imax",
+ .start = imax,
+ .image_filename = "32x32-green.ref.png",
+};
+
+static void
+test_fmax(void)
+{
+ t_require_ext("VK_EXT_shader_subgroup_ballot");
+ t_require_ext("VK_AMD_shader_ballot");
+ VkShaderModule fs = qoCreateShaderModuleGLSL(t_device, FRAGMENT,
+///#extension GL_ARB_gpu_shader_int64 : enable
+///#extension GL_ARB_shader_ballot : enable
+///#extension GL_AMD_shader_ballot : enable
+ layout(location = 0) out vec4 f_color;
+
+ int findMSB64(uint64_t v)
+ {
+ uvec2 split = unpackUint2x32(v);
+ int msb = findMSB(split.y);
+ if (msb >= 0)
+ return 32 + msb;
+
+ return findMSB(split.x);
+ }
+
+ void main() {
+ uint maxInvocation = uint(maxInvocationsNonUniformAMD(float(gl_SubGroupInvocationARB)));
+
+ uint maxInvocation2 = findMSB64(ballotARB(true));
+ if (maxInvocation != maxInvocation2) {
+ f_color = vec4(1.0, float(maxInvocation) / 255., float(maxInvocation2) / 255., 1.0);
+ } else {
+ f_color = vec4(0.0, 1.0, 0.0, 1.0);
+ }
+ }
+ );
+ run_simple_pipeline(fs, NULL, 0);
+}
+
+test_define {
+ .name = "func.amd-shader-ballot.fmax",
+ .start = test_fmax,
+ .image_filename = "32x32-green.ref.png",
+};
+
+static void
inclusive_scan_iadd(void)
{
t_require_ext("VK_EXT_shader_subgroup_ballot");
@@ -104,6 +443,48 @@ test_define {
.image_filename = "32x32-green.ref.png",
};
+static void
+exclusive_scan_iadd(void)
+{
+ t_require_ext("VK_EXT_shader_subgroup_ballot");
+ t_require_ext("VK_AMD_shader_ballot");
+ VkShaderModule fs = qoCreateShaderModuleGLSL(t_device, FRAGMENT,
+///#extension GL_ARB_gpu_shader_int64 : enable
+///#extension GL_ARB_shader_ballot : enable
+///#extension GL_AMD_shader_ballot : enable
+ layout(location = 0) out vec4 f_color;
+
+ uint bitCount64(uint64_t val) {
+ uvec2 split = unpackUint2x32(val);
+ return bitCount(split.x) + bitCount(split.y);
+ }
+
+ uint mbcnt() {
+ uint64_t active_set = ballotARB(true);
+ uint invocation = gl_SubGroupInvocationARB;
+ uint64_t mask_le = invocation == 63 ? ~0ul : (1ul << (invocation + 1)) - 1;
+ return bitCount64(active_set & mask_le);
+ }
+
+ void main() {
+ int sum = addInvocationsExclusiveScanNonUniformAMD(1);
+
+ int count = int(mbcnt()) - 1;
+ if (sum != count) {
+ f_color = vec4(1.0, float(sum) / 255., 0.0, float(gl_SubGroupInvocationARB) / 255.);
+ } else {
+ f_color = vec4(0.0, 1.0, 0.0, 1.0);
+ }
+ }
+ );
+ run_simple_pipeline(fs, NULL, 0);
+}
+
+test_define {
+ .name = "func.amd-shader-ballot.exclusive-scan-iadd",
+ .start = exclusive_scan_iadd,
+ .image_filename = "32x32-green.ref.png",
+};
static VkDeviceMemory
common_init(VkShaderModule cs, const uint32_t ssbo_size)
{
@@ -244,6 +625,404 @@ test_define {
};
static void
+group_iadd_compute(void)
+{
+ t_require_ext("VK_AMD_shader_ballot");
+
+ VkShaderModule cs = qoCreateShaderModule(t_device, group_iadd_info);
+
+ const uint32_t ssbo_size = 256 * sizeof(uint32_t);
+ VkDeviceMemory mem_out = common_init(cs, ssbo_size);
+
+ dispatch_and_wait(1, 1, 1);
+
+ uint32_t *map_out = qoMapMemory(t_device, mem_out, 0, ssbo_size, 0);
+ for (unsigned i = 0; i < 256; i++) {
+ t_assertf(map_out[i] == 256,
+ "buffer mismatch at uint %d: found %u, "
+ "expected %u", i, map_out[i], 4 * 64);
+ }
+ t_pass();
+}
+
+test_define {
+ .name = "func.amd-shader-ballot.group-iadd-compute",
+ .start = group_iadd_compute,
+ .no_image = true,
+};
+
+/* Builds a shader module from group_iadd_nonuniform_info, dispatches one
+ * workgroup over a 256-uint SSBO, and checks every even-indexed result
+ * against 256 / 2. */
+static void
+group_iadd_nonuniform_compute(void)
+{
+ t_require_ext("VK_AMD_shader_ballot");
+
+ VkShaderModule cs = qoCreateShaderModule(t_device, group_iadd_nonuniform_info);
+
+ const uint32_t ssbo_size = 256 * sizeof(uint32_t);
+ VkDeviceMemory mem_out = common_init(cs, ssbo_size);
+
+ dispatch_and_wait(1, 1, 1);
+
+ uint32_t *map_out = qoMapMemory(t_device, mem_out, 0, ssbo_size, 0);
+ /* Only even slots are inspected; the reported expected value must match
+ * the value actually compared against. */
+ for (unsigned i = 0; i < 256; i += 2) {
+ t_assertf(map_out[i] == 256 / 2,
+ "buffer mismatch at uint %d: found %u, "
+ "expected %u", i, map_out[i], 256 / 2);
+ }
+ t_pass();
+}
+
+test_define {
+ .name = "func.amd-shader-ballot.group-iadd-nonuniform-compute",
+ .start = group_iadd_nonuniform_compute,
+ .no_image = true,
+};
+
+static void
+group_iadd_inclusive_scan_compute(void)
+{
+ t_require_ext("VK_AMD_shader_ballot");
+
+ VkShaderModule cs = qoCreateShaderModule(t_device, group_iadd_incl_info);
+
+ const uint32_t ssbo_size = 256 * sizeof(uint32_t);
+ VkDeviceMemory mem_out = common_init(cs, ssbo_size);
+
+ dispatch_and_wait(1, 1, 1);
+
+ uint32_t *map_out = qoMapMemory(t_device, mem_out, 0, ssbo_size, 0);
+ for (unsigned i = 0; i < 256; i++) {
+ t_assertf(map_out[i] == i + 1,
+ "buffer mismatch at uint %d: found %u, "
+ "expected %u", i, map_out[i], i + 1);
+ }
+ t_pass();
+}
+
+test_define {
+ .name = "func.amd-shader-ballot.group-iadd-inclusive-scan-compute",
+ .start = group_iadd_inclusive_scan_compute,
+ .no_image = true,
+};
+
+static void
+group_iadd_inclusive_scan_nonuniform_compute(void)
+{
+ t_require_ext("VK_AMD_shader_ballot");
+
+ VkShaderModule cs = qoCreateShaderModule(t_device, group_iadd_incl_nonuniform_info);
+
+ const uint32_t ssbo_size = 256 * sizeof(uint32_t);
+ VkDeviceMemory mem_out = common_init(cs, ssbo_size);
+
+ dispatch_and_wait(1, 1, 1);
+
+ uint32_t *map_out = qoMapMemory(t_device, mem_out, 0, ssbo_size, 0);
+ for (unsigned i = 0; i < 256; i += 2) {
+ t_assertf(map_out[i] == i / 2 + 1,
+ "buffer mismatch at uint %d: found %u, "
+ "expected %u", i, map_out[i], i / 2 + 1);
+ }
+ t_pass();
+}
+
+test_define {
+ .name = "func.amd-shader-ballot.group-iadd-inclusive-scan-nonuniform-compute",
+ .start = group_iadd_inclusive_scan_nonuniform_compute,
+ .no_image = true,
+};
+
+static void
+group_iadd_exclusive_scan_compute(void)
+{
+ t_require_ext("VK_AMD_shader_ballot");
+
+ VkShaderModule cs = qoCreateShaderModule(t_device, group_iadd_excl_info);
+
+ const uint32_t ssbo_size = 256 * sizeof(uint32_t);
+ VkDeviceMemory mem_out = common_init(cs, ssbo_size);
+
+ dispatch_and_wait(1, 1, 1);
+
+ uint32_t *map_out = qoMapMemory(t_device, mem_out, 0, ssbo_size, 0);
+ for (unsigned i = 0; i < 256; i++) {
+ t_assertf(map_out[i] == i,
+ "buffer mismatch at uint %d: found %u, "
+ "expected %u", i, map_out[i], i);
+ }
+ t_pass();
+}
+
+test_define {
+ .name = "func.amd-shader-ballot.group-iadd-exclusive-scan-compute",
+ .start = group_iadd_exclusive_scan_compute,
+ .no_image = true,
+};
+
+static void
+group_iadd_exclusive_scan_nonuniform_compute(void)
+{
+ t_require_ext("VK_AMD_shader_ballot");
+
+ VkShaderModule cs = qoCreateShaderModule(t_device, group_iadd_excl_nonuniform_info);
+
+ const uint32_t ssbo_size = 256 * sizeof(uint32_t);
+ VkDeviceMemory mem_out = common_init(cs, ssbo_size);
+
+ dispatch_and_wait(1, 1, 1);
+
+ uint32_t *map_out = qoMapMemory(t_device, mem_out, 0, ssbo_size, 0);
+ for (unsigned i = 0; i < 256; i += 2) {
+ t_assertf(map_out[i] == i / 2,
+ "buffer mismatch at uint %d: found %u, "
+ "expected %u", i, map_out[i], i / 2);
+ }
+ t_pass();
+}
+
+test_define {
+ .name = "func.amd-shader-ballot.group-iadd-exclusive-scan-nonuniform-compute",
+ .start = group_iadd_exclusive_scan_nonuniform_compute,
+ .no_image = true,
+};
+
+static void
+group_any_compute(void)
+{
+ t_require_ext("VK_AMD_shader_ballot");
+
+ VkShaderModule cs = qoCreateShaderModule(t_device, group_any_info);
+
+ const uint32_t ssbo_size = 3 * 256 * 2 * sizeof(uint32_t);
+ VkDeviceMemory mem_out = common_init(cs, ssbo_size);
+ uint32_t *map_out = qoMapMemory(t_device, mem_out, 0, ssbo_size, 0);
+ /* first workgroup: alternating 1's and 0's */
+ for (int i = 0; i < 256; i++)
+ map_out[2 * i] = (i % 2 == 0);
+ /* second workgroup: all 0 */
+ for (int i = 256; i < 2 * 256; i++)
+ map_out[2 * i] = 0;
+ /* third workgroup: all 1 */
+ for (int i = 2 * 256; i < 3 * 256; i++)
+ map_out[2 * i] = 1;
+
+ dispatch_and_wait(3, 1, 1);
+
+ for (unsigned i = 0; i < 256; i++) {
+ t_assertf(map_out[2 * i + 1] == 1,
+ "buffer mismatch at uint %d: found %u, "
+ "expected %u", i, map_out[2 * i + 1], 1);
+ }
+ for (unsigned i = 256; i < 2 * 256; i++) {
+ t_assertf(map_out[2 * i + 1] == 0,
+ "buffer mismatch at uint %d: found %u, "
+ "expected %u", i, map_out[2 * i + 1], 0);
+ }
+ for (unsigned i = 2 * 256; i < 3 * 256; i++) {
+ t_assertf(map_out[2 * i + 1] == 1,
+ "buffer mismatch at uint %d: found %u, "
+ "expected %u", i, map_out[2 * i + 1], 1);
+ }
+ t_pass();
+}
+
+test_define {
+ .name = "func.amd-shader-ballot.group-any-compute",
+ .start = group_any_compute,
+ .no_image = true,
+};
+
+static void
+group_all_compute(void)
+{
+ t_require_ext("VK_AMD_shader_ballot");
+
+ VkShaderModule cs = qoCreateShaderModule(t_device, group_all_info);
+
+ const uint32_t ssbo_size = 3 * 256 * 2 * sizeof(uint32_t);
+ VkDeviceMemory mem_out = common_init(cs, ssbo_size);
+ uint32_t *map_out = qoMapMemory(t_device, mem_out, 0, ssbo_size, 0);
+ /* first workgroup: alternating 1's and 0's */
+ for (int i = 0; i < 256; i++)
+ map_out[2 * i] = (i % 2 == 0);
+ /* second workgroup: all 0 */
+ for (int i = 256; i < 2 * 256; i++)
+ map_out[2 * i] = 0;
+ /* third workgroup: all 1 */
+ for (int i = 2 * 256; i < 3 * 256; i++)
+ map_out[2 * i] = 1;
+
+ dispatch_and_wait(3, 1, 1);
+
+ for (unsigned i = 0; i < 256; i++) {
+ t_assertf(map_out[2 * i + 1] == 0,
+ "buffer mismatch at uint %d: found %u, "
+ "expected %u", i, map_out[2 * i + 1], 0);
+ }
+ for (unsigned i = 256; i < 2 * 256; i++) {
+ t_assertf(map_out[2 * i + 1] == 0,
+ "buffer mismatch at uint %d: found %u, "
+ "expected %u", i, map_out[2 * i + 1], 0);
+ }
+ for (unsigned i = 2 * 256; i < 3 * 256; i++) {
+ t_assertf(map_out[2 * i + 1] == 1,
+ "buffer mismatch at uint %d: found %u, "
+ "expected %u", i, map_out[2 * i + 1], 1);
+ }
+ t_pass();
+}
+
+test_define {
+ .name = "func.amd-shader-ballot.group-all-compute",
+ .start = group_all_compute,
+ .no_image = true,
+};
+
+static void
+group_broadcast_compute(void)
+{
+ t_require_ext("VK_AMD_shader_ballot");
+
+ VkShaderModule cs = qoCreateShaderModule(t_device, group_broadcast_info);
+
+ const uint32_t ssbo_size = 256 * sizeof(uint32_t);
+ VkDeviceMemory mem_out = common_init(cs, ssbo_size);
+
+ dispatch_and_wait(1, 1, 1);
+
+ uint32_t *map_out = qoMapMemory(t_device, mem_out, 0, ssbo_size, 0);
+ for (unsigned i = 0; i < 256; i++) {
+ t_assertf(map_out[i] == 42,
+ "buffer mismatch at uint %d: found %u, "
+ "expected %u", i, map_out[i], 42);
+ }
+ t_pass();
+}
+
+test_define {
+ .name = "func.amd-shader-ballot.group-broadcast-compute",
+ .start = group_broadcast_compute,
+ .no_image = true,
+};
+
+static void
+quad_perm_compute(void)
+{
+ t_require_ext("VK_EXT_shader_subgroup_ballot");
+ t_require_ext("VK_AMD_shader_ballot");
+
+ VkShaderModule cs = qoCreateShaderModuleGLSL(
+ t_device, COMPUTE,
+///#extension GL_ARB_shader_ballot : enable
+///#extension GL_AMD_shader_ballot : enable
+///#extension GL_ARB_gpu_shader_int64 : enable
+ struct result {
+ uint a;
+ uint b;
+ };
+ layout(set = 0, binding = 0, std430) buffer Storage {
+ result ua[];
+ } ssbo;
+
+ layout (local_size_x = 64) in;
+
+ // emulate swizzleInvocationsAMD(gl_SubGroupInvocationARB, ...)
+ // Returns the swizzled invocation index within the caller's quad, or 0
+ // when that invocation is not set in ballotARB(true).
+ uint swizzleInvocations(uvec4 swizzle)
+ {
+ uint quad = gl_SubGroupInvocationARB & ~0x3u;
+ uint quad_idx = gl_SubGroupInvocationARB & 0x3u;
+ uint swizzled = quad + swizzle[quad_idx];
+ uint64_t active_set = ballotARB(true);
+ // swizzled can exceed 31, so the mask bit must be built as a 64-bit
+ // value; a plain "1" would make this a 32-bit shift.
+ return bool(active_set & (1ul << swizzled)) ? swizzled : 0;
+ }
+
+ void main()
+ {
+ ssbo.ua[gl_GlobalInvocationID.x].a = swizzleInvocationsAMD(uvec2(gl_SubGroupInvocationARB), uvec4(3u, 2u, 1u, 0u)).x;
+ ssbo.ua[gl_GlobalInvocationID.x].b = swizzleInvocations(uvec4(3u, 2u, 1u, 0u));
+ }
+ );
+
+ const uint32_t ssbo_size = 8 * 64 * sizeof(uint32_t);
+ VkDeviceMemory mem_out = common_init(cs, ssbo_size);
+
+ dispatch_and_wait(4, 1, 1);
+
+ uint32_t *map_out = qoMapMemory(t_device, mem_out, 0, ssbo_size, 0);
+ for (unsigned i = 0; i < 4 * 64; i++) {
+ t_assertf(map_out[2 * i] == map_out[2 * i + 1],
+ "buffer mismatch at uint %d: found %u, "
+ "expected %u", i, map_out[2 * i], map_out[2 * i + 1]);
+ }
+ t_pass();
+}
+
+test_define {
+ .name = "func.amd-shader-ballot.quad-perm-compute",
+ .start = quad_perm_compute,
+ .no_image = true,
+};
+
+static void
+swizzle_compute(void)
+{
+ t_require_ext("VK_EXT_shader_subgroup_ballot");
+ t_require_ext("VK_AMD_shader_ballot");
+
+ VkShaderModule cs = qoCreateShaderModuleGLSL(
+ t_device, COMPUTE,
+///#extension GL_ARB_shader_ballot : enable
+///#extension GL_AMD_shader_ballot : enable
+///#extension GL_ARB_gpu_shader_int64 : enable
+ struct result {
+ uint a;
+ uint b;
+ };
+ layout(set = 0, binding = 0, std430) buffer Storage {
+ result ua[];
+ } ssbo;
+
+ layout (local_size_x = 64) in;
+
+ // emulate swizzleInvocationsMaskedAMD(gl_SubGroupInvocationARB, ...)
+ // Returns the masked-swizzle source index j, or 0 when invocation j is
+ // not set in ballotARB(true).
+ uint swizzleInvocationsMasked(uvec3 mask)
+ {
+ uint i = gl_SubGroupInvocationARB;
+ // from the spec
+ uint j = (((i & 0x1f) & mask.x) | mask.y) ^ mask.z;
+ j |= (i & 0x20);
+ uint64_t active_set = ballotARB(true);
+ // j may have bit 5 set (values up to 63), so the mask bit must be built
+ // as a 64-bit value; a plain "1" would make this a 32-bit shift.
+ return bool(active_set & (1ul << j)) ? j : 0;
+ }
+
+ void main()
+ {
+ ssbo.ua[gl_GlobalInvocationID.x].a = swizzleInvocationsMaskedAMD(gl_SubGroupInvocationARB, uvec3(0x1f, 2u, 1u));
+ ssbo.ua[gl_GlobalInvocationID.x].b = swizzleInvocationsMasked(uvec3(0x1f, 2u, 1u));
+ }
+ );
+
+ const uint32_t ssbo_size = 8 * 64 * sizeof(uint32_t);
+ VkDeviceMemory mem_out = common_init(cs, ssbo_size);
+
+ dispatch_and_wait(4, 1, 1);
+
+ uint32_t *map_out = qoMapMemory(t_device, mem_out, 0, ssbo_size, 0);
+ for (unsigned i = 0; i < 4 * 64; i++) {
+ t_assertf(map_out[2 * i] == map_out[2 * i + 1],
+ "buffer mismatch at uint %d: found %u, "
+ "expected %u", i, map_out[2 * i], map_out[2 * i + 1]);
+ }
+ t_pass();
+}
+
+test_define {
+ .name = "func.amd-shader-ballot.swizzle-compute",
+ .start = swizzle_compute,
+ .no_image = true,
+};
+
+static void
ballot_if_else(void)
{
t_require_ext("VK_EXT_shader_subgroup_ballot");
diff --git a/src/tests/func/shader_ballot/group_all.spv b/src/tests/func/shader_ballot/group_all.spv
new file mode 100644
index 0000000..e2fcb8c
--- /dev/null
+++ b/src/tests/func/shader_ballot/group_all.spv
@@ -0,0 +1,79 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Google Shaderc over Glslang; 1
+; Bound: 40
+; Schema: 0
+ OpCapability Shader
+ OpCapability SubgroupVoteKHR
+ OpExtension "SPV_KHR_subgroup_vote"
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID
+ OpExecutionMode %main LocalSize 256 1 1
+ OpSource GLSL 450
+ OpSourceExtension "GL_ARB_shader_group_vote"
+ OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
+ OpSourceExtension "GL_GOOGLE_include_directive"
+ OpName %main "main"
+ OpName %ua "ua"
+ OpName %PerThread "PerThread"
+ OpMemberName %PerThread 0 "ua"
+ OpMemberName %PerThread 1 "ub"
+ OpName %Storage "Storage"
+ OpMemberName %Storage 0 "per_thread"
+ OpName %ssbo "ssbo"
+ OpName %gl_GlobalInvocationID "gl_GlobalInvocationID"
+ OpName %ub "ub"
+ OpMemberDecorate %PerThread 0 Offset 0
+ OpMemberDecorate %PerThread 1 Offset 4
+ OpDecorate %_runtimearr_PerThread ArrayStride 8
+ OpMemberDecorate %Storage 0 Offset 0
+ OpDecorate %Storage BufferBlock
+ OpDecorate %ssbo DescriptorSet 0
+ OpDecorate %ssbo Binding 0
+ OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
+ OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize
+ %void = OpTypeVoid
+ %3 = OpTypeFunction %void
+ %uint = OpTypeInt 32 0
+%_ptr_Function_uint = OpTypePointer Function %uint
+ %PerThread = OpTypeStruct %uint %uint
+%_runtimearr_PerThread = OpTypeRuntimeArray %PerThread
+ %Storage = OpTypeStruct %_runtimearr_PerThread
+%_ptr_Uniform_Storage = OpTypePointer Uniform %Storage
+ %ssbo = OpVariable %_ptr_Uniform_Storage Uniform
+ %int = OpTypeInt 32 1
+ %int_0 = OpConstant %int 0
+ %v3uint = OpTypeVector %uint 3
+%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+ %uint_0 = OpConstant %uint 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+ %bool = OpTypeBool
+ %uint_1 = OpConstant %uint 1
+ %uint_2 = OpConstant %uint 2
+ %int_1 = OpConstant %int 1
+ %uint_256 = OpConstant %uint 256
+%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_256 %uint_1 %uint_1
+ %main = OpFunction %void None %3
+ %5 = OpLabel
+ %ua = OpVariable %_ptr_Function_uint Function
+ %ub = OpVariable %_ptr_Function_uint Function
+ %21 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0
+ %22 = OpLoad %uint %21
+ %24 = OpAccessChain %_ptr_Uniform_uint %ssbo %int_0 %22 %int_0
+ %25 = OpLoad %uint %24
+ OpStore %ua %25
+ %27 = OpLoad %uint %ua
+ %29 = OpINotEqual %bool %27 %uint_0
+ %30 = OpGroupAll %bool %uint_2 %29
+ %32 = OpSelect %uint %30 %uint_1 %uint_0
+ OpStore %ub %32
+ %33 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0
+ %34 = OpLoad %uint %33
+ %36 = OpLoad %uint %ub
+ %37 = OpAccessChain %_ptr_Uniform_uint %ssbo %int_0 %34 %int_1
+ OpStore %37 %36
+ OpReturn
+ OpFunctionEnd
diff --git a/src/tests/func/shader_ballot/group_any.glsl b/src/tests/func/shader_ballot/group_any.glsl
new file mode 100644
index 0000000..b4aa04b
--- /dev/null
+++ b/src/tests/func/shader_ballot/group_any.glsl
@@ -0,0 +1,23 @@
+#version 450
+#extension GL_ARB_shader_group_vote : enable
+
+struct PerThread {
+ uint ua;
+ uint ub;
+};
+
+layout(set = 0, binding = 0, std430) buffer Storage {
+ PerThread per_thread[];
+} ssbo;
+
+layout (local_size_x = 256, local_size_y = 1) in;
+
+void main()
+{
+ // Note: in SPIR-V, this gets translated to a vote instruction with subgroup
+ // scope. There's currently no way to express an OpGroupAny with workgroup
+ // scope, so I've changed the SPIR-V assembly myself.
+ uint ua = ssbo.per_thread[gl_GlobalInvocationID.x].ua;
+ uint ub = uint(anyInvocationARB(bool(ua)));
+ ssbo.per_thread[gl_GlobalInvocationID.x].ub = ub;
+}
diff --git a/src/tests/func/shader_ballot/group_any.spv b/src/tests/func/shader_ballot/group_any.spv
new file mode 100644
index 0000000..9079361
--- /dev/null
+++ b/src/tests/func/shader_ballot/group_any.spv
@@ -0,0 +1,79 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Google Shaderc over Glslang; 1
+; Bound: 40
+; Schema: 0
+ OpCapability Shader
+ OpCapability Groups ; required by the OpGroupAny used in %main
+ OpExtension "SPV_KHR_subgroup_vote"
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID
+ OpExecutionMode %main LocalSize 256 1 1
+ OpSource GLSL 450
+ OpSourceExtension "GL_ARB_shader_group_vote"
+ OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
+ OpSourceExtension "GL_GOOGLE_include_directive"
+ OpName %main "main"
+ OpName %ua "ua"
+ OpName %PerThread "PerThread"
+ OpMemberName %PerThread 0 "ua"
+ OpMemberName %PerThread 1 "ub"
+ OpName %Storage "Storage"
+ OpMemberName %Storage 0 "per_thread"
+ OpName %ssbo "ssbo"
+ OpName %gl_GlobalInvocationID "gl_GlobalInvocationID"
+ OpName %ub "ub"
+ OpMemberDecorate %PerThread 0 Offset 0
+ OpMemberDecorate %PerThread 1 Offset 4
+ OpDecorate %_runtimearr_PerThread ArrayStride 8
+ OpMemberDecorate %Storage 0 Offset 0
+ OpDecorate %Storage BufferBlock
+ OpDecorate %ssbo DescriptorSet 0
+ OpDecorate %ssbo Binding 0
+ OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
+ OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize
+ %void = OpTypeVoid
+ %3 = OpTypeFunction %void
+ %uint = OpTypeInt 32 0
+%_ptr_Function_uint = OpTypePointer Function %uint
+ %PerThread = OpTypeStruct %uint %uint
+%_runtimearr_PerThread = OpTypeRuntimeArray %PerThread
+ %Storage = OpTypeStruct %_runtimearr_PerThread
+%_ptr_Uniform_Storage = OpTypePointer Uniform %Storage
+ %ssbo = OpVariable %_ptr_Uniform_Storage Uniform
+ %int = OpTypeInt 32 1
+ %int_0 = OpConstant %int 0
+ %v3uint = OpTypeVector %uint 3
+%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+ %uint_0 = OpConstant %uint 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+ %bool = OpTypeBool
+ %uint_1 = OpConstant %uint 1
+ %uint_2 = OpConstant %uint 2
+ %int_1 = OpConstant %int 1
+ %uint_256 = OpConstant %uint 256
+%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_256 %uint_1 %uint_1
+ %main = OpFunction %void None %3
+ %5 = OpLabel
+ %ua = OpVariable %_ptr_Function_uint Function
+ %ub = OpVariable %_ptr_Function_uint Function
+ %21 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0
+ %22 = OpLoad %uint %21
+ %24 = OpAccessChain %_ptr_Uniform_uint %ssbo %int_0 %22 %int_0
+ %25 = OpLoad %uint %24
+ OpStore %ua %25
+ %27 = OpLoad %uint %ua
+ %29 = OpINotEqual %bool %27 %uint_0
+ %30 = OpGroupAny %bool %uint_2 %29
+ %32 = OpSelect %uint %30 %uint_1 %uint_0
+ OpStore %ub %32
+ %33 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0
+ %34 = OpLoad %uint %33
+ %36 = OpLoad %uint %ub
+ %37 = OpAccessChain %_ptr_Uniform_uint %ssbo %int_0 %34 %int_1
+ OpStore %37 %36
+ OpReturn
+ OpFunctionEnd
diff --git a/src/tests/func/shader_ballot/group_broadcast.spv b/src/tests/func/shader_ballot/group_broadcast.spv
new file mode 100644
index 0000000..7ccf486
--- /dev/null
+++ b/src/tests/func/shader_ballot/group_broadcast.spv
@@ -0,0 +1,57 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Google Shaderc over Glslang; 1
+; Bound: 29
+; Schema: 0
+ OpCapability Shader
+ OpCapability Groups
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID
+ OpExecutionMode %main LocalSize 256 1 1
+ OpSource GLSL 450
+ OpSourceExtension "GL_AMD_shader_ballot"
+ OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
+ OpSourceExtension "GL_GOOGLE_include_directive"
+ OpName %main "main"
+ OpName %Storage "Storage"
+ OpMemberName %Storage 0 "ua"
+ OpName %ssbo "ssbo"
+ OpName %gl_GlobalInvocationID "gl_GlobalInvocationID"
+ OpDecorate %_runtimearr_uint ArrayStride 4
+ OpMemberDecorate %Storage 0 Offset 0
+ OpDecorate %Storage BufferBlock
+ OpDecorate %ssbo DescriptorSet 0
+ OpDecorate %ssbo Binding 0
+ OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
+ OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize
+ %void = OpTypeVoid
+ %3 = OpTypeFunction %void
+ %uint = OpTypeInt 32 0
+%_runtimearr_uint = OpTypeRuntimeArray %uint
+ %Storage = OpTypeStruct %_runtimearr_uint
+%_ptr_Uniform_Storage = OpTypePointer Uniform %Storage
+ %ssbo = OpVariable %_ptr_Uniform_Storage Uniform
+ %int = OpTypeInt 32 1
+ %int_0 = OpConstant %int 0
+ %v3uint = OpTypeVector %uint 3
+%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+ %uint_0 = OpConstant %uint 0
+ %uint_42 = OpConstant %uint 42
+%_ptr_Input_uint = OpTypePointer Input %uint
+ %int_1 = OpConstant %int 1
+ %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+ %uint_256 = OpConstant %uint 256
+ %uint_1 = OpConstant %uint 1
+%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_256 %uint_1 %uint_1
+ %main = OpFunction %void None %3
+ %5 = OpLabel
+ %18 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0
+ %19 = OpLoad %uint %18
+ %22 = OpGroupBroadcast %uint %uint_2 %19 %uint_42
+ %25 = OpAccessChain %_ptr_Uniform_uint %ssbo %int_0 %19
+ OpStore %25 %22
+ OpReturn
+ OpFunctionEnd
diff --git a/src/tests/func/shader_ballot/group_iadd.glsl b/src/tests/func/shader_ballot/group_iadd.glsl
new file mode 100644
index 0000000..398ef0b
--- /dev/null
+++ b/src/tests/func/shader_ballot/group_iadd.glsl
@@ -0,0 +1,16 @@
+#version 450
+#extension GL_AMD_shader_ballot : enable
+
+layout(set = 0, binding = 0, std430) buffer Storage {
+ uint ua[];
+} ssbo;
+
+layout (local_size_x = 256, local_size_y = 1) in;
+
+void main()
+{
+ // Note: glslang translates addInvocationsAMD() to an OpGroupIAdd (Reduce)
+ // with subgroup scope. GLSL currently has no way to request workgroup
+ // scope, so the checked-in group_iadd.spv was edited by hand to use it.
+ ssbo.ua[gl_GlobalInvocationID.x] = addInvocationsAMD(1);
+}
diff --git a/src/tests/func/shader_ballot/group_iadd.spv b/src/tests/func/shader_ballot/group_iadd.spv
new file mode 100644
index 0000000..7d306e1
--- /dev/null
+++ b/src/tests/func/shader_ballot/group_iadd.spv
@@ -0,0 +1,57 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Google Shaderc over Glslang; 1
+; Bound: 29
+; Schema: 0
+ OpCapability Shader
+ OpCapability Groups
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID
+ OpExecutionMode %main LocalSize 256 1 1
+ OpSource GLSL 450
+ OpSourceExtension "GL_AMD_shader_ballot"
+ OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
+ OpSourceExtension "GL_GOOGLE_include_directive"
+ OpName %main "main"
+ OpName %Storage "Storage"
+ OpMemberName %Storage 0 "ua"
+ OpName %ssbo "ssbo"
+ OpName %gl_GlobalInvocationID "gl_GlobalInvocationID"
+ OpDecorate %_runtimearr_uint ArrayStride 4
+ OpMemberDecorate %Storage 0 Offset 0
+ OpDecorate %Storage BufferBlock
+ OpDecorate %ssbo DescriptorSet 0
+ OpDecorate %ssbo Binding 0
+ OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
+ OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize
+ %void = OpTypeVoid
+ %3 = OpTypeFunction %void
+ %uint = OpTypeInt 32 0
+%_runtimearr_uint = OpTypeRuntimeArray %uint
+ %Storage = OpTypeStruct %_runtimearr_uint
+%_ptr_Uniform_Storage = OpTypePointer Uniform %Storage
+ %ssbo = OpVariable %_ptr_Uniform_Storage Uniform
+ %int = OpTypeInt 32 1
+ %int_0 = OpConstant %int 0
+ %v3uint = OpTypeVector %uint 3
+%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+ %uint_0 = OpConstant %uint 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+ %int_1 = OpConstant %int 1
+ %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+ %uint_256 = OpConstant %uint 256
+ %uint_1 = OpConstant %uint 1
+%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_256 %uint_1 %uint_1
+ %main = OpFunction %void None %3
+ %5 = OpLabel
+ %18 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0
+ %19 = OpLoad %uint %18
+ %22 = OpGroupIAdd %int %uint_2 Reduce %int_1
+ %23 = OpBitcast %uint %22
+ %25 = OpAccessChain %_ptr_Uniform_uint %ssbo %int_0 %19
+ OpStore %25 %23
+ OpReturn
+ OpFunctionEnd
diff --git a/src/tests/func/shader_ballot/group_iadd_excl.glsl b/src/tests/func/shader_ballot/group_iadd_excl.glsl
new file mode 100644
index 0000000..7d279ef
--- /dev/null
+++ b/src/tests/func/shader_ballot/group_iadd_excl.glsl
@@ -0,0 +1,16 @@
+#version 450
+#extension GL_AMD_shader_ballot : enable
+
+layout(set = 0, binding = 0, std430) buffer Storage {
+ uint ua[];
+} ssbo;
+
+layout (local_size_x = 256, local_size_y = 1) in;
+
+void main()
+{
+ // Note: glslang translates addInvocationsExclusiveScanAMD() to an
+ // OpGroupIAdd (ExclusiveScan) with subgroup scope. GLSL cannot request
+ // workgroup scope, so group_iadd_excl.spv was edited by hand to use it.
+ ssbo.ua[gl_GlobalInvocationID.x] = addInvocationsExclusiveScanAMD(1);
+}
diff --git a/src/tests/func/shader_ballot/group_iadd_excl.spv b/src/tests/func/shader_ballot/group_iadd_excl.spv
new file mode 100644
index 0000000..f66219e
--- /dev/null
+++ b/src/tests/func/shader_ballot/group_iadd_excl.spv
@@ -0,0 +1,57 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Google Shaderc over Glslang; 1
+; Bound: 29
+; Schema: 0
+ OpCapability Shader
+ OpCapability Groups
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID
+ OpExecutionMode %main LocalSize 256 1 1
+ OpSource GLSL 450
+ OpSourceExtension "GL_AMD_shader_ballot"
+ OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
+ OpSourceExtension "GL_GOOGLE_include_directive"
+ OpName %main "main"
+ OpName %Storage "Storage"
+ OpMemberName %Storage 0 "ua"
+ OpName %ssbo "ssbo"
+ OpName %gl_GlobalInvocationID "gl_GlobalInvocationID"
+ OpDecorate %_runtimearr_uint ArrayStride 4
+ OpMemberDecorate %Storage 0 Offset 0
+ OpDecorate %Storage BufferBlock
+ OpDecorate %ssbo DescriptorSet 0
+ OpDecorate %ssbo Binding 0
+ OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
+ OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize
+ %void = OpTypeVoid
+ %3 = OpTypeFunction %void
+ %uint = OpTypeInt 32 0
+%_runtimearr_uint = OpTypeRuntimeArray %uint
+ %Storage = OpTypeStruct %_runtimearr_uint
+%_ptr_Uniform_Storage = OpTypePointer Uniform %Storage
+ %ssbo = OpVariable %_ptr_Uniform_Storage Uniform
+ %int = OpTypeInt 32 1
+ %int_0 = OpConstant %int 0
+ %v3uint = OpTypeVector %uint 3
+%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+ %uint_0 = OpConstant %uint 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+ %int_1 = OpConstant %int 1
+ %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+ %uint_256 = OpConstant %uint 256
+ %uint_1 = OpConstant %uint 1
+%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_256 %uint_1 %uint_1
+ %main = OpFunction %void None %3
+ %5 = OpLabel
+ %18 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0
+ %19 = OpLoad %uint %18
+ %22 = OpGroupIAdd %int %uint_2 ExclusiveScan %int_1
+ %23 = OpBitcast %uint %22
+ %25 = OpAccessChain %_ptr_Uniform_uint %ssbo %int_0 %19
+ OpStore %25 %23
+ OpReturn
+ OpFunctionEnd
diff --git a/src/tests/func/shader_ballot/group_iadd_excl_nonuniform.spv b/src/tests/func/shader_ballot/group_iadd_excl_nonuniform.spv
new file mode 100644
index 0000000..6dbd42a
--- /dev/null
+++ b/src/tests/func/shader_ballot/group_iadd_excl_nonuniform.spv
@@ -0,0 +1,70 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Google Shaderc over Glslang; 1
+; Bound: 37
+; Schema: 0
+ OpCapability Shader
+ OpCapability Groups
+ OpExtension "SPV_AMD_shader_ballot"
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint GLCompute %main "main" %gl_LocalInvocationIndex %gl_GlobalInvocationID
+ OpExecutionMode %main LocalSize 256 1 1
+ OpSource GLSL 450
+ OpSourceExtension "GL_AMD_shader_ballot"
+ OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
+ OpSourceExtension "GL_GOOGLE_include_directive"
+ OpName %main "main"
+ OpName %gl_LocalInvocationIndex "gl_LocalInvocationIndex"
+ OpName %Storage "Storage"
+ OpMemberName %Storage 0 "ua"
+ OpName %ssbo "ssbo"
+ OpName %gl_GlobalInvocationID "gl_GlobalInvocationID"
+ OpDecorate %gl_LocalInvocationIndex BuiltIn LocalInvocationIndex
+ OpDecorate %_runtimearr_uint ArrayStride 4
+ OpMemberDecorate %Storage 0 Offset 0
+ OpDecorate %Storage BufferBlock
+ OpDecorate %ssbo DescriptorSet 0
+ OpDecorate %ssbo Binding 0
+ OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
+ OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize
+ %void = OpTypeVoid
+ %3 = OpTypeFunction %void
+ %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%gl_LocalInvocationIndex = OpVariable %_ptr_Input_uint Input
+ %uint_2 = OpConstant %uint 2
+ %uint_0 = OpConstant %uint 0
+ %bool = OpTypeBool
+%_runtimearr_uint = OpTypeRuntimeArray %uint
+ %Storage = OpTypeStruct %_runtimearr_uint
+%_ptr_Uniform_Storage = OpTypePointer Uniform %Storage
+ %ssbo = OpVariable %_ptr_Uniform_Storage Uniform
+ %int = OpTypeInt 32 1
+ %int_0 = OpConstant %int 0
+ %v3uint = OpTypeVector %uint 3
+%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+ %int_1 = OpConstant %int 1
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+ %uint_256 = OpConstant %uint 256
+ %uint_1 = OpConstant %uint 1
+%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_256 %uint_1 %uint_1
+ %main = OpFunction %void None %3
+ %5 = OpLabel
+ %9 = OpLoad %uint %gl_LocalInvocationIndex
+ %11 = OpUMod %uint %9 %uint_2
+ %14 = OpIEqual %bool %11 %uint_0
+ OpSelectionMerge %16 None
+ OpBranchConditional %14 %15 %16
+ %15 = OpLabel
+ %26 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0
+ %27 = OpLoad %uint %26
+ %30 = OpGroupIAddNonUniformAMD %int %uint_2 ExclusiveScan %int_1
+ %31 = OpBitcast %uint %30
+ %33 = OpAccessChain %_ptr_Uniform_uint %ssbo %int_0 %27
+ OpStore %33 %31
+ OpBranch %16
+ %16 = OpLabel
+ OpReturn
+ OpFunctionEnd
diff --git a/src/tests/func/shader_ballot/group_iadd_incl.glsl b/src/tests/func/shader_ballot/group_iadd_incl.glsl
new file mode 100644
index 0000000..d646ef7
--- /dev/null
+++ b/src/tests/func/shader_ballot/group_iadd_incl.glsl
@@ -0,0 +1,16 @@
+#version 450
+#extension GL_AMD_shader_ballot : enable
+
+layout(set = 0, binding = 0, std430) buffer Storage {
+ uint ua[];
+} ssbo;
+
+layout (local_size_x = 256, local_size_y = 1) in;
+
+void main()
+{
+ // Note: glslang translates addInvocationsInclusiveScanAMD() to an
+ // OpGroupIAdd (InclusiveScan) with subgroup scope. GLSL cannot request
+ // workgroup scope, so group_iadd_incl.spv was edited by hand to use it.
+ ssbo.ua[gl_GlobalInvocationID.x] = addInvocationsInclusiveScanAMD(1);
+}
diff --git a/src/tests/func/shader_ballot/group_iadd_incl.spv b/src/tests/func/shader_ballot/group_iadd_incl.spv
new file mode 100644
index 0000000..9c22bd5
--- /dev/null
+++ b/src/tests/func/shader_ballot/group_iadd_incl.spv
@@ -0,0 +1,57 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Google Shaderc over Glslang; 1
+; Bound: 29
+; Schema: 0
+ OpCapability Shader
+ OpCapability Groups
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID
+ OpExecutionMode %main LocalSize 256 1 1
+ OpSource GLSL 450
+ OpSourceExtension "GL_AMD_shader_ballot"
+ OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
+ OpSourceExtension "GL_GOOGLE_include_directive"
+ OpName %main "main"
+ OpName %Storage "Storage"
+ OpMemberName %Storage 0 "ua"
+ OpName %ssbo "ssbo"
+ OpName %gl_GlobalInvocationID "gl_GlobalInvocationID"
+ OpDecorate %_runtimearr_uint ArrayStride 4
+ OpMemberDecorate %Storage 0 Offset 0
+ OpDecorate %Storage BufferBlock
+ OpDecorate %ssbo DescriptorSet 0
+ OpDecorate %ssbo Binding 0
+ OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
+ OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize
+ %void = OpTypeVoid
+ %3 = OpTypeFunction %void
+ %uint = OpTypeInt 32 0
+%_runtimearr_uint = OpTypeRuntimeArray %uint
+ %Storage = OpTypeStruct %_runtimearr_uint
+%_ptr_Uniform_Storage = OpTypePointer Uniform %Storage
+ %ssbo = OpVariable %_ptr_Uniform_Storage Uniform
+ %int = OpTypeInt 32 1
+ %int_0 = OpConstant %int 0
+ %v3uint = OpTypeVector %uint 3
+%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+ %uint_0 = OpConstant %uint 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+ %int_1 = OpConstant %int 1
+ %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+ %uint_256 = OpConstant %uint 256
+ %uint_1 = OpConstant %uint 1
+%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_256 %uint_1 %uint_1
+ %main = OpFunction %void None %3
+ %5 = OpLabel
+ %18 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0
+ %19 = OpLoad %uint %18
+ %22 = OpGroupIAdd %int %uint_2 InclusiveScan %int_1
+ %23 = OpBitcast %uint %22
+ %25 = OpAccessChain %_ptr_Uniform_uint %ssbo %int_0 %19
+ OpStore %25 %23
+ OpReturn
+ OpFunctionEnd
diff --git a/src/tests/func/shader_ballot/group_iadd_incl_nonuniform.glsl b/src/tests/func/shader_ballot/group_iadd_incl_nonuniform.glsl
new file mode 100644
index 0000000..45bf949
--- /dev/null
+++ b/src/tests/func/shader_ballot/group_iadd_incl_nonuniform.glsl
@@ -0,0 +1,18 @@
+#version 450
+#extension GL_AMD_shader_ballot : enable
+
+layout(set = 0, binding = 0, std430) buffer Storage {
+ uint ua[];
+} ssbo;
+
+layout (local_size_x = 256, local_size_y = 1) in;
+
+void main()
+{
+ // Note: in SPIR-V this becomes an OpGroupIAddNonUniformAMD (InclusiveScan)
+ // with subgroup scope. GLSL cannot request workgroup scope, so
+ // group_iadd_incl_nonuniform.spv was edited by hand to use it.
+ if (gl_LocalInvocationIndex % 2 == 0) {
+ ssbo.ua[gl_GlobalInvocationID.x] = addInvocationsInclusiveScanNonUniformAMD(1);
+ }
+}
diff --git a/src/tests/func/shader_ballot/group_iadd_incl_nonuniform.spv b/src/tests/func/shader_ballot/group_iadd_incl_nonuniform.spv
new file mode 100644
index 0000000..2638553
--- /dev/null
+++ b/src/tests/func/shader_ballot/group_iadd_incl_nonuniform.spv
@@ -0,0 +1,70 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Google Shaderc over Glslang; 1
+; Bound: 37
+; Schema: 0
+ OpCapability Shader
+ OpCapability Groups
+ OpExtension "SPV_AMD_shader_ballot"
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint GLCompute %main "main" %gl_LocalInvocationIndex %gl_GlobalInvocationID
+ OpExecutionMode %main LocalSize 256 1 1
+ OpSource GLSL 450
+ OpSourceExtension "GL_AMD_shader_ballot"
+ OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
+ OpSourceExtension "GL_GOOGLE_include_directive"
+ OpName %main "main"
+ OpName %gl_LocalInvocationIndex "gl_LocalInvocationIndex"
+ OpName %Storage "Storage"
+ OpMemberName %Storage 0 "ua"
+ OpName %ssbo "ssbo"
+ OpName %gl_GlobalInvocationID "gl_GlobalInvocationID"
+ OpDecorate %gl_LocalInvocationIndex BuiltIn LocalInvocationIndex
+ OpDecorate %_runtimearr_uint ArrayStride 4
+ OpMemberDecorate %Storage 0 Offset 0
+ OpDecorate %Storage BufferBlock
+ OpDecorate %ssbo DescriptorSet 0
+ OpDecorate %ssbo Binding 0
+ OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
+ OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize
+ %void = OpTypeVoid
+ %3 = OpTypeFunction %void
+ %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%gl_LocalInvocationIndex = OpVariable %_ptr_Input_uint Input
+ %uint_2 = OpConstant %uint 2
+ %uint_0 = OpConstant %uint 0
+ %bool = OpTypeBool
+%_runtimearr_uint = OpTypeRuntimeArray %uint
+ %Storage = OpTypeStruct %_runtimearr_uint
+%_ptr_Uniform_Storage = OpTypePointer Uniform %Storage
+ %ssbo = OpVariable %_ptr_Uniform_Storage Uniform
+ %int = OpTypeInt 32 1
+ %int_0 = OpConstant %int 0
+ %v3uint = OpTypeVector %uint 3
+%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+ %int_1 = OpConstant %int 1
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+ %uint_256 = OpConstant %uint 256
+ %uint_1 = OpConstant %uint 1
+%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_256 %uint_1 %uint_1
+ %main = OpFunction %void None %3
+ %5 = OpLabel
+ %9 = OpLoad %uint %gl_LocalInvocationIndex
+ %11 = OpUMod %uint %9 %uint_2
+ %14 = OpIEqual %bool %11 %uint_0
+ OpSelectionMerge %16 None
+ OpBranchConditional %14 %15 %16
+ %15 = OpLabel
+ %26 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0
+ %27 = OpLoad %uint %26
+ %30 = OpGroupIAddNonUniformAMD %int %uint_2 InclusiveScan %int_1
+ %31 = OpBitcast %uint %30
+ %33 = OpAccessChain %_ptr_Uniform_uint %ssbo %int_0 %27
+ OpStore %33 %31
+ OpBranch %16
+ %16 = OpLabel
+ OpReturn
+ OpFunctionEnd
diff --git a/src/tests/func/shader_ballot/group_iadd_nonuniform.glsl b/src/tests/func/shader_ballot/group_iadd_nonuniform.glsl
new file mode 100644
index 0000000..1be8dd3
--- /dev/null
+++ b/src/tests/func/shader_ballot/group_iadd_nonuniform.glsl
@@ -0,0 +1,18 @@
+#version 450
+#extension GL_AMD_shader_ballot : enable
+
+layout(set = 0, binding = 0, std430) buffer Storage {
+ uint ua[];
+} ssbo;
+
+layout (local_size_x = 256, local_size_y = 1) in;
+
+void main()
+{
+ // Note: in SPIR-V this becomes an OpGroupIAddNonUniformAMD (Reduce) with
+ // subgroup scope. GLSL cannot request workgroup scope, so
+ // group_iadd_nonuniform.spv was edited by hand to use it.
+ if (gl_LocalInvocationIndex % 2 == 0) {
+ ssbo.ua[gl_GlobalInvocationID.x] = addInvocationsNonUniformAMD(1);
+ }
+}
diff --git a/src/tests/func/shader_ballot/group_iadd_nonuniform.spv b/src/tests/func/shader_ballot/group_iadd_nonuniform.spv
new file mode 100644
index 0000000..ad8ca5c
--- /dev/null
+++ b/src/tests/func/shader_ballot/group_iadd_nonuniform.spv
@@ -0,0 +1,70 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Google Shaderc over Glslang; 1
+; Bound: 37
+; Schema: 0
+ OpCapability Shader
+ OpCapability Groups
+ OpExtension "SPV_AMD_shader_ballot"
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint GLCompute %main "main" %gl_LocalInvocationIndex %gl_GlobalInvocationID
+ OpExecutionMode %main LocalSize 256 1 1
+ OpSource GLSL 450
+ OpSourceExtension "GL_AMD_shader_ballot"
+ OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
+ OpSourceExtension "GL_GOOGLE_include_directive"
+ OpName %main "main"
+ OpName %gl_LocalInvocationIndex "gl_LocalInvocationIndex"
+ OpName %Storage "Storage"
+ OpMemberName %Storage 0 "ua"
+ OpName %ssbo "ssbo"
+ OpName %gl_GlobalInvocationID "gl_GlobalInvocationID"
+ OpDecorate %gl_LocalInvocationIndex BuiltIn LocalInvocationIndex
+ OpDecorate %_runtimearr_uint ArrayStride 4
+ OpMemberDecorate %Storage 0 Offset 0
+ OpDecorate %Storage BufferBlock
+ OpDecorate %ssbo DescriptorSet 0
+ OpDecorate %ssbo Binding 0
+ OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
+ OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize
+ %void = OpTypeVoid
+ %3 = OpTypeFunction %void
+ %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%gl_LocalInvocationIndex = OpVariable %_ptr_Input_uint Input
+ %uint_2 = OpConstant %uint 2
+ %uint_0 = OpConstant %uint 0
+ %bool = OpTypeBool
+%_runtimearr_uint = OpTypeRuntimeArray %uint
+ %Storage = OpTypeStruct %_runtimearr_uint
+%_ptr_Uniform_Storage = OpTypePointer Uniform %Storage
+ %ssbo = OpVariable %_ptr_Uniform_Storage Uniform
+ %int = OpTypeInt 32 1
+ %int_0 = OpConstant %int 0
+ %v3uint = OpTypeVector %uint 3
+%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+ %int_1 = OpConstant %int 1
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+ %uint_256 = OpConstant %uint 256
+ %uint_1 = OpConstant %uint 1
+%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_256 %uint_1 %uint_1
+ %main = OpFunction %void None %3
+ %5 = OpLabel
+ %9 = OpLoad %uint %gl_LocalInvocationIndex
+ %11 = OpUMod %uint %9 %uint_2
+ %14 = OpIEqual %bool %11 %uint_0
+ OpSelectionMerge %16 None
+ OpBranchConditional %14 %15 %16
+ %15 = OpLabel
+ %26 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0
+ %27 = OpLoad %uint %26
+ %30 = OpGroupIAddNonUniformAMD %int %uint_2 Reduce %int_1
+ %31 = OpBitcast %uint %30
+ %33 = OpAccessChain %_ptr_Uniform_uint %ssbo %int_0 %27
+ OpStore %33 %31
+ OpBranch %16
+ %16 = OpLabel
+ OpReturn
+ OpFunctionEnd
diff --git a/src/tests/func/shader_ballot/test.spv b/src/tests/func/shader_ballot/test.spv
new file mode 100644
index 0000000..37cd930
--- /dev/null
+++ b/src/tests/func/shader_ballot/test.spv
@@ -0,0 +1,57 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Google Shaderc over Glslang; 1
+; Bound: 29
+; Schema: 0
+ OpCapability Shader
+ OpCapability Groups
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID
+ OpExecutionMode %main LocalSize 256 1 1
+ OpSource GLSL 450
+ OpSourceExtension "GL_AMD_shader_ballot"
+ OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
+ OpSourceExtension "GL_GOOGLE_include_directive"
+ OpName %main "main"
+ OpName %Storage "Storage"
+ OpMemberName %Storage 0 "ua"
+ OpName %ssbo "ssbo"
+ OpName %gl_GlobalInvocationID "gl_GlobalInvocationID"
+ OpDecorate %_runtimearr_uint ArrayStride 4
+ OpMemberDecorate %Storage 0 Offset 0
+ OpDecorate %Storage BufferBlock
+ OpDecorate %ssbo DescriptorSet 0
+ OpDecorate %ssbo Binding 0
+ OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
+ OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize
+ %void = OpTypeVoid
+ %3 = OpTypeFunction %void
+ %uint = OpTypeInt 32 0
+%_runtimearr_uint = OpTypeRuntimeArray %uint
+ %Storage = OpTypeStruct %_runtimearr_uint
+%_ptr_Uniform_Storage = OpTypePointer Uniform %Storage
+ %ssbo = OpVariable %_ptr_Uniform_Storage Uniform
+ %int = OpTypeInt 32 1
+ %int_0 = OpConstant %int 0
+ %v3uint = OpTypeVector %uint 3
+%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+ %uint_0 = OpConstant %uint 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+ %int_1 = OpConstant %int 1
+ %uint_3 = OpConstant %uint 3
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+ %uint_256 = OpConstant %uint 256
+ %uint_1 = OpConstant %uint 1
+%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_256 %uint_1 %uint_1
+ %main = OpFunction %void None %3
+ %5 = OpLabel
+ %18 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0
+ %19 = OpLoad %uint %18
+ %22 = OpGroupIAdd %int %uint_3 ExclusiveScan %int_1
+ %23 = OpBitcast %uint %22
+ %25 = OpAccessChain %_ptr_Uniform_uint %ssbo %int_0 %19
+ OpStore %25 %23
+ OpReturn
+ OpFunctionEnd