diff options
21 files changed, 1678 insertions, 0 deletions
@@ -10,6 +10,7 @@ # generated files *-spirv.h +*-spirv-as.h /data/grass-*.jpg /data/grass-*.png /data/pink-leaves-*.jpg diff --git a/Makefile.am b/Makefile.am index 7e9be29..64da935 100644 --- a/Makefile.am +++ b/Makefile.am @@ -141,6 +141,15 @@ BUILT_SOURCES = \ src/tests/func/shader/pack_unpack-spirv.h \ src/tests/func/shader_ballot/ext_shader_ballot-spirv.h \ src/tests/func/shader_ballot/amd_shader_ballot-spirv.h \ + src/tests/func/shader_ballot/group_all-spirv-as.h \ + src/tests/func/shader_ballot/group_any-spirv-as.h \ + src/tests/func/shader_ballot/group_broadcast-spirv-as.h \ + src/tests/func/shader_ballot/group_iadd-spirv-as.h \ + src/tests/func/shader_ballot/group_iadd_nonuniform-spirv-as.h \ + src/tests/func/shader_ballot/group_iadd_excl-spirv-as.h \ + src/tests/func/shader_ballot/group_iadd_excl_nonuniform-spirv-as.h \ + src/tests/func/shader_ballot/group_iadd_incl-spirv-as.h \ + src/tests/func/shader_ballot/group_iadd_incl_nonuniform-spirv-as.h \ src/tests/func/shader_group_vote/ext_shader_subgroup_vote-spirv.h \ src/tests/func/ssbo/interleave-spirv.h \ src/tests/func/sync/semaphore-fd-spirv.h \ @@ -152,6 +161,9 @@ bin_crucible_LDADD = $(MESA_LDFLAGS) -lm -lvulkan -lpthread $(libpng_LIBS) \ %-spirv.h: %.c misc/glsl_scraper.py $(AM_V_GEN) $(PYTHON3) $(srcdir)/misc/glsl_scraper.py --with-glslc=$(GLSLC) -o $@ $< +%-spirv-as.h: %.spv misc/spirv_as.py + $(AM_V_GEN) $(PYTHON3) $(srcdir)/misc/spirv_as.py --with-spirv-as=$(SPIRV_AS) -o $@ $< + %_gen.c: %_gen.py $(AM_V_GEN) $(PYTHON3) $< diff --git a/configure.ac b/configure.ac index 9e137e7..74027d8 100644 --- a/configure.ac +++ b/configure.ac @@ -55,6 +55,13 @@ if test "x$GLSLC" = "x"; then AC_MSG_ERROR([failed to find glslc]) fi +AC_ARG_VAR([SPIRV_AS], [path to spirv-as executable]) +AC_SUBST([SPIRV_AS]) +AC_CHECK_PROGS([SPIRV_AS], [spirv-as]) +if test "x$SPIRV_AS" = "x"; then + AC_MSG_ERROR([failed to find spirv-as]) +fi + PKG_CHECK_MODULES([libpng16], [libpng16], [HAVE_LIBPNG16=1], [HAVE_LIBPNG16=0]) 
AM_CONDITIONAL([NO_PNG16], [test $HAVE_LIBPNG16 = 0]) AM_COND_IF([NO_PNG16], diff --git a/misc/spirv_as.py b/misc/spirv_as.py new file mode 100644 index 0000000..5e31157 --- /dev/null +++ b/misc/spirv_as.py @@ -0,0 +1,119 @@ +#! /usr/bin/env python3 + +import argparse +import io +import os +import re +import shutil +import struct +import subprocess +import sys +import tempfile +from textwrap import dedent + +class ShaderCompileError(RuntimeError): + def __init__(self, *args): + super(ShaderCompileError, self).__init__(*args) + +class Shader: + def __init__(self, in_file): + self.dwords = None + self.in_file = in_file + self.name = os.path.splitext(os.path.basename(in_file))[0] + + def __run_spirv_as(self): + with subprocess.Popen([spirv_as] + + ['-o', '-', self.in_file], + stdout = subprocess.PIPE, + stderr = subprocess.PIPE) as proc: + + out, err = proc.communicate(timeout=30) + + if proc.returncode != 0: + # Don't rely on which stream the assembler reports + # errors on: combine stdout and stderr into the message + # carried by the exception. + message = out.decode('utf-8') + '\n' + err.decode('utf-8') + raise ShaderCompileError(message.strip()) + + return out + + def compile(self): + def dwords(f): + while True: + dword_str = f.read(4) + if not dword_str: + return + assert len(dword_str) == 4 + yield struct.unpack('I', dword_str)[0] + + spirv = self.__run_spirv_as() + self.dwords = list(dwords(io.BytesIO(spirv))) + + def dump_c_code(self, f): + f.write('static const uint32_t __{0}_spir_v_src[] = {{'.format(self.name)) + line_start = 0 + while line_start < len(self.dwords): + f.write('\n ') + for i in range(line_start, min(line_start + 6, len(self.dwords))): + f.write(' 0x{:08x},'.format(self.dwords[i])) + line_start += 6 + f.write('\n};\n\n') + + f.write(dedent("""\ + #define {0}_info \ + .spirvSize = sizeof(__{0}_spir_v_src), \ + .pSpirv = __{0}_spir_v_src + """.format(self.name))) + +def open_file(name, mode): + if name == '-': + if mode == 'w': + 
return sys.stdout + elif mode == 'r': + return sys.stdin + else: + assert False + else: + return open(name, mode) + +def parse_args(): + description = dedent("""\ + This program assembles a SPIR-V shader and emits a header with the + resulting binary that can be included by a test. + + If '-' is passed as the input file or output file, stdin or stdout + will be used instead of a file on disc.""") + + p = argparse.ArgumentParser( + description=description, + formatter_class=argparse.RawDescriptionHelpFormatter) + p.add_argument('-o', '--outfile', default='-', + help='Output to the given file (default: stdout).') + p.add_argument('--with-spirv-as', metavar='PATH', + default='spirv-as', + dest='spirv_as', + help='Full path to the spirv-as assembler.') + p.add_argument('infile', metavar='INFILE') + + return p.parse_args() + + +args = parse_args() +infname = args.infile +outfname = args.outfile +spirv_as = args.spirv_as + +shader = Shader(infname) +shader.compile() + +with open_file(outfname, 'w') as outfile: + outfile.write(dedent("""\ + /* ========================== DO NOT EDIT! ========================== + * This file is autogenerated by spirv_as.py. 
+ */ + + #include <stdint.h> + + """)) + shader.dump_c_code(outfile) diff --git a/src/tests/func/shader_ballot/amd_shader_ballot.c b/src/tests/func/shader_ballot/amd_shader_ballot.c index 5f88ba9..c4e6ee8 100644 --- a/src/tests/func/shader_ballot/amd_shader_ballot.c +++ b/src/tests/func/shader_ballot/amd_shader_ballot.c @@ -24,6 +24,15 @@ #include <stdio.h> #include "amd_shader_ballot-spirv.h" +#include "group_iadd-spirv-as.h" +#include "group_iadd_nonuniform-spirv-as.h" +#include "group_iadd_incl-spirv-as.h" +#include "group_iadd_incl_nonuniform-spirv-as.h" +#include "group_iadd_excl-spirv-as.h" +#include "group_iadd_excl_nonuniform-spirv-as.h" +#include "group_all-spirv-as.h" +#include "group_any-spirv-as.h" +#include "group_broadcast-spirv-as.h" static void basic(void) @@ -62,6 +71,336 @@ test_define { }; static void +fadd(void) +{ + t_require_ext("VK_EXT_shader_subgroup_ballot"); + t_require_ext("VK_AMD_shader_ballot"); + VkShaderModule fs = qoCreateShaderModuleGLSL(t_device, FRAGMENT, +///#extension GL_ARB_gpu_shader_int64 : enable +///#extension GL_ARB_shader_ballot : enable +///#extension GL_AMD_shader_ballot : enable + layout(location = 0) out vec4 f_color; + + uint bitCount64(uint64_t val) { + uvec2 split = unpackUint2x32(val); + return bitCount(split.x) + bitCount(split.y); + } + + void main() { + float sum = addInvocationsNonUniformAMD(1.); + + uint count = bitCount64(ballotARB(true)); + if (sum != float(count)) { + f_color = vec4(1.0, float(sum) / 255., float(count) / 255., 1.0); + } else { + f_color = vec4(0.0, 1.0, 0.0, 1.0); + } + } + ); + run_simple_pipeline(fs, NULL, 0); +} + +test_define { + .name = "func.amd-shader-ballot.fadd", + .start = fadd, + .image_filename = "32x32-green.ref.png", +}; + +static void +fadd_double(void) +{ + t_require_ext("VK_EXT_shader_subgroup_ballot"); + t_require_ext("VK_AMD_shader_ballot"); + VkShaderModule fs = qoCreateShaderModuleGLSL(t_device, FRAGMENT, +///#extension GL_ARB_gpu_shader_int64 : enable +///#extension 
GL_ARB_shader_ballot : enable +///#extension GL_AMD_shader_ballot : enable + layout(location = 0) out vec4 f_color; + + uint bitCount64(uint64_t val) { + uvec2 split = unpackUint2x32(val); + return bitCount(split.x) + bitCount(split.y); + } + + void main() { + double sum = addInvocationsNonUniformAMD(1.0LF); + + uint count = bitCount64(ballotARB(true)); + if (sum != double(count)) { + f_color = vec4(1.0, float(sum) / 255., float(count) / 255., 1.0); + } else { + f_color = vec4(0.0, 1.0, 0.0, 1.0); + } + } + ); + run_simple_pipeline(fs, NULL, 0); +} + +test_define { + .name = "func.amd-shader-ballot.fadd-double", + .start = fadd_double, + .image_filename = "32x32-green.ref.png", +}; + +static void +umin(void) +{ + t_require_ext("VK_EXT_shader_subgroup_ballot"); + t_require_ext("VK_AMD_shader_ballot"); + VkShaderModule fs = qoCreateShaderModuleGLSL(t_device, FRAGMENT, +///#extension GL_ARB_gpu_shader_int64 : enable +///#extension GL_ARB_shader_ballot : enable +///#extension GL_AMD_shader_ballot : enable + layout(location = 0) out vec4 f_color; + + int findLSB64(uint64_t v) + { + uvec2 split = unpackUint2x32(v); + int lsb = findLSB(split.x); + if (lsb >= 0) + return lsb; + + lsb = findLSB(split.y); + if (lsb >= 0) + return 32 + lsb; + + return -1; + } + + void main() { + uint minInvocation = minInvocationsNonUniformAMD(gl_SubGroupInvocationARB); + + uint minInvocation2 = findLSB64(ballotARB(true)); + if (minInvocation != minInvocation2) { + f_color = vec4(1.0, float(minInvocation) / 255., float(minInvocation2) / 255., 1.0); + } else { + f_color = vec4(0.0, 1.0, 0.0, 1.0); + } + } + ); + run_simple_pipeline(fs, NULL, 0); +} + +test_define { + .name = "func.amd-shader-ballot.umin", + .start = umin, + .image_filename = "32x32-green.ref.png", +}; + +static void +imin(void) +{ + t_require_ext("VK_EXT_shader_subgroup_ballot"); + t_require_ext("VK_AMD_shader_ballot"); + VkShaderModule fs = qoCreateShaderModuleGLSL(t_device, FRAGMENT, +///#extension GL_ARB_gpu_shader_int64 : 
enable +///#extension GL_ARB_shader_ballot : enable +///#extension GL_AMD_shader_ballot : enable + layout(location = 0) out vec4 f_color; + + int findLSB64(uint64_t v) + { + uvec2 split = unpackUint2x32(v); + int lsb = findLSB(split.x); + if (lsb >= 0) + return lsb; + + lsb = findLSB(split.y); + if (lsb >= 0) + return 32 + lsb; + + return -1; + } + + void main() { + uint minInvocation = uint(minInvocationsNonUniformAMD(int(gl_SubGroupInvocationARB))); + + uint minInvocation2 = findLSB64(ballotARB(true)); + if (minInvocation != minInvocation2) { + f_color = vec4(1.0, float(minInvocation) / 255., float(minInvocation2) / 255., 1.0); + } else { + f_color = vec4(0.0, 1.0, 0.0, 1.0); + } + } + ); + run_simple_pipeline(fs, NULL, 0); +} + +test_define { + .name = "func.amd-shader-ballot.imin", + .start = imin, + .image_filename = "32x32-green.ref.png", +}; + +static void +test_fmin(void) +{ + t_require_ext("VK_EXT_shader_subgroup_ballot"); + t_require_ext("VK_AMD_shader_ballot"); + VkShaderModule fs = qoCreateShaderModuleGLSL(t_device, FRAGMENT, +///#extension GL_ARB_gpu_shader_int64 : enable +///#extension GL_ARB_shader_ballot : enable +///#extension GL_AMD_shader_ballot : enable + layout(location = 0) out vec4 f_color; + + int findLSB64(uint64_t v) + { + uvec2 split = unpackUint2x32(v); + int lsb = findLSB(split.x); + if (lsb >= 0) + return lsb; + + lsb = findLSB(split.y); + if (lsb >= 0) + return 32 + lsb; + + return -1; + } + + void main() { + uint minInvocation = uint(minInvocationsNonUniformAMD(float(gl_SubGroupInvocationARB))); + + uint minInvocation2 = findLSB64(ballotARB(true)); + if (minInvocation != minInvocation2) { + f_color = vec4(1.0, float(minInvocation) / 255., float(minInvocation2) / 255., 1.0); + } else { + f_color = vec4(0.0, 1.0, 0.0, 1.0); + } + } + ); + run_simple_pipeline(fs, NULL, 0); +} + +test_define { + .name = "func.amd-shader-ballot.fmin", + .start = test_fmin, + .image_filename = "32x32-green.ref.png", +}; + +static void +umax(void) +{ + 
t_require_ext("VK_EXT_shader_subgroup_ballot"); + t_require_ext("VK_AMD_shader_ballot"); + VkShaderModule fs = qoCreateShaderModuleGLSL(t_device, FRAGMENT, +///#extension GL_ARB_gpu_shader_int64 : enable +///#extension GL_ARB_shader_ballot : enable +///#extension GL_AMD_shader_ballot : enable + layout(location = 0) out vec4 f_color; + + int findMSB64(uint64_t v) + { + uvec2 split = unpackUint2x32(v); + int msb = findMSB(split.y); + if (msb >= 0) + return 32 + msb; + + return findMSB(split.x); + } + + void main() { + uint maxInvocation = maxInvocationsNonUniformAMD(gl_SubGroupInvocationARB); + + uint maxInvocation2 = findMSB64(ballotARB(true)); + if (maxInvocation != maxInvocation2) { + f_color = vec4(1.0, float(maxInvocation) / 255., float(maxInvocation2) / 255., 1.0); + } else { + f_color = vec4(0.0, 1.0, 0.0, 1.0); + } + } + ); + run_simple_pipeline(fs, NULL, 0); +} + +test_define { + .name = "func.amd-shader-ballot.umax", + .start = umax, + .image_filename = "32x32-green.ref.png", +}; + +static void +imax(void) +{ + t_require_ext("VK_EXT_shader_subgroup_ballot"); + t_require_ext("VK_AMD_shader_ballot"); + VkShaderModule fs = qoCreateShaderModuleGLSL(t_device, FRAGMENT, +///#extension GL_ARB_gpu_shader_int64 : enable +///#extension GL_ARB_shader_ballot : enable +///#extension GL_AMD_shader_ballot : enable + layout(location = 0) out vec4 f_color; + + int findMSB64(uint64_t v) + { + uvec2 split = unpackUint2x32(v); + int msb = findMSB(split.y); + if (msb >= 0) + return 32 + msb; + + return findMSB(split.x); + } + + void main() { + uint maxInvocation = uint(maxInvocationsNonUniformAMD(int(gl_SubGroupInvocationARB))); + + uint maxInvocation2 = findMSB64(ballotARB(true)); + if (maxInvocation != maxInvocation2) { + f_color = vec4(1.0, float(maxInvocation) / 255., float(maxInvocation2) / 255., 1.0); + } else { + f_color = vec4(0.0, 1.0, 0.0, 1.0); + } + } + ); + run_simple_pipeline(fs, NULL, 0); +} + +test_define { + .name = "func.amd-shader-ballot.imax", + .start = 
imax, + .image_filename = "32x32-green.ref.png", +}; + +static void +test_fmax(void) +{ + t_require_ext("VK_EXT_shader_subgroup_ballot"); + t_require_ext("VK_AMD_shader_ballot"); + VkShaderModule fs = qoCreateShaderModuleGLSL(t_device, FRAGMENT, +///#extension GL_ARB_gpu_shader_int64 : enable +///#extension GL_ARB_shader_ballot : enable +///#extension GL_AMD_shader_ballot : enable + layout(location = 0) out vec4 f_color; + + int findMSB64(uint64_t v) + { + uvec2 split = unpackUint2x32(v); + int msb = findMSB(split.y); + if (msb >= 0) + return 32 + msb; + + return findMSB(split.x); + } + + void main() { + uint maxInvocation = uint(maxInvocationsNonUniformAMD(float(gl_SubGroupInvocationARB))); + + uint maxInvocation2 = findMSB64(ballotARB(true)); + if (maxInvocation != maxInvocation2) { + f_color = vec4(1.0, float(maxInvocation) / 255., float(maxInvocation2) / 255., 1.0); + } else { + f_color = vec4(0.0, 1.0, 0.0, 1.0); + } + } + ); + run_simple_pipeline(fs, NULL, 0); +} + +test_define { + .name = "func.amd-shader-ballot.fmax", + .start = test_fmax, + .image_filename = "32x32-green.ref.png", +}; + +static void inclusive_scan_iadd(void) { t_require_ext("VK_EXT_shader_subgroup_ballot"); @@ -104,6 +443,48 @@ test_define { .image_filename = "32x32-green.ref.png", }; +static void +exclusive_scan_iadd(void) +{ + t_require_ext("VK_EXT_shader_subgroup_ballot"); + t_require_ext("VK_AMD_shader_ballot"); + VkShaderModule fs = qoCreateShaderModuleGLSL(t_device, FRAGMENT, +///#extension GL_ARB_gpu_shader_int64 : enable +///#extension GL_ARB_shader_ballot : enable +///#extension GL_AMD_shader_ballot : enable + layout(location = 0) out vec4 f_color; + + uint bitCount64(uint64_t val) { + uvec2 split = unpackUint2x32(val); + return bitCount(split.x) + bitCount(split.y); + } + + uint mbcnt() { + uint64_t active_set = ballotARB(true); + uint invocation = gl_SubGroupInvocationARB; + uint64_t mask_le = invocation == 63 ? 
~0ul : (1ul << (invocation + 1)) - 1; + return bitCount64(active_set & mask_le); + } + + void main() { + int sum = addInvocationsExclusiveScanNonUniformAMD(1); + + int count = int(mbcnt()) - 1; + if (sum != count) { + f_color = vec4(1.0, float(sum) / 255., 0.0, float(gl_SubGroupInvocationARB) / 255.); + } else { + f_color = vec4(0.0, 1.0, 0.0, 1.0); + } + } + ); + run_simple_pipeline(fs, NULL, 0); +} + +test_define { + .name = "func.amd-shader-ballot.exclusive-scan-iadd", + .start = exclusive_scan_iadd, + .image_filename = "32x32-green.ref.png", +}; static VkDeviceMemory common_init(VkShaderModule cs, const uint32_t ssbo_size) { @@ -244,6 +625,404 @@ test_define { }; static void +group_iadd_compute(void) +{ + t_require_ext("VK_AMD_shader_ballot"); + + VkShaderModule cs = qoCreateShaderModule(t_device, group_iadd_info); + + const uint32_t ssbo_size = 256 * sizeof(uint32_t); + VkDeviceMemory mem_out = common_init(cs, ssbo_size); + + dispatch_and_wait(1, 1, 1); + + uint32_t *map_out = qoMapMemory(t_device, mem_out, 0, ssbo_size, 0); + for (unsigned i = 0; i < 256; i++) { + t_assertf(map_out[i] == 256, + "buffer mismatch at uint %d: found %u, " + "expected %u", i, map_out[i], 4 * 64); + } + t_pass(); +} + +test_define { + .name = "func.amd-shader-ballot.group-iadd-compute", + .start = group_iadd_compute, + .no_image = true, +}; + +static void +group_iadd_nonuniform_compute(void) +{ + t_require_ext("VK_AMD_shader_ballot"); + + VkShaderModule cs = qoCreateShaderModule(t_device, group_iadd_nonuniform_info); + + const uint32_t ssbo_size = 256 * sizeof(uint32_t); + VkDeviceMemory mem_out = common_init(cs, ssbo_size); + + dispatch_and_wait(1, 1, 1); + + uint32_t *map_out = qoMapMemory(t_device, mem_out, 0, ssbo_size, 0); + for (unsigned i = 0; i < 256; i += 2) { + t_assertf(map_out[i] == 256 / 2, + "buffer mismatch at uint %d: found %u, " + "expected %u", i, map_out[i], 256 / 2); + } + t_pass(); +} + +test_define { + .name = 
"func.amd-shader-ballot.group-iadd-nonuniform-compute", + .start = group_iadd_nonuniform_compute, + .no_image = true, +}; + +static void +group_iadd_inclusive_scan_compute(void) +{ + t_require_ext("VK_AMD_shader_ballot"); + + VkShaderModule cs = qoCreateShaderModule(t_device, group_iadd_incl_info); + + const uint32_t ssbo_size = 256 * sizeof(uint32_t); + VkDeviceMemory mem_out = common_init(cs, ssbo_size); + + dispatch_and_wait(1, 1, 1); + + uint32_t *map_out = qoMapMemory(t_device, mem_out, 0, ssbo_size, 0); + for (unsigned i = 0; i < 256; i++) { + t_assertf(map_out[i] == i + 1, + "buffer mismatch at uint %d: found %u, " + "expected %u", i, map_out[i], i + 1); + } + t_pass(); +} + +test_define { + .name = "func.amd-shader-ballot.group-iadd-inclusive-scan-compute", + .start = group_iadd_inclusive_scan_compute, + .no_image = true, +}; + +static void +group_iadd_inclusive_scan_nonuniform_compute(void) +{ + t_require_ext("VK_AMD_shader_ballot"); + + VkShaderModule cs = qoCreateShaderModule(t_device, group_iadd_incl_nonuniform_info); + + const uint32_t ssbo_size = 256 * sizeof(uint32_t); + VkDeviceMemory mem_out = common_init(cs, ssbo_size); + + dispatch_and_wait(1, 1, 1); + + uint32_t *map_out = qoMapMemory(t_device, mem_out, 0, ssbo_size, 0); + for (unsigned i = 0; i < 256; i += 2) { + t_assertf(map_out[i] == i / 2 + 1, + "buffer mismatch at uint %d: found %u, " + "expected %u", i, map_out[i], i / 2 + 1); + } + t_pass(); +} + +test_define { + .name = "func.amd-shader-ballot.group-iadd-inclusive-scan-nonuniform-compute", + .start = group_iadd_inclusive_scan_nonuniform_compute, + .no_image = true, +}; + +static void +group_iadd_exclusive_scan_compute(void) +{ + t_require_ext("VK_AMD_shader_ballot"); + + VkShaderModule cs = qoCreateShaderModule(t_device, group_iadd_excl_info); + + const uint32_t ssbo_size = 256 * sizeof(uint32_t); + VkDeviceMemory mem_out = common_init(cs, ssbo_size); + + dispatch_and_wait(1, 1, 1); + + uint32_t *map_out = qoMapMemory(t_device, mem_out, 
0, ssbo_size, 0); + for (unsigned i = 0; i < 256; i++) { + t_assertf(map_out[i] == i, + "buffer mismatch at uint %d: found %u, " + "expected %u", i, map_out[i], i); + } + t_pass(); +} + +test_define { + .name = "func.amd-shader-ballot.group-iadd-exclusive-scan-compute", + .start = group_iadd_exclusive_scan_compute, + .no_image = true, +}; + +static void +group_iadd_exclusive_scan_nonuniform_compute(void) +{ + t_require_ext("VK_AMD_shader_ballot"); + + VkShaderModule cs = qoCreateShaderModule(t_device, group_iadd_excl_nonuniform_info); + + const uint32_t ssbo_size = 256 * sizeof(uint32_t); + VkDeviceMemory mem_out = common_init(cs, ssbo_size); + + dispatch_and_wait(1, 1, 1); + + uint32_t *map_out = qoMapMemory(t_device, mem_out, 0, ssbo_size, 0); + for (unsigned i = 0; i < 256; i += 2) { + t_assertf(map_out[i] == i / 2, + "buffer mismatch at uint %d: found %u, " + "expected %u", i, map_out[i], i / 2); + } + t_pass(); +} + +test_define { + .name = "func.amd-shader-ballot.group-iadd-exclusive-scan-nonuniform-compute", + .start = group_iadd_exclusive_scan_nonuniform_compute, + .no_image = true, +}; + +static void +group_any_compute(void) +{ + t_require_ext("VK_AMD_shader_ballot"); + + VkShaderModule cs = qoCreateShaderModule(t_device, group_any_info); + + const uint32_t ssbo_size = 3 * 256 * 2 * sizeof(uint32_t); + VkDeviceMemory mem_out = common_init(cs, ssbo_size); + uint32_t *map_out = qoMapMemory(t_device, mem_out, 0, ssbo_size, 0); + /* first workgroup: alternating 1's and 0's */ + for (int i = 0; i < 256; i++) + map_out[2 * i] = (i % 2 == 0); + /* second workgroup: all 0 */ + for (int i = 256; i < 2 * 256; i++) + map_out[2 * i] = 0; + /* third workgroup: all 1 */ + for (int i = 2 * 256; i < 3 * 256; i++) + map_out[2 * i] = 1; + + dispatch_and_wait(3, 1, 1); + + for (unsigned i = 0; i < 256; i++) { + t_assertf(map_out[2 * i + 1] == 1, + "buffer mismatch at uint %d: found %u, " + "expected %u", i, map_out[2 * i + 1], 1); + } + for (unsigned i = 256; i < 2 * 256; 
i++) { + t_assertf(map_out[2 * i + 1] == 0, + "buffer mismatch at uint %d: found %u, " + "expected %u", i, map_out[2 * i + 1], 0); + } + for (unsigned i = 2 * 256; i < 3 * 256; i++) { + t_assertf(map_out[2 * i + 1] == 1, + "buffer mismatch at uint %d: found %u, " + "expected %u", i, map_out[2 * i + 1], 1); + } + t_pass(); +} + +test_define { + .name = "func.amd-shader-ballot.group-any-compute", + .start = group_any_compute, + .no_image = true, +}; + +static void +group_all_compute(void) +{ + t_require_ext("VK_AMD_shader_ballot"); + + VkShaderModule cs = qoCreateShaderModule(t_device, group_all_info); + + const uint32_t ssbo_size = 3 * 256 * 2 * sizeof(uint32_t); + VkDeviceMemory mem_out = common_init(cs, ssbo_size); + uint32_t *map_out = qoMapMemory(t_device, mem_out, 0, ssbo_size, 0); + /* first workgroup: alternating 1's and 0's */ + for (int i = 0; i < 256; i++) + map_out[2 * i] = (i % 2 == 0); + /* second workgroup: all 0 */ + for (int i = 256; i < 2 * 256; i++) + map_out[2 * i] = 0; + /* third workgroup: all 1 */ + for (int i = 2 * 256; i < 3 * 256; i++) + map_out[2 * i] = 1; + + dispatch_and_wait(3, 1, 1); + + for (unsigned i = 0; i < 256; i++) { + t_assertf(map_out[2 * i + 1] == 0, + "buffer mismatch at uint %d: found %u, " + "expected %u", i, map_out[2 * i + 1], 0); + } + for (unsigned i = 256; i < 2 * 256; i++) { + t_assertf(map_out[2 * i + 1] == 0, + "buffer mismatch at uint %d: found %u, " + "expected %u", i, map_out[2 * i + 1], 0); + } + for (unsigned i = 2 * 256; i < 3 * 256; i++) { + t_assertf(map_out[2 * i + 1] == 1, + "buffer mismatch at uint %d: found %u, " + "expected %u", i, map_out[2 * i + 1], 1); + } + t_pass(); +} + +test_define { + .name = "func.amd-shader-ballot.group-all-compute", + .start = group_all_compute, + .no_image = true, +}; + +static void +group_broadcast_compute(void) +{ + t_require_ext("VK_AMD_shader_ballot"); + + VkShaderModule cs = qoCreateShaderModule(t_device, group_broadcast_info); + + const uint32_t ssbo_size = 256 * 
sizeof(uint32_t); + VkDeviceMemory mem_out = common_init(cs, ssbo_size); + + dispatch_and_wait(1, 1, 1); + + uint32_t *map_out = qoMapMemory(t_device, mem_out, 0, ssbo_size, 0); + for (unsigned i = 0; i < 256; i++) { + t_assertf(map_out[i] == 42, + "buffer mismatch at uint %d: found %u, " + "expected %u", i, map_out[i], 42); + } + t_pass(); +} + +test_define { + .name = "func.amd-shader-ballot.group-broadcast-compute", + .start = group_broadcast_compute, + .no_image = true, +}; + +static void +quad_perm_compute(void) +{ + t_require_ext("VK_EXT_shader_subgroup_ballot"); + t_require_ext("VK_AMD_shader_ballot"); + + VkShaderModule cs = qoCreateShaderModuleGLSL( + t_device, COMPUTE, +///#extension GL_ARB_shader_ballot : enable +///#extension GL_AMD_shader_ballot : enable +///#extension GL_ARB_gpu_shader_int64 : enable + struct result { + uint a; + uint b; + }; + layout(set = 0, binding = 0, std430) buffer Storage { + result ua[]; + } ssbo; + + layout (local_size_x = 64) in; + + // emulate swizzleInvocationsAMD(gl_SubGroupInvocationARB, ...) + uint swizzleInvocations(uvec4 swizzle) + { + uint quad = gl_SubGroupInvocationARB & ~0x3u; + uint quad_idx = gl_SubGroupInvocationARB & 0x3u; + uint swizzled = quad + swizzle[quad_idx]; + uint64_t active_set = ballotARB(true); + return bool(active_set & (1ul << swizzled)) ? 
swizzled : 0; + } + + void main() + { + ssbo.ua[gl_GlobalInvocationID.x].a = swizzleInvocationsAMD(uvec2(gl_SubGroupInvocationARB), uvec4(3u, 2u, 1u, 0u)).x; + ssbo.ua[gl_GlobalInvocationID.x].b = swizzleInvocations(uvec4(3u, 2u, 1u, 0u)); + } + ); + + const uint32_t ssbo_size = 8 * 64 * sizeof(uint32_t); + VkDeviceMemory mem_out = common_init(cs, ssbo_size); + + dispatch_and_wait(4, 1, 1); + + uint32_t *map_out = qoMapMemory(t_device, mem_out, 0, ssbo_size, 0); + for (unsigned i = 0; i < 4 * 64; i++) { + t_assertf(map_out[2 * i] == map_out[2 * i + 1], + "buffer mismatch at uint %d: found %u, " + "expected %u", i, map_out[2 * i], map_out[2 * i + 1]); + } + t_pass(); +} + +test_define { + .name = "func.amd-shader-ballot.quad-perm-compute", + .start = quad_perm_compute, + .no_image = true, +}; + +static void +swizzle_compute(void) +{ + t_require_ext("VK_EXT_shader_subgroup_ballot"); + t_require_ext("VK_AMD_shader_ballot"); + + VkShaderModule cs = qoCreateShaderModuleGLSL( + t_device, COMPUTE, +///#extension GL_ARB_shader_ballot : enable +///#extension GL_AMD_shader_ballot : enable +///#extension GL_ARB_gpu_shader_int64 : enable + struct result { + uint a; + uint b; + }; + layout(set = 0, binding = 0, std430) buffer Storage { + result ua[]; + } ssbo; + + layout (local_size_x = 64) in; + + // emulate swizzleInvocationsMaskedAMD(gl_SubGroupInvocationARB, ...) + uint swizzleInvocationsMasked(uvec3 mask) + { + uint i = gl_SubGroupInvocationARB; + // from the spec + uint j = (((i & 0x1f) & mask.x) | mask.y) ^ mask.z; + j |= (i & 0x20); + uint64_t active_set = ballotARB(true); + return bool(active_set & (1ul << j)) ? 
j : 0; + } + + void main() + { + ssbo.ua[gl_GlobalInvocationID.x].a = swizzleInvocationsMaskedAMD(gl_SubGroupInvocationARB, uvec3(0x1f, 2u, 1u)); + ssbo.ua[gl_GlobalInvocationID.x].b = swizzleInvocationsMasked(uvec3(0x1f, 2u, 1u)); + } + ); + + const uint32_t ssbo_size = 8 * 64 * sizeof(uint32_t); + VkDeviceMemory mem_out = common_init(cs, ssbo_size); + + dispatch_and_wait(4, 1, 1); + + uint32_t *map_out = qoMapMemory(t_device, mem_out, 0, ssbo_size, 0); + for (unsigned i = 0; i < 4 * 64; i++) { + t_assertf(map_out[2 * i] == map_out[2 * i + 1], + "buffer mismatch at uint %d: found %u, " + "expected %u", i, map_out[2 * i], map_out[2 * i + 1]); + } + t_pass(); +} + +test_define { + .name = "func.amd-shader-ballot.swizzle-compute", + .start = swizzle_compute, + .no_image = true, +}; + +static void ballot_if_else(void) { t_require_ext("VK_EXT_shader_subgroup_ballot"); diff --git a/src/tests/func/shader_ballot/group_all.spv b/src/tests/func/shader_ballot/group_all.spv new file mode 100644 index 0000000..e2fcb8c --- /dev/null +++ b/src/tests/func/shader_ballot/group_all.spv @@ -0,0 +1,79 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google Shaderc over Glslang; 1 +; Bound: 40 +; Schema: 0 + OpCapability Shader + OpCapability SubgroupVoteKHR + OpExtension "SPV_KHR_subgroup_vote" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 256 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_ARB_shader_group_vote" + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpName %main "main" + OpName %ua "ua" + OpName %PerThread "PerThread" + OpMemberName %PerThread 0 "ua" + OpMemberName %PerThread 1 "ub" + OpName %Storage "Storage" + OpMemberName %Storage 0 "per_thread" + OpName %ssbo "ssbo" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpName %ub "ub" + OpMemberDecorate %PerThread 0 Offset 0 + OpMemberDecorate 
%PerThread 1 Offset 4 + OpDecorate %_runtimearr_PerThread ArrayStride 8 + OpMemberDecorate %Storage 0 Offset 0 + OpDecorate %Storage BufferBlock + OpDecorate %ssbo DescriptorSet 0 + OpDecorate %ssbo Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_ptr_Function_uint = OpTypePointer Function %uint + %PerThread = OpTypeStruct %uint %uint +%_runtimearr_PerThread = OpTypeRuntimeArray %PerThread + %Storage = OpTypeStruct %_runtimearr_PerThread +%_ptr_Uniform_Storage = OpTypePointer Uniform %Storage + %ssbo = OpVariable %_ptr_Uniform_Storage Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %bool = OpTypeBool + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %int_1 = OpConstant %int 1 + %uint_256 = OpConstant %uint 256 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_256 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %ua = OpVariable %_ptr_Function_uint Function + %ub = OpVariable %_ptr_Function_uint Function + %21 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %22 = OpLoad %uint %21 + %24 = OpAccessChain %_ptr_Uniform_uint %ssbo %int_0 %22 %int_0 + %25 = OpLoad %uint %24 + OpStore %ua %25 + %27 = OpLoad %uint %ua + %29 = OpINotEqual %bool %27 %uint_0 + %30 = OpGroupAll %bool %uint_2 %29 + %32 = OpSelect %uint %30 %uint_1 %uint_0 + OpStore %ub %32 + %33 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %34 = OpLoad %uint %33 + %36 = OpLoad %uint %ub + %37 = OpAccessChain %_ptr_Uniform_uint %ssbo %int_0 %34 %int_1 + OpStore %37 %36 + OpReturn + OpFunctionEnd diff 
--git a/src/tests/func/shader_ballot/group_any.glsl b/src/tests/func/shader_ballot/group_any.glsl new file mode 100644 index 0000000..b4aa04b --- /dev/null +++ b/src/tests/func/shader_ballot/group_any.glsl @@ -0,0 +1,23 @@ +#version 450 +#extension GL_ARB_shader_group_vote : enable + +struct PerThread { + uint ua; + uint ub; +}; + +layout(set = 0, binding = 0, std430) buffer Storage { + PerThread per_thread[]; +} ssbo; + +layout (local_size_x = 256, local_size_y = 1) in; + +void main() +{ + // Note: in SPIR-V, this gets translated to a vote with subgroup scope. + // There's currently no way to express an OpGroupAny with workgroup scope + // from GLSL, so I've changed the SPIR-V assembly (group_any.spv) myself. + uint ua = ssbo.per_thread[gl_GlobalInvocationID.x].ua; + uint ub = uint(anyInvocationARB(bool(ua))); + ssbo.per_thread[gl_GlobalInvocationID.x].ub = ub; +} diff --git a/src/tests/func/shader_ballot/group_any.spv b/src/tests/func/shader_ballot/group_any.spv new file mode 100644 index 0000000..9079361 --- /dev/null +++ b/src/tests/func/shader_ballot/group_any.spv @@ -0,0 +1,79 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google Shaderc over Glslang; 1 +; Bound: 40 +; Schema: 0 + OpCapability Shader + OpCapability SubgroupVoteKHR + OpExtension "SPV_KHR_subgroup_vote" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 256 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_ARB_shader_group_vote" + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpName %main "main" + OpName %ua "ua" + OpName %PerThread "PerThread" + OpMemberName %PerThread 0 "ua" + OpMemberName %PerThread 1 "ub" + OpName %Storage "Storage" + OpMemberName %Storage 0 "per_thread" + OpName %ssbo "ssbo" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpName %ub "ub" + OpMemberDecorate %PerThread 0 Offset 0 + OpMemberDecorate %PerThread 1 
Offset 4 + OpDecorate %_runtimearr_PerThread ArrayStride 8 + OpMemberDecorate %Storage 0 Offset 0 + OpDecorate %Storage BufferBlock + OpDecorate %ssbo DescriptorSet 0 + OpDecorate %ssbo Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_ptr_Function_uint = OpTypePointer Function %uint + %PerThread = OpTypeStruct %uint %uint +%_runtimearr_PerThread = OpTypeRuntimeArray %PerThread + %Storage = OpTypeStruct %_runtimearr_PerThread +%_ptr_Uniform_Storage = OpTypePointer Uniform %Storage + %ssbo = OpVariable %_ptr_Uniform_Storage Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %bool = OpTypeBool + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %int_1 = OpConstant %int 1 + %uint_256 = OpConstant %uint 256 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_256 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %ua = OpVariable %_ptr_Function_uint Function + %ub = OpVariable %_ptr_Function_uint Function + %21 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %22 = OpLoad %uint %21 + %24 = OpAccessChain %_ptr_Uniform_uint %ssbo %int_0 %22 %int_0 + %25 = OpLoad %uint %24 + OpStore %ua %25 + %27 = OpLoad %uint %ua + %29 = OpINotEqual %bool %27 %uint_0 + %30 = OpGroupAny %bool %uint_2 %29 + %32 = OpSelect %uint %30 %uint_1 %uint_0 + OpStore %ub %32 + %33 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %34 = OpLoad %uint %33 + %36 = OpLoad %uint %ub + %37 = OpAccessChain %_ptr_Uniform_uint %ssbo %int_0 %34 %int_1 + OpStore %37 %36 + OpReturn + OpFunctionEnd diff --git 
a/src/tests/func/shader_ballot/group_broadcast.spv b/src/tests/func/shader_ballot/group_broadcast.spv new file mode 100644 index 0000000..7ccf486 --- /dev/null +++ b/src/tests/func/shader_ballot/group_broadcast.spv @@ -0,0 +1,57 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google Shaderc over Glslang; 1 +; Bound: 29 +; Schema: 0 + OpCapability Shader + OpCapability Groups + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 256 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_AMD_shader_ballot" + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpName %main "main" + OpName %Storage "Storage" + OpMemberName %Storage 0 "ua" + OpName %ssbo "ssbo" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %Storage 0 Offset 0 + OpDecorate %Storage BufferBlock + OpDecorate %ssbo DescriptorSet 0 + OpDecorate %ssbo Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %Storage = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_Storage = OpTypePointer Uniform %Storage + %ssbo = OpVariable %_ptr_Uniform_Storage Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 + %uint_42 = OpConstant %uint 42 +%_ptr_Input_uint = OpTypePointer Input %uint + %int_1 = OpConstant %int 1 + %uint_2 = OpConstant %uint 2 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %uint_256 = OpConstant %uint 256 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_256 %uint_1 
%uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %18 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %19 = OpLoad %uint %18 + %22 = OpGroupBroadcast %uint %uint_2 %19 %uint_42 + %25 = OpAccessChain %_ptr_Uniform_uint %ssbo %int_0 %19 + OpStore %25 %22 + OpReturn + OpFunctionEnd diff --git a/src/tests/func/shader_ballot/group_iadd.glsl b/src/tests/func/shader_ballot/group_iadd.glsl new file mode 100644 index 0000000..398ef0b --- /dev/null +++ b/src/tests/func/shader_ballot/group_iadd.glsl @@ -0,0 +1,16 @@ +#version 450 +#extension GL_AMD_shader_ballot : enable + +layout(set = 0, binding = 0, std430) buffer Storage { + uint ua[]; +} ssbo; + +layout (local_size_x = 256, local_size_y = 1) in; + +void main() +{ + // Note: in SPIR-V, this gets translated to an OpGroupIAdd with subgroup + // scope. There's currently no way to express an OpGroupIAdd with workgroup + // scope, so I've changed the SPIR-V assembly myself. + ssbo.ua[gl_GlobalInvocationID.x] = addInvocationsAMD(1); +} diff --git a/src/tests/func/shader_ballot/group_iadd.spv b/src/tests/func/shader_ballot/group_iadd.spv new file mode 100644 index 0000000..7d306e1 --- /dev/null +++ b/src/tests/func/shader_ballot/group_iadd.spv @@ -0,0 +1,57 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google Shaderc over Glslang; 1 +; Bound: 29 +; Schema: 0 + OpCapability Shader + OpCapability Groups + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 256 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_AMD_shader_ballot" + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpName %main "main" + OpName %Storage "Storage" + OpMemberName %Storage 0 "ua" + OpName %ssbo "ssbo" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %Storage 0 Offset 0 + OpDecorate %Storage 
BufferBlock + OpDecorate %ssbo DescriptorSet 0 + OpDecorate %ssbo Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %Storage = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_Storage = OpTypePointer Uniform %Storage + %ssbo = OpVariable %_ptr_Uniform_Storage Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %int_1 = OpConstant %int 1 + %uint_2 = OpConstant %uint 2 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %uint_256 = OpConstant %uint 256 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_256 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %18 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %19 = OpLoad %uint %18 + %22 = OpGroupIAdd %int %uint_2 Reduce %int_1 + %23 = OpBitcast %uint %22 + %25 = OpAccessChain %_ptr_Uniform_uint %ssbo %int_0 %19 + OpStore %25 %23 + OpReturn + OpFunctionEnd diff --git a/src/tests/func/shader_ballot/group_iadd_excl.glsl b/src/tests/func/shader_ballot/group_iadd_excl.glsl new file mode 100644 index 0000000..7d279ef --- /dev/null +++ b/src/tests/func/shader_ballot/group_iadd_excl.glsl @@ -0,0 +1,16 @@ +#version 450 +#extension GL_AMD_shader_ballot : enable + +layout(set = 0, binding = 0, std430) buffer Storage { + uint ua[]; +} ssbo; + +layout (local_size_x = 256, local_size_y = 1) in; + +void main() +{ + // Note: in SPIR-V, this gets translated to an OpGroupIAdd with subgroup + // scope. There's currently no way to express an OpGroupIAdd with workgroup + // scope, so I've changed the SPIR-V assembly myself. 
+ ssbo.ua[gl_GlobalInvocationID.x] = addInvocationsExclusiveScanAMD(1); +} diff --git a/src/tests/func/shader_ballot/group_iadd_excl.spv b/src/tests/func/shader_ballot/group_iadd_excl.spv new file mode 100644 index 0000000..f66219e --- /dev/null +++ b/src/tests/func/shader_ballot/group_iadd_excl.spv @@ -0,0 +1,57 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google Shaderc over Glslang; 1 +; Bound: 29 +; Schema: 0 + OpCapability Shader + OpCapability Groups + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 256 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_AMD_shader_ballot" + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpName %main "main" + OpName %Storage "Storage" + OpMemberName %Storage 0 "ua" + OpName %ssbo "ssbo" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %Storage 0 Offset 0 + OpDecorate %Storage BufferBlock + OpDecorate %ssbo DescriptorSet 0 + OpDecorate %ssbo Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %Storage = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_Storage = OpTypePointer Uniform %Storage + %ssbo = OpVariable %_ptr_Uniform_Storage Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %int_1 = OpConstant %int 1 + %uint_2 = OpConstant %uint 2 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %uint_256 = OpConstant %uint 256 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize 
= OpConstantComposite %v3uint %uint_256 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %18 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %19 = OpLoad %uint %18 + %22 = OpGroupIAdd %int %uint_2 ExclusiveScan %int_1 + %23 = OpBitcast %uint %22 + %25 = OpAccessChain %_ptr_Uniform_uint %ssbo %int_0 %19 + OpStore %25 %23 + OpReturn + OpFunctionEnd diff --git a/src/tests/func/shader_ballot/group_iadd_excl_nonuniform.spv b/src/tests/func/shader_ballot/group_iadd_excl_nonuniform.spv new file mode 100644 index 0000000..6dbd42a --- /dev/null +++ b/src/tests/func/shader_ballot/group_iadd_excl_nonuniform.spv @@ -0,0 +1,70 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google Shaderc over Glslang; 1 +; Bound: 37 +; Schema: 0 + OpCapability Shader + OpCapability Groups + OpExtension "SPV_AMD_shader_ballot" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_LocalInvocationIndex %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 256 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_AMD_shader_ballot" + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpName %main "main" + OpName %gl_LocalInvocationIndex "gl_LocalInvocationIndex" + OpName %Storage "Storage" + OpMemberName %Storage 0 "ua" + OpName %ssbo "ssbo" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpDecorate %gl_LocalInvocationIndex BuiltIn LocalInvocationIndex + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %Storage 0 Offset 0 + OpDecorate %Storage BufferBlock + OpDecorate %ssbo DescriptorSet 0 + OpDecorate %ssbo Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_ptr_Input_uint = OpTypePointer Input %uint +%gl_LocalInvocationIndex = OpVariable %_ptr_Input_uint Input + %uint_2 = OpConstant 
%uint 2 + %uint_0 = OpConstant %uint 0 + %bool = OpTypeBool +%_runtimearr_uint = OpTypeRuntimeArray %uint + %Storage = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_Storage = OpTypePointer Uniform %Storage + %ssbo = OpVariable %_ptr_Uniform_Storage Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %int_1 = OpConstant %int 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %uint_256 = OpConstant %uint 256 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_256 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %9 = OpLoad %uint %gl_LocalInvocationIndex + %11 = OpUMod %uint %9 %uint_2 + %14 = OpIEqual %bool %11 %uint_0 + OpSelectionMerge %16 None + OpBranchConditional %14 %15 %16 + %15 = OpLabel + %26 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %27 = OpLoad %uint %26 + %30 = OpGroupIAddNonUniformAMD %int %uint_2 ExclusiveScan %int_1 + %31 = OpBitcast %uint %30 + %33 = OpAccessChain %_ptr_Uniform_uint %ssbo %int_0 %27 + OpStore %33 %31 + OpBranch %16 + %16 = OpLabel + OpReturn + OpFunctionEnd diff --git a/src/tests/func/shader_ballot/group_iadd_incl.glsl b/src/tests/func/shader_ballot/group_iadd_incl.glsl new file mode 100644 index 0000000..d646ef7 --- /dev/null +++ b/src/tests/func/shader_ballot/group_iadd_incl.glsl @@ -0,0 +1,16 @@ +#version 450 +#extension GL_AMD_shader_ballot : enable + +layout(set = 0, binding = 0, std430) buffer Storage { + uint ua[]; +} ssbo; + +layout (local_size_x = 256, local_size_y = 1) in; + +void main() +{ + // Note: in SPIR-V, this gets translated to an OpGroupIAdd with subgroup + // scope. There's currently no way to express an OpGroupIAdd with workgroup + // scope, so I've changed the SPIR-V assembly myself. 
+ ssbo.ua[gl_GlobalInvocationID.x] = addInvocationsInclusiveScanAMD(1); +} diff --git a/src/tests/func/shader_ballot/group_iadd_incl.spv b/src/tests/func/shader_ballot/group_iadd_incl.spv new file mode 100644 index 0000000..9c22bd5 --- /dev/null +++ b/src/tests/func/shader_ballot/group_iadd_incl.spv @@ -0,0 +1,57 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google Shaderc over Glslang; 1 +; Bound: 29 +; Schema: 0 + OpCapability Shader + OpCapability Groups + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 256 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_AMD_shader_ballot" + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpName %main "main" + OpName %Storage "Storage" + OpMemberName %Storage 0 "ua" + OpName %ssbo "ssbo" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %Storage 0 Offset 0 + OpDecorate %Storage BufferBlock + OpDecorate %ssbo DescriptorSet 0 + OpDecorate %ssbo Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %Storage = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_Storage = OpTypePointer Uniform %Storage + %ssbo = OpVariable %_ptr_Uniform_Storage Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %int_1 = OpConstant %int 1 + %uint_2 = OpConstant %uint 2 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %uint_256 = OpConstant %uint 256 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize 
= OpConstantComposite %v3uint %uint_256 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %18 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %19 = OpLoad %uint %18 + %22 = OpGroupIAdd %int %uint_2 InclusiveScan %int_1 + %23 = OpBitcast %uint %22 + %25 = OpAccessChain %_ptr_Uniform_uint %ssbo %int_0 %19 + OpStore %25 %23 + OpReturn + OpFunctionEnd diff --git a/src/tests/func/shader_ballot/group_iadd_incl_nonuniform.glsl b/src/tests/func/shader_ballot/group_iadd_incl_nonuniform.glsl new file mode 100644 index 0000000..45bf949 --- /dev/null +++ b/src/tests/func/shader_ballot/group_iadd_incl_nonuniform.glsl @@ -0,0 +1,18 @@ +#version 450 +#extension GL_AMD_shader_ballot : enable + +layout(set = 0, binding = 0, std430) buffer Storage { + uint ua[]; +} ssbo; + +layout (local_size_x = 256, local_size_y = 1) in; + +void main() +{ + // Note: in SPIR-V, this gets translated to an OpGroupIAdd with subgroup + // scope. There's currently no way to express an OpGroupIAdd with workgroup + // scope, so I've changed the SPIR-V assembly myself. 
+ if (gl_LocalInvocationIndex % 2 == 0) { + ssbo.ua[gl_GlobalInvocationID.x] = addInvocationsInclusiveScanNonUniformAMD(1); + } +} diff --git a/src/tests/func/shader_ballot/group_iadd_incl_nonuniform.spv b/src/tests/func/shader_ballot/group_iadd_incl_nonuniform.spv new file mode 100644 index 0000000..2638553 --- /dev/null +++ b/src/tests/func/shader_ballot/group_iadd_incl_nonuniform.spv @@ -0,0 +1,70 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google Shaderc over Glslang; 1 +; Bound: 37 +; Schema: 0 + OpCapability Shader + OpCapability Groups + OpExtension "SPV_AMD_shader_ballot" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_LocalInvocationIndex %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 256 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_AMD_shader_ballot" + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpName %main "main" + OpName %gl_LocalInvocationIndex "gl_LocalInvocationIndex" + OpName %Storage "Storage" + OpMemberName %Storage 0 "ua" + OpName %ssbo "ssbo" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpDecorate %gl_LocalInvocationIndex BuiltIn LocalInvocationIndex + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %Storage 0 Offset 0 + OpDecorate %Storage BufferBlock + OpDecorate %ssbo DescriptorSet 0 + OpDecorate %ssbo Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_ptr_Input_uint = OpTypePointer Input %uint +%gl_LocalInvocationIndex = OpVariable %_ptr_Input_uint Input + %uint_2 = OpConstant %uint 2 + %uint_0 = OpConstant %uint 0 + %bool = OpTypeBool +%_runtimearr_uint = OpTypeRuntimeArray %uint + %Storage = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_Storage = OpTypePointer Uniform %Storage + %ssbo = OpVariable %_ptr_Uniform_Storage Uniform 
+ %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %int_1 = OpConstant %int 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %uint_256 = OpConstant %uint 256 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_256 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %9 = OpLoad %uint %gl_LocalInvocationIndex + %11 = OpUMod %uint %9 %uint_2 + %14 = OpIEqual %bool %11 %uint_0 + OpSelectionMerge %16 None + OpBranchConditional %14 %15 %16 + %15 = OpLabel + %26 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %27 = OpLoad %uint %26 + %30 = OpGroupIAddNonUniformAMD %int %uint_2 InclusiveScan %int_1 + %31 = OpBitcast %uint %30 + %33 = OpAccessChain %_ptr_Uniform_uint %ssbo %int_0 %27 + OpStore %33 %31 + OpBranch %16 + %16 = OpLabel + OpReturn + OpFunctionEnd diff --git a/src/tests/func/shader_ballot/group_iadd_nonuniform.glsl b/src/tests/func/shader_ballot/group_iadd_nonuniform.glsl new file mode 100644 index 0000000..1be8dd3 --- /dev/null +++ b/src/tests/func/shader_ballot/group_iadd_nonuniform.glsl @@ -0,0 +1,18 @@ +#version 450 +#extension GL_AMD_shader_ballot : enable + +layout(set = 0, binding = 0, std430) buffer Storage { + uint ua[]; +} ssbo; + +layout (local_size_x = 256, local_size_y = 1) in; + +void main() +{ + // Note: in SPIR-V, this gets translated to an OpGroupIAdd with subgroup + // scope. There's currently no way to express an OpGroupIAdd with workgroup + // scope, so I've changed the SPIR-V assembly myself. 
+ if (gl_LocalInvocationIndex % 2 == 0) { + ssbo.ua[gl_GlobalInvocationID.x] = addInvocationsNonUniformAMD(1); + } +} diff --git a/src/tests/func/shader_ballot/group_iadd_nonuniform.spv b/src/tests/func/shader_ballot/group_iadd_nonuniform.spv new file mode 100644 index 0000000..ad8ca5c --- /dev/null +++ b/src/tests/func/shader_ballot/group_iadd_nonuniform.spv @@ -0,0 +1,70 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google Shaderc over Glslang; 1 +; Bound: 37 +; Schema: 0 + OpCapability Shader + OpCapability Groups + OpExtension "SPV_AMD_shader_ballot" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_LocalInvocationIndex %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 256 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_AMD_shader_ballot" + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpName %main "main" + OpName %gl_LocalInvocationIndex "gl_LocalInvocationIndex" + OpName %Storage "Storage" + OpMemberName %Storage 0 "ua" + OpName %ssbo "ssbo" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpDecorate %gl_LocalInvocationIndex BuiltIn LocalInvocationIndex + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %Storage 0 Offset 0 + OpDecorate %Storage BufferBlock + OpDecorate %ssbo DescriptorSet 0 + OpDecorate %ssbo Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_ptr_Input_uint = OpTypePointer Input %uint +%gl_LocalInvocationIndex = OpVariable %_ptr_Input_uint Input + %uint_2 = OpConstant %uint 2 + %uint_0 = OpConstant %uint 0 + %bool = OpTypeBool +%_runtimearr_uint = OpTypeRuntimeArray %uint + %Storage = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_Storage = OpTypePointer Uniform %Storage + %ssbo = OpVariable %_ptr_Uniform_Storage Uniform + %int = OpTypeInt 32 1 + 
%int_0 = OpConstant %int 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %int_1 = OpConstant %int 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %uint_256 = OpConstant %uint 256 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_256 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %9 = OpLoad %uint %gl_LocalInvocationIndex + %11 = OpUMod %uint %9 %uint_2 + %14 = OpIEqual %bool %11 %uint_0 + OpSelectionMerge %16 None + OpBranchConditional %14 %15 %16 + %15 = OpLabel + %26 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %27 = OpLoad %uint %26 + %30 = OpGroupIAddNonUniformAMD %int %uint_2 Reduce %int_1 + %31 = OpBitcast %uint %30 + %33 = OpAccessChain %_ptr_Uniform_uint %ssbo %int_0 %27 + OpStore %33 %31 + OpBranch %16 + %16 = OpLabel + OpReturn + OpFunctionEnd diff --git a/src/tests/func/shader_ballot/test.spv b/src/tests/func/shader_ballot/test.spv new file mode 100644 index 0000000..37cd930 --- /dev/null +++ b/src/tests/func/shader_ballot/test.spv @@ -0,0 +1,57 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google Shaderc over Glslang; 1 +; Bound: 29 +; Schema: 0 + OpCapability Shader + OpCapability Groups + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 256 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_AMD_shader_ballot" + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpName %main "main" + OpName %Storage "Storage" + OpMemberName %Storage 0 "ua" + OpName %ssbo "ssbo" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %Storage 0 Offset 0 + OpDecorate %Storage BufferBlock + OpDecorate %ssbo DescriptorSet 0 + OpDecorate %ssbo Binding 0 + OpDecorate 
%gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %Storage = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_Storage = OpTypePointer Uniform %Storage + %ssbo = OpVariable %_ptr_Uniform_Storage Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %int_1 = OpConstant %int 1 + %uint_3 = OpConstant %uint 3 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %uint_256 = OpConstant %uint 256 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_256 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %18 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %19 = OpLoad %uint %18 + %22 = OpGroupIAdd %int %uint_3 ExclusiveScan %int_1 + %23 = OpBitcast %uint %22 + %25 = OpAccessChain %_ptr_Uniform_uint %ssbo %int_0 %19 + OpStore %25 %23 + OpReturn + OpFunctionEnd |