diff options
author | Matt Arsenault <arsenm2@gmail.com> | 2017-09-18 18:53:24 -0700 |
---|---|---|
committer | Jan Vesely <jan.vesely@rutgers.edu> | 2017-09-20 12:59:10 -0400 |
commit | e408ce1f2bff23121670a8206258c80bb3d9befd (patch) | |
tree | 0772aafed8f269fa335685fa9d9ab41acf999cd6 /tests/cl | |
parent | 7f1f0f40dca636301d740ab08b1de3df6c6206a5 (diff) |
cl: Add tests for function calls
Passes on ROCm, I haven't tried clover recently. Last
time I did it errored because the AsmParser wasn't properly
initialized.
v2: Fix non-unique test names, Wrap noinline in unguarded macro,
use prettier test names, use device_regex (effectively restricting to ROCm)
Reviewed-by: Jan Vesely <jan.vesely@rutgers.edu>
Diffstat (limited to 'tests/cl')
-rw-r--r-- | tests/cl/program/execute/call-clobbers-amdgcn.cl | 67 | ||||
-rw-r--r-- | tests/cl/program/execute/calls-struct.cl | 179 | ||||
-rw-r--r-- | tests/cl/program/execute/calls-workitem-id.cl | 77 | ||||
-rw-r--r-- | tests/cl/program/execute/calls.cl | 607 | ||||
-rw-r--r-- | tests/cl/program/execute/tail-calls.cl | 305 |
5 files changed, 1235 insertions, 0 deletions
diff --git a/tests/cl/program/execute/call-clobbers-amdgcn.cl b/tests/cl/program/execute/call-clobbers-amdgcn.cl new file mode 100644 index 000000000..18e657ce3 --- /dev/null +++ b/tests/cl/program/execute/call-clobbers-amdgcn.cl @@ -0,0 +1,67 @@ +/*! + +[config] +name: amdgcn call clobbers +clc_version_min: 10 +device_regex: gfx[\d]* + +[test] +name: callee saved sgpr +kernel_name: call_clobber_s40 +dimensions: 1 +global_size: 1 0 0 +arg_out: 0 buffer int[1] 0xabcd1234 + +[test] +name: callee saved vgpr +kernel_name: call_clobber_v40 +dimensions: 1 +global_size: 1 0 0 +arg_out: 0 buffer int[1] 0xabcd1234 + +!*/ + +#ifndef __AMDGCN__ +#error This test is only for amdgcn +#endif + +__attribute__((noinline)) +void clobber_s40() +{ + __asm volatile("s_mov_b32 s40, 0xdead" : : : "s40"); +} + +kernel void call_clobber_s40(__global int* ret) +{ + __asm volatile("s_mov_b32 s40, 0xabcd1234" : : : "s40"); + + clobber_s40(); + + int tmp; + + __asm volatile("v_mov_b32 %0, s40" + : "=v"(tmp) + : + : "s40"); + *ret = tmp; +} + +__attribute__((noinline)) +void clobber_v40() +{ + __asm volatile("v_mov_b32 v40, 0xdead" : : : "v40"); +} + +kernel void call_clobber_v40(__global int* ret) +{ + __asm volatile("v_mov_b32 v40, 0xabcd1234" : : : "v40"); + + clobber_v40(); + + int tmp; + __asm volatile("v_mov_b32 %0, v40" + : "=v"(tmp) + : + : "v40"); + *ret = tmp; +} diff --git a/tests/cl/program/execute/calls-struct.cl b/tests/cl/program/execute/calls-struct.cl new file mode 100644 index 000000000..04f769dac --- /dev/null +++ b/tests/cl/program/execute/calls-struct.cl @@ -0,0 +1,179 @@ +/*! + +[config] +name: calls with structs +clc_version_min: 10 + +[test] +name: byval struct +kernel_name: call_i32_func_byval_Char_IntArray +dimensions: 1 +global_size: 16 0 0 + +arg_out: 0 buffer int[16] \ + 1021 1022 1023 1024 1025 1026 1027 1028 \ + 1029 1030 1031 1032 1033 1034 1035 1036 + +arg_out: 1 buffer int[16] \ + 14 14 14 14 \ + 14 14 14 14 \ + 14 14 14 14 \ + 14 14 14 14 \ + +arg_in: 2 buffer int[16] \ + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 + + +[test] +name: sret struct +kernel_name: call_sret_Char_IntArray_func +dimensions: 1 +global_size: 16 0 0 + +arg_out: 0 buffer int[16] \ + 921 922 923 924 925 926 927 928 \ + 929 930 931 932 933 934 935 936 + +arg_in: 1 buffer int[16] \ + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 + + +[test] +name: byval struct and sret struct +kernel_name: call_sret_Char_IntArray_func_byval_Char_IntArray +dimensions: 1 +global_size: 16 0 0 + +arg_out: 0 buffer int[16] \ + 86 87 88 89 \ + 90 91 92 93 \ + 94 95 96 97 \ + 98 99 100 101 + +arg_out: 1 buffer int[16] \ + 134 135 136 137 \ + 138 139 140 141 \ + 142 143 144 145 \ + 146 147 148 149 + +arg_in: 2 buffer int[16] \ + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 + +!*/ + +#define NOINLINE __attribute__((noinline)) + +typedef struct ByVal_Char_IntArray { + char c; + int i[4]; +} ByVal_Char_IntArray; + +NOINLINE +int i32_func_byval_Char_IntArray(ByVal_Char_IntArray st) +{ + st.i[0] += 100; + + int sum = 0; + for (int i = 0; i < 4; ++i) + { + sum += st.i[i]; + } + + sum += st.c; + return sum; +} + +kernel void call_i32_func_byval_Char_IntArray(global int* out0, + global int* out1, + global int* input) +{ + ByVal_Char_IntArray st; + st.c = 15; + + int id = get_global_id(0); + + int val = input[id]; + st.i[0] = 14; + st.i[1] = -8; + st.i[2] = val; + st.i[3] = 900; + + int result = i32_func_byval_Char_IntArray(st); + out0[id] = result; + out1[id] = st.i[0]; +} + +NOINLINE +ByVal_Char_IntArray sret_Char_IntArray_func(global int* input, int id) +{ + ByVal_Char_IntArray st; + st.c = 15; + + int val = input[id]; + st.i[0] = 14; + st.i[1] = -8; + st.i[2] = val; + st.i[3] = 900; + + return st; +} + +kernel void call_sret_Char_IntArray_func(global int* output, global int* input) +{ + int id = get_global_id(0); + ByVal_Char_IntArray st = sret_Char_IntArray_func(input, id); + + int sum = 0; + for (int i = 0; i < 4; ++i) + { + sum += st.i[i]; + } + + sum += st.c; + output[id] = sum; +} + +NOINLINE +ByVal_Char_IntArray sret_Char_IntArray_func_byval_Char_IntArray(ByVal_Char_IntArray st) +{ + st.c += 15; + + st.i[0] += 14; + st.i[1] -= 8; + st.i[2] += 9; + st.i[3] += 18; + + return st; +} + +kernel void call_sret_Char_IntArray_func_byval_Char_IntArray(global int* output0, + global int* output1, + global int* input) +{ + int id = get_global_id(0); + + volatile ByVal_Char_IntArray st0; + st0.c = -20; + + int val = input[id]; + st0.i[0] = 14; + st0.i[1] = -8; + st0.i[2] = val; + st0.i[3] = 100; + + ByVal_Char_IntArray st1 = sret_Char_IntArray_func_byval_Char_IntArray(st0); + + int sum0 = 0; + int sum1 = 0; + for (int i = 0; i < 4; ++i) + { + sum0 += st0.i[i]; + sum1 += st1.i[i]; + } + + sum0 += st0.c; + sum1 += st1.c; + + output0[id] = sum0; + output1[id] = sum1; +} diff --git a/tests/cl/program/execute/calls-workitem-id.cl b/tests/cl/program/execute/calls-workitem-id.cl new file mode 100644 index 000000000..7edfad7e9 --- /dev/null +++ b/tests/cl/program/execute/calls-workitem-id.cl @@ -0,0 +1,77 @@ +/*! + +[config] +name: calls workitem IDs +clc_version_min: 10 + +[test] +name: Callee function use get_global_id(0) +kernel_name: kernel_call_pass_get_global_id_0 +dimensions: 1 +global_size: 64 0 0 +arg_out: 0 buffer uint[64] \ + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 \ + 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 \ + 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 \ + 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 + +[test] +name: Callee function use get_global_id 0..2 +kernel_name: kernel_call_pass_get_global_id_012 +dimensions: 3 +global_size: 8 4 2 +arg_out: 0 buffer uint[64] \ + 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 \ + 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 \ + 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 \ + 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 + +arg_out: 1 buffer uint[64] \ + 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 \ + 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 \ + 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 \ + 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 + +arg_out: 2 buffer uint[64] \ + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ + 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 \ + 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 + +!*/ + +#define NOINLINE __attribute__((noinline)) + +NOINLINE +void func_get_global_id_0(volatile global uint* out) +{ + uint gid = get_global_id(0); + out[gid] = gid; +} + +kernel void kernel_call_pass_get_global_id_0(global uint *out) +{ + func_get_global_id_0(out); +} + +NOINLINE +void func_get_global_id_012(volatile global uint* out0, + volatile global uint* out1, + volatile global uint* out2) +{ + uint id0 = get_global_id(0); + uint id1 = get_global_id(1); + uint id2 = get_global_id(2); + uint flat_id = (id2 * get_global_size(1) + id1) * get_global_size(0) + id0; + + out0[flat_id] = id0; + out1[flat_id] = id1; + out2[flat_id] = id2; +} + +kernel void kernel_call_pass_get_global_id_012(global uint *out0, + global uint *out1, + global uint *out2) +{ + func_get_global_id_012(out0, out1, out2); +} diff --git a/tests/cl/program/execute/calls.cl b/tests/cl/program/execute/calls.cl new file mode 100644 index 000000000..f4f55be31 --- /dev/null +++ b/tests/cl/program/execute/calls.cl @@ -0,0 +1,607 @@ +/*! + +[config] +name: calls +clc_version_min: 10 + +[test] +name: Call void_func_void +kernel_name: call_void_func_void +dimensions: 1 +global_size: 1 0 0 +arg_out: 0 buffer int[1] 12345 + +[test] +name: Call i32_func_void +kernel_name: call_i32_func_void +dimensions: 1 +global_size: 1 0 0 +arg_out: 0 buffer int[1] 0x12345 + +[test] +name: Call i64_func_void +kernel_name: call_i64_func_void +dimensions: 1 +global_size: 1 0 0 +arg_out: 0 buffer long[1] 0x100000000000 + + +[test] +name: Call call_i32_func_void_callee_stack +kernel_name: call_i32_func_void_callee_stack +dimensions: 1 +global_size: 1 0 0 +arg_out: 0 buffer int[1] 290 + +[test] +name: Call call_i32_func_p0i32_i32_caller_stack +kernel_name: call_i32_func_p0i32_i32_caller_stack +dimensions: 1 +global_size: 1 0 0 +arg_out: 0 buffer int[1] 175 + +[test] +name: Call call_i32_func_p0i32_i32_indirect_kernel_stack +kernel_name: call_i32_func_p0i32_i32_indirect_kernel_stack +dimensions: 1 +global_size: 1 0 0 +arg_out: 0 buffer int[1] 241 + +[test] +name: Call call_i32_func_p0i32_i32_indirect_function_stack +kernel_name: call_i32_func_p0i32_i32_indirect_function_stack +dimensions: 1 +global_size: 1 0 0 +arg_out: 0 buffer int[1] 291 + +[test] +name: callee stack corruption +kernel_name: kernel_call_nested_stack_usage +dimensions: 1 +global_size: 10 0 0 + +arg_out: 0 buffer int4[10] \ + 53 48 156 160 \ + 84 248 102 150 \ + 102 56 217 106 \ + 100 123 151 139 \ + 80 150 135 163 \ + 223 99 117 199 \ + 187 262 223 169 \ + 277 129 73 121 \ + 162 165 138 137 \ + 204 207 223 145 \ + + +arg_in: 1 buffer int4[10] \ + 0 13 76 46 \ + 4 74 33 63 \ + 26 9 95 7 \ + 41 54 47 29 \ + 15 68 38 39 \ + 91 43 14 95 \ + 44 83 69 70 \ + 89 54 14 45 \ + 77 63 21 21 \ + 64 70 80 70 + +arg_in: 2 buffer int4[10] \ + 53 22 4 68 \ + 76 100 36 24 \ + 50 38 27 92 \ + 18 15 57 81 \ + 50 14 59 85 \ + 41 13 89 9 \ + 99 96 85 29 \ + 99 21 45 31 \ + 8 39 96 95 \ + 76 67 63 5 + +[test] +name: nested calls +kernel_name: kernel_nested_calls +dimensions: 1 +global_size: 4 0 0 + +arg_out: 0 buffer int[4] \ + 1 7 155 -4 + +arg_in: 1 buffer int[4] \ + 0 100 1234 -912 + +arg_in: 2 buffer int[4] \ + 1 4 2 45 + + +[test] +name: Kernel call stack argument +kernel_name: kernel_call_stack_arg +dimensions: 1 +global_size: 10 0 0 + + +arg_out: 0 buffer int4[10] \ + 11440 1348 29304 16698 \ + 47975 3626 30850 13224 \ + 8235 30495 31995 1455 \ + 16048 40512 33992 7028 \ + 9450 5356 21330 23130 \ + 21120 35186 52896 49968 \ + 34083 28520 0 0 \ + 12384 41492 4420 17880 \ + 37310 19320 37518 13175 \ + 23852 16014 22734 24284 \ + + +arg_in: 1 buffer int4[10] \ + 0 13 76 46 \ + 63 76 100 36 \ + 27 92 53 46 \ + 53 50 96 75 \ + 99 41 14 57 \ + 35 45 81 94 \ + 80 71 74 1 \ + 78 73 32 42 \ + 60 17 83 15 \ + 13 53 31 59 + +arg_in: 2 buffer int4[10] \ + 53 22 4 68 \ + 24 99 72 76 \ + 95 5 76 77 \ + 56 89 63 85 \ + 25 49 46 97 \ + 65 21 68 91 \ + 89 53 46 6 \ + 68 68 20 84 \ + 99 25 23 10 \ + 52 43 26 37 + +arg_in: 3 buffer int4[10] \ + 68 94 38 52 \ + 65 7 63 89 \ + 83 12 1 69 \ + 16 21 72 13 \ + 12 20 32 63 \ + 25 86 47 51 \ + 72 49 67 68 \ + 71 83 9 8 \ + 22 64 70 80 \ + 39 45 48 39 + +arg_in: 4 buffer int4[10] \ + 83 3 5 53 \ + 27 44 77 48 \ + 87 63 74 73 \ + 9 27 0 41 \ + 12 65 62 81 \ + 60 82 76 46 \ + 20 92 87 89 \ + 77 63 21 21 \ + 70 76 67 63 \ + 28 7 37 25 + +arg_in: 5 buffer int4[10] \ + 67 0 38 6 \ + 24 27 36 16 \ + 100 89 23 30 \ + 2 71 94 24 \ + 25 48 39 20 \ + 96 63 44 83 \ + 54 14 45 99 \ + 8 39 96 95 \ + 5 60 22 32 \ + 67 68 51 73 + +arg_in: 6 buffer int4[10] \ + 42 69 59 93 \ + 49 90 91 6 \ + 35 51 59 85 \ + 18 32 89 65 \ + 2 91 43 14 \ + 69 70 99 96 \ + 21 45 31 51 \ + 39 27 69 28 \ + 70 11 77 53 \ + 72 95 46 94 + +arg_in: 7 buffer int4[10] \ + 85 53 9 66 \ + 91 50 52 32 \ + 41 84 27 41 \ + 15 68 38 39 \ + 95 41 13 89 \ + 85 29 54 51 \ + 89 44 47 81 \ + 78 79 42 28 \ + 55 59 33 71 \ + 32 46 52 66 + +arg_in: 8 buffer int4[10] \ + 42 70 91 76 \ + 99 49 26 9 \ + 54 47 29 18 \ + 50 14 59 85 \ + 9 16 7 36 \ + 10 41 58 88 \ + 36 21 100 15 \ + 19 1 19 99 \ + 14 16 49 86 \ + 40 61 99 15 + +arg_in: 9 buffer int4[10] \ + 26 4 74 33 \ + 95 7 50 38 \ + 15 57 81 3 \ + 59 96 56 14 \ + 25 13 79 45 \ + 44 73 87 72 \ + 63 62 0 0 \ + 24 82 13 40 \ + 82 56 74 31 \ + 67 34 54 52 + +!*/ + +// The inline asm is necessary to defeat interprocedural sparse +// conditional constant propagation eliminating some of the trivial +// calls. +#ifdef __AMDGCN__ +#define USE_ASM 1 +#endif + +#define NOINLINE __attribute__((noinline)) + +NOINLINE +void void_func_void(void) +{ +#if USE_ASM + __asm(""); +#endif +} + +kernel void call_void_func_void(__global int* ret) +{ + void_func_void(); + *ret = 12345; +} + +NOINLINE +int i32_func_void(void) +{ + int ret; +#if USE_ASM + __asm("v_mov_b32 %0, 0x12345" : "=v"(ret)); +#else + ret = 0x12345; +#endif + + return ret; +} + +kernel void call_i32_func_void(__global int* ret) +{ + *ret = i32_func_void(); +} + +NOINLINE +long i64_func_void(void) +{ + long ret; +#if USE_ASM + __asm("v_lshlrev_b64 %0, 44, 1" : "=v"(ret)); +#else + ret = 1ull << 44; +#endif + return ret; +} + +kernel void call_i64_func_void(__global long* ret) +{ + *ret = i64_func_void(); +} + + +NOINLINE +int i32_func_void_callee_stack(void) +{ + int ret; +#if USE_ASM + __asm("v_mov_b32 %0, 0x64" : "=v"(ret)); +#else + ret = 0x64; +#endif + + volatile int alloca[20]; + + for (int i = 0; i < 20; ++i) + { + alloca[i] = i; + } + + for (int i = 0; i < 20; ++i) + { + ret += alloca[i]; + } + + return ret; +} + +kernel void call_i32_func_void_callee_stack(__global int* ret) +{ + volatile int alloca[10]; + + for (int i = 0; i < 10; ++i) + { + alloca[i] = 0xffff; + } + + + *ret = i32_func_void_callee_stack(); +} + +NOINLINE +int i32_func_p0i32_i32_caller_stack(volatile int* stack, int n) +{ + int ret; +#if USE_ASM + __asm("v_mov_b32 %0, 0x64" : "=v"(ret)); +#else + ret = 0x64; +#endif + + for (int i = 0; i < n; ++i) + { + ret += stack[i]; + } + + return ret; +} + +kernel void call_i32_func_p0i32_i32_caller_stack(__global int* ret) +{ + volatile int alloca[10]; + + for (int i = 0; i < 10; ++i) + { + alloca[i] = 3 + i; + } + + *ret = i32_func_p0i32_i32_caller_stack(alloca, 10); +} + +NOINLINE +int i32_func_p0i32_i32_indirect_stack(volatile int* stack, int n) +{ + int ret; +#if USE_ASM + __asm("v_mov_b32 %0, 0x64" : "=v"(ret)); +#else + ret = 0x64; +#endif + for (int i = 0; i < n; ++i) + { + ret += stack[i]; + } + + return ret; +} + +// Access a stack object in the parent kernel's frame. +NOINLINE +int i32_func_p0i32_i32_pass_kernel_stack(volatile int* stack, int n) +{ + int ret; +#if USE_ASM + __asm("v_mov_b32 %0, 0x42" : "=v"(ret)); +#else + ret = 0x42; +#endif + + volatile int local_object[10]; + for (int i = 0; i < 10; ++i) + local_object[i] = -1; + + ret += i32_func_p0i32_i32_indirect_stack(stack, n); + + return ret; +} + +kernel void call_i32_func_p0i32_i32_indirect_kernel_stack(volatile __global int* ret) +{ + volatile int alloca[10]; + + for (int i = 0; i < 10; ++i) + { + alloca[i] = 3 + i; + } + + *ret = i32_func_p0i32_i32_pass_kernel_stack(alloca, 10); +} + +// Access a stack object in a parent non-kernel function's stack frame. +NOINLINE +int i32_func_void_pass_function_stack() +{ + int ret; +#if USE_ASM + __asm("v_mov_b32 %0, 0x42" : "=v"(ret)); +#else + ret = 0x42; +#endif + + volatile int local_object[10]; + for (int i = 0; i < 10; ++i) + local_object[i] = 8 + i; + + ret += i32_func_p0i32_i32_indirect_stack(local_object, 10); + return ret; +} + +kernel void call_i32_func_p0i32_i32_indirect_function_stack(__global int* ret) +{ + *ret = i32_func_void_pass_function_stack(); +} + +NOINLINE +int4 v4i32_func_v4i32_v4i32_stack(int4 arg0, int4 arg1) +{ + // Force stack usage. + volatile int4 args[8] = { arg0, arg1 }; + + int4 total = 0; + for (int i = 0; i < 8; ++i) + { + total += args[i]; + } + + return total; +} + +// Make sure using stack in a callee function from a callee function +// doesn't corrupt caller's stack objects. +NOINLINE +int4 nested_stack_usage_v4i32_func_v4i32_v4i32(int4 arg0, int4 arg1) +{ + volatile int stack_object[4]; + for (int i = 0; i < 4; ++i) { + const int test_val = 0x04030200 | i; + stack_object[i] = test_val; + } + + arg0 *= 2; + + int4 result = v4i32_func_v4i32_v4i32_stack(arg0, arg1); + + // Check for stack corruption + for (int i = 0; i < 4; ++i) + { + const int test_val = 0x04030200 | i; + if (stack_object[i] != test_val) + result = -1; + } + + return result; +} + +kernel void kernel_call_nested_stack_usage(global int4* output, + global int4* input0, + global int4* input1) +{ + int id = get_global_id(0); + output[id] = nested_stack_usage_v4i32_func_v4i32_v4i32( + input0[id], + input1[id]); +} + +NOINLINE +int func_div_add(int x, int y) +{ + return x / y + 4; +} + +NOINLINE +int call_i32_func_i32_i32(int x, int y, volatile int* ptr) +{ + int tmp = func_div_add(x, y) >> 2; + return tmp + *ptr; +} + +kernel void kernel_nested_calls(global int* output, + global int* input0, + global int* input1) +{ + int id = get_global_id(0); + volatile int zero = 0; + output[id] = call_i32_func_i32_i32(input0[id], input1[id], &zero); +} + +NOINLINE +int4 v4i32_func_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32( + int4 arg0, int4 arg1, int4 arg2, int4 arg3, + int4 arg4, int4 arg5, int4 arg6, int4 arg7, + int4 arg8) +{ + // Try to make sure we can't clobber the incoming stack arguments + // with local stack objects. + volatile int4 args[8] = { arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7 }; + volatile int4 last_arg = arg8; + + int4 total = 0; + for (int i = 0; i < 8; ++i) + { + total += args[i]; + } + + return total * last_arg; +} + + // Test argument passed on stack, but doesn't use byval. +NOINLINE +int4 stack_arg_v4i32_func_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32( + int4 arg0, int4 arg1, int4 arg2, int4 arg3, + int4 arg4, int4 arg5, int4 arg6, int4 arg7, + int4 arg8) +{ + volatile int stack_object[8]; + for (int i = 0; i < 8; ++i) { + const int test_val = 0x04030200 | i; + stack_object[i] = test_val; + } + + arg0 *= 2; + + int4 result = v4i32_func_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32( + arg0, arg1, arg2, arg3, arg4, + arg5, arg6, arg7, arg8); + + // Check for stack corruption. + for (int i = 0; i < 8; ++i) + { + const int test_val = 0x04030200 | i; + if (stack_object[i] != test_val) + result = -1; + } + + return result; +} + +kernel void kernel_call_stack_arg(global int4* output, + global int4* input0, + global int4* input1, + global int4* input2, + global int4* input3, + global int4* input4, + global int4* input5, + global int4* input6, + global int4* input7, + global int4* input8) +{ + int id = get_global_id(0); + + volatile int stack_object[8]; + for (int i = 0; i < 8; ++i) { + const int test_val = 0x05060700 | i; + stack_object[i] = test_val; + } + + output[id] = stack_arg_v4i32_func_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32( + input0[id], + input1[id], + input2[id], + input3[id], + input4[id], + input5[id], + input6[id], + input7[id], + input8[id]); + + // Check for stack corruption. + for (int i = 0; i < 8; ++i) + { + const int test_val = 0x05060700 | i; + if (stack_object[i] != test_val) + output[id] = -1; + } + +} diff --git a/tests/cl/program/execute/tail-calls.cl b/tests/cl/program/execute/tail-calls.cl new file mode 100644 index 000000000..cf33373cc --- /dev/null +++ b/tests/cl/program/execute/tail-calls.cl @@ -0,0 +1,305 @@ +/*! + +[config] +name: tail calls +clc_version_min: 10 +dimensions: 1 + +[test] +name: Basic tail call +kernel_name: kernel_call_tailcall +global_size: 4 0 0 + +arg_out: 0 buffer int[4] \ + 4 11 107 -12 + +arg_in: 1 buffer int[4] \ + 0 100 1234 -912 + +arg_in: 2 buffer int[4] \ + 1 4 2 45 + +[test] +name: Tail call with more arguments than caller +kernel_name: kernel_call_tailcall_extra_arg +global_size: 4 0 0 + +arg_out: 0 buffer int[4] \ + 2 112 1340 -882 + +arg_in: 1 buffer int[4] \ + 0 100 1234 -912 + +arg_in: 2 buffer int[4] \ + 1 4 2 45 + +[test] +name: Tail call with fewer arguments than acller +kernel_name: kernel_call_tailcall_fewer_args +global_size: 4 0 0 + +arg_out: 0 buffer int[4] \ + 4 8 81 -10 + +arg_in: 1 buffer int[4] \ + 0 100 1234 -912 + +arg_in: 2 buffer int[4] \ + 1 4 2 45 + +arg_in: 3 buffer int[4] \ + 3 8 4 9 + +[test] +name: Tail call with stack passed argument +kernel_name: kernel_call_tailcall_stack_passed_args +global_size: 10 0 0 + +arg_out: 0 buffer int4[10] \ + 11440 8762 10296 13156 \ + 19649 31311 18081 24745 \ + 10476 11772 17766 11070 \ + 22165 18005 28665 35945 \ + 624 938 768 990 \ + 30618 28791 30240 31815 \ + 49851 47676 46806 47676 \ + 4400 4272 3392 2632 \ + 10582 8712 8514 7854 \ + 19737 21199 23865 18533 \ + + +arg_in: 1 buffer int4[10] \ + 0 13 76 46 \ + 4 74 33 63 \ + 26 9 95 7 \ + 41 54 47 29 \ + 15 68 38 39 \ + 91 43 14 95 \ + 44 83 69 70 \ + 89 54 14 45 \ + 77 63 21 21 \ + 64 70 80 70 + +arg_in: 2 buffer int4[10] \ + 53 22 4 68 \ + 76 100 36 24 \ + 50 38 27 92 \ + 18 15 57 81 \ + 50 14 59 85 \ + 41 13 89 9 \ + 99 96 85 29 \ + 99 21 45 31 \ + 8 39 96 95 \ + 76 67 63 5 + +arg_in: 3 buffer int4[10] \ + 68 94 38 52 \ + 99 72 76 65 \ + 53 46 95 5 \ + 3 53 50 96 \ + 59 96 56 14 \ + 16 7 36 25 \ + 54 51 10 41 \ + 51 89 44 47 \ + 39 27 69 28 \ + 60 22 32 70 + +arg_in: 4 buffer int4[10] \ + 83 3 5 53 \ + 7 63 89 27 \ + 76 77 83 12 \ + 75 56 89 63 \ + 99 41 14 57 \ + 13 79 45 35 \ + 58 88 44 73 \ + 81 36 21 100 \ + 78 79 42 28 \ + 11 77 53 55 + +arg_in: 5 buffer int4[10] \ + 67 0 38 6 \ + 44 77 48 24 \ + 1 69 87 63 \ + 85 16 21 72 \ + 25 49 46 97 \ + 45 81 94 65 \ + 87 72 80 71 \ + 15 63 62 0 \ + 19 1 19 99 \ + 59 33 71 14 + +arg_in: 6 buffer int4[10] \ + 42 69 59 93 \ + 27 36 16 49 \ + 74 73 100 89 \ + 13 9 27 0 \ + 12 20 32 63 \ + 21 68 91 25 \ + 74 1 89 53 \ + 0 78 73 32 \ + 24 82 13 40 \ + 16 49 86 82 + +arg_in: 7 buffer int4[10] \ + 85 53 9 66 \ + 90 91 6 91 \ + 23 30 35 51 \ + 41 2 71 94 \ + 12 65 62 81 \ + 86 47 51 60 \ + 46 6 72 49 \ + 42 68 68 20 \ + 60 17 83 15 \ + 56 74 31 13 + +arg_in: 8 buffer int4[10] \ + 42 70 91 76 \ + 50 52 32 99 \ + 59 85 41 84 \ + 24 18 32 89 \ + 25 48 39 20 \ + 82 76 46 96 \ + 67 68 20 92 \ + 84 71 83 9 \ + 99 25 23 10 \ + 53 31 59 52 + +arg_in: 9 buffer int[10] \ + 26 \ + 49 \ + 27 \ + 65 \ + 2 \ + 63 \ + 87 \ + 8 \ + 22 \ + 43 + +!*/ + +#define NOINLINE __attribute__((noinline)) + +NOINLINE +int i32_func_i32_i32(int x, int y) +{ + return x / y + 4; +} + +NOINLINE +int i32_func_i32_i32_i32(int x, int y, int z) +{ + return x / y + z; +} + +// Test a basic tail call +NOINLINE +int tailcall_i32_func_i32_i32(int x, int y) +{ + x += 5; + y += 10; + return i32_func_i32_i32(x, y); +} + +// Test a basic tail call with more arguments in the callee than +// caller. +NOINLINE +int tailcall_i32_func_i32_i32_extra_arg(int x, int y) +{ + int z = x + y + 1; + x += 5; + y += 10; + return i32_func_i32_i32_i32(x, y, z); +} + +// Test a basic tail call with fewere arguments in the callee than +// caller. +NOINLINE +int tailcall_i32_func_i32_i32_i32_fewer_args(int x, int y, int z) +{ + x += 5; + y += 10; + return i32_func_i32_i32(x, y + z); +} + +kernel void kernel_call_tailcall(global int* output, + global int* input0, + global int* input1) +{ + int id = get_global_id(0); + output[id] = tailcall_i32_func_i32_i32(input0[id], input1[id]); +} + +kernel void kernel_call_tailcall_extra_arg(global int* output, + global int* input0, + global int* input1) +{ + int id = get_global_id(0); + output[id] = tailcall_i32_func_i32_i32_extra_arg(input0[id], input1[id]); +} + +kernel void kernel_call_tailcall_fewer_args(global int* output, + global int* input0, + global int* input1, + global int* input2) +{ + int id = get_global_id(0); + output[id] = tailcall_i32_func_i32_i32_i32_fewer_args(input0[id], input1[id], input2[id]); +} + +NOINLINE +int4 v4i32_func_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_i32( + int4 arg0, int4 arg1, int4 arg2, int4 arg3, + int4 arg4, int4 arg5, int4 arg6, int4 arg7, + int arg8) +{ + // Try to make sure we can't clobber the incoming stack arguments + // with local stack objects. + volatile int4 args[8] = { arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7 }; + volatile int scalar_arg = arg8; + + int4 total = 0; + for (int i = 0; i < 8; ++i) + { + total += args[i]; + } + + return total * scalar_arg; +} + +// Test a basic tail call +NOINLINE +int4 tailcall_v4i32_func_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_i32( + int4 arg0, int4 arg1, int4 arg2, int4 arg3, + int4 arg4, int4 arg5, int4 arg6, int4 arg7, + int arg8) +{ + arg0 *= 2; + return v4i32_func_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_i32( + arg0, arg1, arg2, arg3, arg4, + arg5, arg6, arg7, arg8); +} + +kernel void kernel_call_tailcall_stack_passed_args(global int4* output, + global int4* input0, + global int4* input1, + global int4* input2, + global int4* input3, + global int4* input4, + global int4* input5, + global int4* input6, + global int4* input7, + global int* input8) +{ + int id = get_global_id(0); + output[id] = tailcall_v4i32_func_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_i32( + input0[id], + input1[id], + input2[id], + input3[id], + input4[id], + input5[id], + input6[id], + input7[id], + input8[id]); +} |