diff options
author | Aaron Watry <awatry@gmail.com> | 2013-05-14 20:24:41 -0500 |
---|---|---|
committer | Aaron Watry <awatry@gmail.com> | 2013-05-14 20:24:41 -0500 |
commit | c655e963619d5a96a3f0a36f9e50da2743554229 (patch) | |
tree | 8258f07b1f8e65b6de1bf7c0c9d202719b1e86a4 | |
parent | fbddc8a580e8f772e85cb1c80ca6fbbb05e466d7 (diff) |
libclc: Don't build vload4 from 2x vload2...
It creates a bunch of extra instructions in the assembly that may not get
optimized out.
-rw-r--r-- | generic/lib/shared/vload.cl | 2 |
1 files changed, 1 insertions, 1 deletions
diff --git a/generic/lib/shared/vload.cl b/generic/lib/shared/vload.cl
index 1f266fa..24d8240 100644
--- a/generic/lib/shared/vload.cl
+++ b/generic/lib/shared/vload.cl
@@ -10,7 +10,7 @@
   } \
 \
   _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##4 vload4(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
-    return (PRIM_TYPE##4)(vload2(offset, x), vload2(offset+2, x)); \
+    return (PRIM_TYPE##4)(x[offset], x[offset+1], x[offset+2], x[offset+3]); \
   } \
 \
   _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##8 vload8(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \