summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Aaron Watry <awatry@gmail.com> 2013-05-14 20:24:41 -0500
committer Aaron Watry <awatry@gmail.com> 2013-05-14 20:24:41 -0500
commit c655e963619d5a96a3f0a36f9e50da2743554229 (patch)
tree 8258f07b1f8e65b6de1bf7c0c9d202719b1e86a4
parent fbddc8a580e8f772e85cb1c80ca6fbbb05e466d7 (diff)
libclc: Don't build vload4 from 2x vload2...
It creates a bunch of extra instructions in the assembly that may not get optimized out.
-rw-r--r-- generic/lib/shared/vload.cl 2
1 file changed, 1 insertion, 1 deletion
diff --git a/generic/lib/shared/vload.cl b/generic/lib/shared/vload.cl
index 1f266fa..24d8240 100644
--- a/generic/lib/shared/vload.cl
+++ b/generic/lib/shared/vload.cl
@@ -10,7 +10,7 @@
} \
\
_CLC_OVERLOAD _CLC_DEF PRIM_TYPE##4 vload4(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
- return (PRIM_TYPE##4)(vload2(offset, x), vload2(offset+2, x)); \
+ return (PRIM_TYPE##4)(x[offset], x[offset+1], x[offset+2], x[offset+3]); \
} \
\
_CLC_OVERLOAD _CLC_DEF PRIM_TYPE##8 vload8(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \