summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdhemerval Zanella <azanella@linux.vnet.ibm.com>2012-11-22 11:20:42 -0600
committerJosé Fonseca <jfonseca@vmware.com>2012-11-29 11:51:53 +0000
commitdd5c58081672bd495e0ffef1c3cc1229620f0f88 (patch)
tree851854783ebfe4035e489984d15b610caa70d515
parent2ea7d3dabd01608c1d0b020ef941912bd3893a96 (diff)
gallivm: Altivec vector add/sub intrinsics
This patch adds the correct vector addition and subtraction intrinsics when using Altivec on PPC. The current code uses the default path, and the LLVM backend ends up issuing carry-out arithmetic instructions where saturating ones are expected. It also includes a fix for PowerPC, where char is unsigned by default, resulting in bogus values for vector shifting. Reviewed-by: Roland Scheidegger <sroland@vmware.com> Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_arit.c40
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_swizzle.c2
2 files changed, 27 insertions, 15 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index a4d7d98cc94..c3df3bf73b2 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -365,13 +365,19 @@ lp_build_add(struct lp_build_context *bld,
if(a == bld->one || b == bld->one)
return bld->one;
- if(util_cpu_caps.has_sse2 &&
- type.width * type.length == 128 &&
- !type.floating && !type.fixed) {
- if(type.width == 8)
- intrinsic = type.sign ? "llvm.x86.sse2.padds.b" : "llvm.x86.sse2.paddus.b";
- if(type.width == 16)
- intrinsic = type.sign ? "llvm.x86.sse2.padds.w" : "llvm.x86.sse2.paddus.w";
+ if (type.width * type.length == 128 &&
+ !type.floating && !type.fixed) {
+ if(util_cpu_caps.has_sse2) {
+ if(type.width == 8)
+ intrinsic = type.sign ? "llvm.x86.sse2.padds.b" : "llvm.x86.sse2.paddus.b";
+ if(type.width == 16)
+ intrinsic = type.sign ? "llvm.x86.sse2.padds.w" : "llvm.x86.sse2.paddus.w";
+ } else if (util_cpu_caps.has_altivec) {
+ if(type.width == 8)
+ intrinsic = type.sign ? "llvm.ppc.altivec.vaddsbs" : "llvm.ppc.altivec.vaddubs";
+ if(type.width == 16)
+ intrinsic = type.sign ? "llvm.ppc.altivec.vaddsws" : "llvm.ppc.altivec.vadduws";
+ }
}
if(intrinsic)
@@ -636,13 +642,19 @@ lp_build_sub(struct lp_build_context *bld,
if(b == bld->one)
return bld->zero;
- if(util_cpu_caps.has_sse2 &&
- type.width * type.length == 128 &&
- !type.floating && !type.fixed) {
- if(type.width == 8)
- intrinsic = type.sign ? "llvm.x86.sse2.psubs.b" : "llvm.x86.sse2.psubus.b";
- if(type.width == 16)
- intrinsic = type.sign ? "llvm.x86.sse2.psubs.w" : "llvm.x86.sse2.psubus.w";
+ if (type.width * type.length == 128 &&
+ !type.floating && !type.fixed) {
+ if (util_cpu_caps.has_sse2) {
+ if(type.width == 8)
+ intrinsic = type.sign ? "llvm.x86.sse2.psubs.b" : "llvm.x86.sse2.psubus.b";
+ if(type.width == 16)
+ intrinsic = type.sign ? "llvm.x86.sse2.psubs.w" : "llvm.x86.sse2.psubus.w";
+ } else if (util_cpu_caps.has_altivec) {
+ if(type.width == 8)
+ intrinsic = type.sign ? "llvm.ppc.altivec.vsubsbs" : "llvm.ppc.altivec.vsububs";
+ if(type.width == 16)
+ intrinsic = type.sign ? "llvm.ppc.altivec.vsubsws" : "llvm.ppc.altivec.vsubuws";
+ }
}
if(intrinsic)
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c
index 377884a78cf..ae4033b6086 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c
@@ -246,7 +246,7 @@ lp_build_swizzle_scalar_aos(struct lp_build_context *bld,
* YYYY YYYY .... YYYY <= output
*/
struct lp_type type4;
- const char shifts[4][2] = {
+ const int shifts[4][2] = {
{ 1, 2},
{-1, 2},
{ 1, -2},