diff options
Diffstat (limited to 'src/gallium/auxiliary/gallivm')
31 files changed, 952 insertions, 325 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld.h b/src/gallium/auxiliary/gallivm/lp_bld.h new file mode 100644 index 0000000000..70a4960f91 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld.h @@ -0,0 +1,47 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Wrapper for LLVM header file #includes. + */ + + +#ifndef LP_BLD_H +#define LP_BLD_H + + +#include <llvm-c/Core.h> + + +/** Set version to 0 if missing to avoid #ifdef HAVE_LLVM everywhere */ +#ifndef HAVE_LLVM +#define HAVE_LLVM 0x0207 +#endif + + +#endif /* LP_BLD_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_alpha.h b/src/gallium/auxiliary/gallivm/lp_bld_alpha.h index fe3cedcc48..0f99fec65e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_alpha.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_alpha.h @@ -35,7 +35,7 @@ #define LP_BLD_ALPHA_H -#include "os/os_llvm.h" +#include "gallivm/lp_bld.h" struct pipe_alpha_state; struct lp_type; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index aa47338b32..8e8fcccf56 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -361,12 +361,12 @@ lp_build_mul_u8n(LLVMBuilderRef builder, LLVMValueRef c8; LLVMValueRef ab; - c8 = lp_build_int_const_scalar(i16_type, 8); + c8 = lp_build_const_int_vec(i16_type, 8); #if 0 /* a*b/255 ~= (a*(b + 1)) >> 256 */ - b = LLVMBuildAdd(builder, b, lp_build_int_const_scalar(i16_type, 1), ""); + b = LLVMBuildAdd(builder, b, lp_build_const_int_vec(i16_type, 1), ""); ab = LLVMBuildMul(builder, a, b, ""); #else @@ -374,7 +374,7 @@ lp_build_mul_u8n(LLVMBuilderRef builder, /* ab/255 ~= (ab + (ab >> 8) + 0x80) >> 8 */ ab = LLVMBuildMul(builder, a, b, ""); ab = LLVMBuildAdd(builder, ab, LLVMBuildLShr(builder, ab, c8, ""), ""); - ab = LLVMBuildAdd(builder, ab, lp_build_int_const_scalar(i16_type, 0x80), ""); + ab = LLVMBuildAdd(builder, ab, lp_build_const_int_vec(i16_type, 0x80), ""); #endif @@ -429,7 +429,7 @@ lp_build_mul(struct lp_build_context *bld, } if(type.fixed) - shift = lp_build_int_const_scalar(type, type.width/2); + shift = lp_build_const_int_vec(type, type.width/2); else shift = NULL; @@ -491,7 +491,7 @@ lp_build_mul_imm(struct lp_build_context *bld, * for Inf and NaN. */ unsigned mantissa = lp_mantissa(bld->type); - factor = lp_build_int_const_scalar(bld->type, (unsigned long long)shift << mantissa); + factor = lp_build_const_int_vec(bld->type, (unsigned long long)shift << mantissa); a = LLVMBuildBitCast(bld->builder, a, lp_build_int_vec_type(bld->type), ""); a = LLVMBuildAdd(bld->builder, a, factor, ""); a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(bld->type), ""); @@ -499,12 +499,12 @@ lp_build_mul_imm(struct lp_build_context *bld, #endif } else { - factor = lp_build_const_scalar(bld->type, shift); + factor = lp_build_const_vec(bld->type, shift); return LLVMBuildShl(bld->builder, a, factor, ""); } } - factor = lp_build_const_scalar(bld->type, (double)b); + factor = lp_build_const_vec(bld->type, (double)b); return lp_build_mul(bld, a, factor); } @@ -567,7 +567,7 @@ lp_build_lerp(struct lp_build_context *bld, * but it will be wrong for other uses. Basically we need a more * powerful lp_type, capable of further distinguishing the values * interpretation from the value storage. */ - res = LLVMBuildAnd(bld->builder, res, lp_build_int_const_scalar(bld->type, (1 << bld->type.width/2) - 1), ""); + res = LLVMBuildAnd(bld->builder, res, lp_build_const_int_vec(bld->type, (1 << bld->type.width/2) - 1), ""); return res; } @@ -689,7 +689,7 @@ lp_build_abs(struct lp_build_context *bld, /* vector of floats */ LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); unsigned long long absMask = ~(1ULL << (type.width - 1)); - LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long) absMask)); + LLVMValueRef mask = lp_build_const_int_vec(type, ((unsigned long long) absMask)); a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); a = LLVMBuildAnd(bld->builder, a, mask, ""); a = LLVMBuildBitCast(bld->builder, a, vec_type, ""); @@ -751,7 +751,7 @@ lp_build_sgn(struct lp_build_context *bld, /* vector */ int_type = lp_build_int_vec_type(type); vec_type = lp_build_vec_type(type); - mask = lp_build_int_const_scalar(type, maskBit); + mask = lp_build_const_int_vec(type, maskBit); } /* Take the sign bit and add it to 1 constant */ @@ -763,14 +763,14 @@ lp_build_sgn(struct lp_build_context *bld, } else { - LLVMValueRef minus_one = lp_build_const_scalar(type, -1.0); + LLVMValueRef minus_one = lp_build_const_vec(type, -1.0); cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, bld->zero); res = lp_build_select(bld, cond, bld->one, minus_one); } /* Handle zero */ cond = lp_build_cmp(bld, PIPE_FUNC_EQUAL, a, bld->zero); - res = lp_build_select(bld, cond, bld->zero, bld->one); + res = lp_build_select(bld, cond, bld->zero, res); return res; } @@ -789,8 +789,8 @@ lp_build_set_sign(struct lp_build_context *bld, const struct lp_type type = bld->type; LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); LLVMTypeRef vec_type = lp_build_vec_type(type); - LLVMValueRef shift = lp_build_int_const_scalar(type, type.width - 1); - LLVMValueRef mask = lp_build_int_const_scalar(type, + LLVMValueRef shift = lp_build_const_int_vec(type, type.width - 1); + LLVMValueRef mask = lp_build_const_int_vec(type, ~((unsigned long long) 1 << (type.width - 1))); LLVMValueRef val, res; @@ -930,7 +930,10 @@ lp_build_floor(struct lp_build_context *bld, assert(type.floating); if (type.length == 1) { - return LLVMBuildFPTrunc(bld->builder, a, LLVMFloatType(), ""); + LLVMValueRef res; + res = lp_build_ifloor(bld, a); + res = LLVMBuildSIToFP(bld->builder, res, LLVMFloatType(), ""); + return res; } if(util_cpu_caps.has_sse4_1) @@ -993,7 +996,7 @@ lp_build_itrunc(struct lp_build_context *bld, if (type.length == 1) { LLVMTypeRef int_type = LLVMIntType(type.width); - return LLVMBuildFPTrunc(bld->builder, a, int_type, ""); + return LLVMBuildFPToSI(bld->builder, a, int_type, ""); } else { LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); @@ -1031,7 +1034,7 @@ lp_build_iround(struct lp_build_context *bld, } else { LLVMTypeRef vec_type = lp_build_vec_type(type); - LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1)); + LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1)); LLVMValueRef sign; LLVMValueRef half; @@ -1040,7 +1043,7 @@ lp_build_iround(struct lp_build_context *bld, sign = LLVMBuildAnd(bld->builder, sign, mask, ""); /* sign * 0.5 */ - half = lp_build_const_scalar(type, 0.5); + half = lp_build_const_vec(type, 0.5); half = LLVMBuildBitCast(bld->builder, half, int_vec_type, ""); half = LLVMBuildOr(bld->builder, sign, half, ""); half = LLVMBuildBitCast(bld->builder, half, vec_type, ""); @@ -1083,18 +1086,18 @@ lp_build_ifloor(struct lp_build_context *bld, /* Take the sign bit and add it to 1 constant */ LLVMTypeRef vec_type = lp_build_vec_type(type); unsigned mantissa = lp_mantissa(type); - LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1)); + LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1)); LLVMValueRef sign; LLVMValueRef offset; /* sign = a < 0 ? ~0 : 0 */ sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); sign = LLVMBuildAnd(bld->builder, sign, mask, ""); - sign = LLVMBuildAShr(bld->builder, sign, lp_build_int_const_scalar(type, type.width - 1), ""); + sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), ""); lp_build_name(sign, "floor.sign"); /* offset = -0.99999(9)f */ - offset = lp_build_const_scalar(type, -(double)(((unsigned long long)1 << mantissa) - 1)/((unsigned long long)1 << mantissa)); + offset = lp_build_const_vec(type, -(double)(((unsigned long long)1 << mantissa) - 1)/((unsigned long long)1 << mantissa)); offset = LLVMConstBitCast(offset, int_vec_type); /* offset = a < 0 ? -0.99999(9)f : 0.0f */ @@ -1265,7 +1268,7 @@ lp_build_exp(struct lp_build_context *bld, LLVMValueRef x) { /* log2(e) = 1/log(2) */ - LLVMValueRef log2e = lp_build_const_scalar(bld->type, 1.4426950408889634); + LLVMValueRef log2e = lp_build_const_vec(bld->type, 1.4426950408889634); return lp_build_mul(bld, log2e, lp_build_exp2(bld, x)); } @@ -1279,7 +1282,7 @@ lp_build_log(struct lp_build_context *bld, LLVMValueRef x) { /* log(2) */ - LLVMValueRef log2 = lp_build_const_scalar(bld->type, 0.69314718055994529); + LLVMValueRef log2 = lp_build_const_vec(bld->type, 0.69314718055994529); return lp_build_mul(bld, log2, lp_build_exp2(bld, x)); } @@ -1315,7 +1318,7 @@ lp_build_polynomial(struct lp_build_context *bld, if (type.length == 1) coeff = LLVMConstReal(float_type, coeffs[i]); else - coeff = lp_build_const_scalar(type, coeffs[i]); + coeff = lp_build_const_vec(type, coeffs[i]); if(res) res = lp_build_add(bld, coeff, lp_build_mul(bld, x, res)); @@ -1372,11 +1375,11 @@ lp_build_exp2_approx(struct lp_build_context *bld, assert(type.floating && type.width == 32); - x = lp_build_min(bld, x, lp_build_const_scalar(type, 129.0)); - x = lp_build_max(bld, x, lp_build_const_scalar(type, -126.99999)); + x = lp_build_min(bld, x, lp_build_const_vec(type, 129.0)); + x = lp_build_max(bld, x, lp_build_const_vec(type, -126.99999)); /* ipart = int(x - 0.5) */ - ipart = LLVMBuildSub(bld->builder, x, lp_build_const_scalar(type, 0.5f), ""); + ipart = LLVMBuildSub(bld->builder, x, lp_build_const_vec(type, 0.5f), ""); ipart = LLVMBuildFPToSI(bld->builder, ipart, int_vec_type, ""); /* fpart = x - ipart */ @@ -1386,8 +1389,8 @@ lp_build_exp2_approx(struct lp_build_context *bld, if(p_exp2_int_part || p_exp2) { /* expipart = (float) (1 << ipart) */ - expipart = LLVMBuildAdd(bld->builder, ipart, lp_build_int_const_scalar(type, 127), ""); - expipart = LLVMBuildShl(bld->builder, expipart, lp_build_int_const_scalar(type, 23), ""); + expipart = LLVMBuildAdd(bld->builder, ipart, lp_build_const_int_vec(type, 127), ""); + expipart = LLVMBuildShl(bld->builder, expipart, lp_build_const_int_vec(type, 23), ""); expipart = LLVMBuildBitCast(bld->builder, expipart, vec_type, ""); } @@ -1453,8 +1456,8 @@ lp_build_log2_approx(struct lp_build_context *bld, LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); - LLVMValueRef expmask = lp_build_int_const_scalar(type, 0x7f800000); - LLVMValueRef mantmask = lp_build_int_const_scalar(type, 0x007fffff); + LLVMValueRef expmask = lp_build_const_int_vec(type, 0x7f800000); + LLVMValueRef mantmask = lp_build_const_int_vec(type, 0x007fffff); LLVMValueRef one = LLVMConstBitCast(bld->one, int_vec_type); LLVMValueRef i = NULL; @@ -1479,8 +1482,8 @@ lp_build_log2_approx(struct lp_build_context *bld, } if(p_floor_log2 || p_log2) { - logexp = LLVMBuildLShr(bld->builder, exp, lp_build_int_const_scalar(type, 23), ""); - logexp = LLVMBuildSub(bld->builder, logexp, lp_build_int_const_scalar(type, 127), ""); + logexp = LLVMBuildLShr(bld->builder, exp, lp_build_const_int_vec(type, 23), ""); + logexp = LLVMBuildSub(bld->builder, logexp, lp_build_const_int_vec(type, 127), ""); logexp = LLVMBuildSIToFP(bld->builder, logexp, vec_type, ""); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h index 7a10fe1220..31efa9921c 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h @@ -37,7 +37,7 @@ #define LP_BLD_ARIT_H -#include "os/os_llvm.h" +#include "gallivm/lp_bld.h" struct lp_type; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_blend.h b/src/gallium/auxiliary/gallivm/lp_bld_blend.h index 5a9e1c1fb2..ebbdb1a604 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_blend.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_blend.h @@ -40,7 +40,7 @@ * for a standalone example. */ -#include "os/os_llvm.h" +#include "gallivm/lp_bld.h" #include "pipe/p_format.h" diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.c b/src/gallium/auxiliary/gallivm/lp_bld_const.c index 8a275fa72f..57843e9a60 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_const.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_const.c @@ -263,7 +263,7 @@ lp_build_one(struct lp_type type) if(type.sign) /* TODO: Unfortunately this caused "Tried to create a shift operation * on a non-integer type!" */ - vec = LLVMConstLShr(vec, lp_build_int_const_scalar(type, 1)); + vec = LLVMConstLShr(vec, lp_build_const_int_vec(type, 1)); #endif return vec; @@ -283,8 +283,8 @@ lp_build_one(struct lp_type type) * Build constant-valued vector from a scalar value. */ LLVMValueRef -lp_build_const_scalar(struct lp_type type, - double val) +lp_build_const_vec(struct lp_type type, + double val) { LLVMTypeRef elem_type = lp_build_elem_type(type); LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; @@ -309,7 +309,7 @@ lp_build_const_scalar(struct lp_type type, LLVMValueRef -lp_build_int_const_scalar(struct lp_type type, +lp_build_const_int_vec(struct lp_type type, long long val) { LLVMTypeRef elem_type = lp_build_int_elem_type(type); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.h b/src/gallium/auxiliary/gallivm/lp_bld_const.h index 4078636103..9ca2f0664e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_const.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_const.h @@ -37,9 +37,9 @@ #define LP_BLD_CONST_H -#include "os/os_llvm.h" +#include "pipe/p_compiler.h" +#include "gallivm/lp_bld.h" -#include <pipe/p_compiler.h> struct lp_type; @@ -85,13 +85,11 @@ lp_build_one(struct lp_type type); LLVMValueRef -lp_build_const_scalar(struct lp_type type, - double val); +lp_build_const_vec(struct lp_type type, double val); LLVMValueRef -lp_build_int_const_scalar(struct lp_type type, - long long val); +lp_build_const_int_vec(struct lp_type type, long long val); LLVMValueRef diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c index f77cf78721..3f7f2ebde9 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c @@ -114,13 +114,13 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, scale = (double)mask/ubound; bias = (double)((unsigned long long)1 << (mantissa - n)); - res = LLVMBuildMul(builder, src, lp_build_const_scalar(src_type, scale), ""); - res = LLVMBuildAdd(builder, res, lp_build_const_scalar(src_type, bias), ""); + res = LLVMBuildMul(builder, src, lp_build_const_vec(src_type, scale), ""); + res = LLVMBuildAdd(builder, res, lp_build_const_vec(src_type, bias), ""); res = LLVMBuildBitCast(builder, res, int_vec_type, ""); if(dst_width > n) { int shift = dst_width - n; - res = LLVMBuildShl(builder, res, lp_build_int_const_scalar(src_type, shift), ""); + res = LLVMBuildShl(builder, res, lp_build_const_int_vec(src_type, shift), ""); /* TODO: Fill in the empty lower bits for additional precision? */ /* YES: this fixes progs/trivial/tri-z-eq.c. @@ -130,21 +130,21 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, #if 0 { LLVMValueRef msb; - msb = LLVMBuildLShr(builder, res, lp_build_int_const_scalar(src_type, dst_width - 1), ""); - msb = LLVMBuildShl(builder, msb, lp_build_int_const_scalar(src_type, shift), ""); - msb = LLVMBuildSub(builder, msb, lp_build_int_const_scalar(src_type, 1), ""); + msb = LLVMBuildLShr(builder, res, lp_build_const_int_vec(src_type, dst_width - 1), ""); + msb = LLVMBuildShl(builder, msb, lp_build_const_int_vec(src_type, shift), ""); + msb = LLVMBuildSub(builder, msb, lp_build_const_int_vec(src_type, 1), ""); res = LLVMBuildOr(builder, res, msb, ""); } #elif 0 while(shift > 0) { - res = LLVMBuildOr(builder, res, LLVMBuildLShr(builder, res, lp_build_int_const_scalar(src_type, n), ""), ""); + res = LLVMBuildOr(builder, res, LLVMBuildLShr(builder, res, lp_build_const_int_vec(src_type, n), ""), ""); shift -= n; n *= 2; } #endif } else - res = LLVMBuildAnd(builder, res, lp_build_int_const_scalar(src_type, mask), ""); + res = LLVMBuildAnd(builder, res, lp_build_const_int_vec(src_type, mask), ""); return res; } @@ -183,10 +183,10 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, if(src_width > mantissa) { int shift = src_width - mantissa; - res = LLVMBuildLShr(builder, res, lp_build_int_const_scalar(dst_type, shift), ""); + res = LLVMBuildLShr(builder, res, lp_build_const_int_vec(dst_type, shift), ""); } - bias_ = lp_build_const_scalar(dst_type, bias); + bias_ = lp_build_const_vec(dst_type, bias); res = LLVMBuildOr(builder, res, @@ -195,7 +195,7 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, res = LLVMBuildBitCast(builder, res, vec_type, ""); res = LLVMBuildSub(builder, res, bias_, ""); - res = LLVMBuildMul(builder, res, lp_build_const_scalar(dst_type, scale), ""); + res = LLVMBuildMul(builder, res, lp_build_const_vec(dst_type, scale), ""); return res; } @@ -251,7 +251,7 @@ lp_build_conv(LLVMBuilderRef builder, if(dst_min == 0.0) thres = bld.zero; else - thres = lp_build_const_scalar(src_type, dst_min); + thres = lp_build_const_vec(src_type, dst_min); for(i = 0; i < num_tmps; ++i) tmp[i] = lp_build_max(&bld, tmp[i], thres); } @@ -260,7 +260,7 @@ lp_build_conv(LLVMBuilderRef builder, if(dst_max == 1.0) thres = bld.one; else - thres = lp_build_const_scalar(src_type, dst_max); + thres = lp_build_const_vec(src_type, dst_max); for(i = 0; i < num_tmps; ++i) tmp[i] = lp_build_min(&bld, tmp[i], thres); } @@ -288,7 +288,7 @@ lp_build_conv(LLVMBuilderRef builder, LLVMTypeRef tmp_vec_type; if (dst_scale != 1.0) { - LLVMValueRef scale = lp_build_const_scalar(tmp_type, dst_scale); + LLVMValueRef scale = lp_build_const_vec(tmp_type, dst_scale); for(i = 0; i < num_tmps; ++i) tmp[i] = LLVMBuildMul(builder, tmp[i], scale, ""); } @@ -315,7 +315,7 @@ lp_build_conv(LLVMBuilderRef builder, /* FIXME: compensate different offsets too */ if(src_shift > dst_shift) { - LLVMValueRef shift = lp_build_int_const_scalar(tmp_type, src_shift - dst_shift); + LLVMValueRef shift = lp_build_const_int_vec(tmp_type, src_shift - dst_shift); for(i = 0; i < num_tmps; ++i) if(src_type.sign) tmp[i] = LLVMBuildAShr(builder, tmp[i], shift, ""); @@ -388,7 +388,7 @@ lp_build_conv(LLVMBuilderRef builder, } if (src_scale != 1.0) { - LLVMValueRef scale = lp_build_const_scalar(tmp_type, 1.0/src_scale); + LLVMValueRef scale = lp_build_const_vec(tmp_type, 1.0/src_scale); for(i = 0; i < num_tmps; ++i) tmp[i] = LLVMBuildMul(builder, tmp[i], scale, ""); } @@ -400,7 +400,7 @@ lp_build_conv(LLVMBuilderRef builder, /* FIXME: compensate different offsets too */ if(src_shift < dst_shift) { - LLVMValueRef shift = lp_build_int_const_scalar(tmp_type, dst_shift - src_shift); + LLVMValueRef shift = lp_build_const_int_vec(tmp_type, dst_shift - src_shift); for(i = 0; i < num_tmps; ++i) tmp[i] = LLVMBuildShl(builder, tmp[i], shift, ""); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.h b/src/gallium/auxiliary/gallivm/lp_bld_conv.h index 78e8155ff7..628831c3ad 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_conv.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.h @@ -37,7 +37,7 @@ #define LP_BLD_CONV_H -#include "os/os_llvm.h" +#include "gallivm/lp_bld.h" struct lp_type; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.h b/src/gallium/auxiliary/gallivm/lp_bld_debug.h index 441ad94786..7b010cbdb0 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.h @@ -30,7 +30,7 @@ #define LP_BLD_DEBUG_H -#include "os/os_llvm.h" +#include "gallivm/lp_bld.h" #include "pipe/p_compiler.h" #include "util/u_string.h" diff --git a/src/gallium/auxiliary/gallivm/lp_bld_depth.c b/src/gallium/auxiliary/gallivm/lp_bld_depth.c index f08f8eb6d8..4ce1a27a06 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_depth.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_depth.c @@ -52,7 +52,14 @@ * Z31 Z32 Z41 Z42 Z33 Z34 Z43 Z44 ... * ... ... ... ... ... ... ... ... ... * - * FIXME: Code generate stencil test + * + * Stencil test: + * Two-sided stencil test is supported but probably not as efficient as + * it could be. Currently, we use if/then/else constructs to do the + * operations for front vs. back-facing polygons. We could probably do + * both the front and back arithmetic then use a Select() instruction to + * choose the result depending on polyon orientation. We'd have to + * measure performance both ways and see which is better. * * @author Jose Fonseca <jfonseca@vmware.com> */ @@ -61,11 +68,264 @@ #include "util/u_format.h" #include "lp_bld_type.h" +#include "lp_bld_arit.h" #include "lp_bld_const.h" #include "lp_bld_logic.h" #include "lp_bld_flow.h" #include "lp_bld_debug.h" #include "lp_bld_depth.h" +#include "lp_bld_swizzle.h" + + +/** Used to select fields from pipe_stencil_state */ +enum stencil_op { + S_FAIL_OP, + Z_FAIL_OP, + Z_PASS_OP +}; + + + +/** + * Do the stencil test comparison (compare FB stencil values against ref value). + * This will be used twice when generating two-sided stencil code. + * \param stencil the front/back stencil state + * \param stencilRef the stencil reference value, replicated as a vector + * \param stencilVals vector of stencil values from framebuffer + * \return vector mask of pass/fail values (~0 or 0) + */ +static LLVMValueRef +lp_build_stencil_test_single(struct lp_build_context *bld, + const struct pipe_stencil_state *stencil, + LLVMValueRef stencilRef, + LLVMValueRef stencilVals) +{ + const unsigned stencilMax = 255; /* XXX fix */ + struct lp_type type = bld->type; + LLVMValueRef res; + + assert(type.sign); + + assert(stencil->enabled); + + if (stencil->valuemask != stencilMax) { + /* compute stencilRef = stencilRef & valuemask */ + LLVMValueRef valuemask = lp_build_const_int_vec(type, stencil->valuemask); + stencilRef = LLVMBuildAnd(bld->builder, stencilRef, valuemask, ""); + /* compute stencilVals = stencilVals & valuemask */ + stencilVals = LLVMBuildAnd(bld->builder, stencilVals, valuemask, ""); + } + + res = lp_build_cmp(bld, stencil->func, stencilVals, stencilRef); + + return res; +} + + +/** + * Do the one or two-sided stencil test comparison. + * \sa lp_build_stencil_test_single + * \param face an integer indicating front (+) or back (-) facing polygon. + * If NULL, assume front-facing. + */ +static LLVMValueRef +lp_build_stencil_test(struct lp_build_context *bld, + const struct pipe_stencil_state stencil[2], + LLVMValueRef stencilRefs[2], + LLVMValueRef stencilVals, + LLVMValueRef face) +{ + LLVMValueRef res; + + assert(stencil[0].enabled); + + if (stencil[1].enabled && face) { + /* do two-sided test */ + struct lp_build_flow_context *flow_ctx; + struct lp_build_if_state if_ctx; + LLVMValueRef front_facing; + LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0); + LLVMValueRef result = bld->undef; + + flow_ctx = lp_build_flow_create(bld->builder); + lp_build_flow_scope_begin(flow_ctx); + + lp_build_flow_scope_declare(flow_ctx, &result); + + /* front_facing = face > 0.0 */ + front_facing = LLVMBuildFCmp(bld->builder, LLVMRealUGT, face, zero, ""); + + lp_build_if(&if_ctx, flow_ctx, bld->builder, front_facing); + { + result = lp_build_stencil_test_single(bld, &stencil[0], + stencilRefs[0], stencilVals); + } + lp_build_else(&if_ctx); + { + result = lp_build_stencil_test_single(bld, &stencil[1], + stencilRefs[1], stencilVals); + } + lp_build_endif(&if_ctx); + + lp_build_flow_scope_end(flow_ctx); + lp_build_flow_destroy(flow_ctx); + + res = result; + } + else { + /* do single-side test */ + res = lp_build_stencil_test_single(bld, &stencil[0], + stencilRefs[0], stencilVals); + } + + return res; +} + + +/** + * Apply the stencil operator (add/sub/keep/etc) to the given vector + * of stencil values. + * \return new stencil values vector + */ +static LLVMValueRef +lp_build_stencil_op_single(struct lp_build_context *bld, + const struct pipe_stencil_state *stencil, + enum stencil_op op, + LLVMValueRef stencilRef, + LLVMValueRef stencilVals, + LLVMValueRef mask) + +{ + const unsigned stencilMax = 255; /* XXX fix */ + struct lp_type type = bld->type; + LLVMValueRef res; + LLVMValueRef max = lp_build_const_int_vec(type, stencilMax); + unsigned stencil_op; + + assert(type.sign); + + switch (op) { + case S_FAIL_OP: + stencil_op = stencil->fail_op; + break; + case Z_FAIL_OP: + stencil_op = stencil->zfail_op; + break; + case Z_PASS_OP: + stencil_op = stencil->zpass_op; + break; + default: + assert(0 && "Invalid stencil_op mode"); + stencil_op = PIPE_STENCIL_OP_KEEP; + } + + switch (stencil_op) { + case PIPE_STENCIL_OP_KEEP: + res = stencilVals; + /* we can return early for this case */ + return res; + case PIPE_STENCIL_OP_ZERO: + res = bld->zero; + break; + case PIPE_STENCIL_OP_REPLACE: + res = stencilRef; + break; + case PIPE_STENCIL_OP_INCR: + res = lp_build_add(bld, stencilVals, bld->one); + res = lp_build_min(bld, res, max); + break; + case PIPE_STENCIL_OP_DECR: + res = lp_build_sub(bld, stencilVals, bld->one); + res = lp_build_max(bld, res, bld->zero); + break; + case PIPE_STENCIL_OP_INCR_WRAP: + res = lp_build_add(bld, stencilVals, bld->one); + res = LLVMBuildAnd(bld->builder, res, max, ""); + break; + case PIPE_STENCIL_OP_DECR_WRAP: + res = lp_build_sub(bld, stencilVals, bld->one); + res = LLVMBuildAnd(bld->builder, res, max, ""); + break; + case PIPE_STENCIL_OP_INVERT: + res = LLVMBuildNot(bld->builder, stencilVals, ""); + res = LLVMBuildAnd(bld->builder, res, max, ""); + break; + default: + assert(0 && "bad stencil op mode"); + res = NULL; + } + + if (stencil->writemask != stencilMax) { + /* compute res = (res & mask) | (stencilVals & ~mask) */ + LLVMValueRef mask = lp_build_const_int_vec(type, stencil->writemask); + LLVMValueRef cmask = LLVMBuildNot(bld->builder, mask, "notWritemask"); + LLVMValueRef t1 = LLVMBuildAnd(bld->builder, res, mask, "t1"); + LLVMValueRef t2 = LLVMBuildAnd(bld->builder, stencilVals, cmask, "t2"); + res = LLVMBuildOr(bld->builder, t1, t2, "t1_or_t2"); + } + + /* only the update the vector elements enabled by 'mask' */ + res = lp_build_select(bld, mask, res, stencilVals); + + return res; +} + + +/** + * Do the one or two-sided stencil test op/update. + */ +static LLVMValueRef +lp_build_stencil_op(struct lp_build_context *bld, + const struct pipe_stencil_state stencil[2], + enum stencil_op op, + LLVMValueRef stencilRefs[2], + LLVMValueRef stencilVals, + LLVMValueRef mask, + LLVMValueRef face) + +{ + assert(stencil[0].enabled); + + if (stencil[1].enabled && face) { + /* do two-sided op */ + struct lp_build_flow_context *flow_ctx; + struct lp_build_if_state if_ctx; + LLVMValueRef front_facing; + LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0); + LLVMValueRef result = bld->undef; + + flow_ctx = lp_build_flow_create(bld->builder); + lp_build_flow_scope_begin(flow_ctx); + + lp_build_flow_scope_declare(flow_ctx, &result); + + /* front_facing = face > 0.0 */ + front_facing = LLVMBuildFCmp(bld->builder, LLVMRealUGT, face, zero, ""); + + lp_build_if(&if_ctx, flow_ctx, bld->builder, front_facing); + { + result = lp_build_stencil_op_single(bld, &stencil[0], op, + stencilRefs[0], stencilVals, mask); + } + lp_build_else(&if_ctx); + { + result = lp_build_stencil_op_single(bld, &stencil[1], op, + stencilRefs[1], stencilVals, mask); + } + lp_build_endif(&if_ctx); + + lp_build_flow_scope_end(flow_ctx); + lp_build_flow_destroy(flow_ctx); + + return result; + } + else { + /* do single-sided op */ + return lp_build_stencil_op_single(bld, &stencil[0], op, + stencilRefs[0], stencilVals, mask); + } +} + /** @@ -109,105 +369,303 @@ lp_depth_type(const struct util_format_description *format_desc, /** - * Depth test. + * Compute bitmask and bit shift to apply to the incoming fragment Z values + * and the Z buffer values needed before doing the Z comparison. + * + * Note that we leave the Z bits in the position that we find them + * in the Z buffer (typically 0xffffff00 or 0x00ffffff). That lets us + * get by with fewer bit twiddling steps. */ -void -lp_build_depth_test(LLVMBuilderRef builder, - const struct pipe_depth_state *state, - struct lp_type type, - const struct util_format_description *format_desc, - struct lp_build_mask_context *mask, - LLVMValueRef src, - LLVMValueRef dst_ptr) +static boolean +get_z_shift_and_mask(const struct util_format_description *format_desc, + unsigned *shift, unsigned *mask) { - struct lp_build_context bld; + const unsigned total_bits = format_desc->block.bits; unsigned z_swizzle; - LLVMValueRef dst; - LLVMValueRef z_bitmask = NULL; - LLVMValueRef test; - - if(!state->enabled) - return; - + int chan; + unsigned padding_left, padding_right; + assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); assert(format_desc->block.width == 1); assert(format_desc->block.height == 1); z_swizzle = format_desc->swizzle[0]; - if(z_swizzle == UTIL_FORMAT_SWIZZLE_NONE) - return; - /* Sanity checking */ - assert(z_swizzle < 4); - assert(format_desc->block.bits == type.width); - if(type.floating) { - assert(z_swizzle == 0); - assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_FLOAT); - assert(format_desc->channel[z_swizzle].size == format_desc->block.bits); + if (z_swizzle == UTIL_FORMAT_SWIZZLE_NONE) + return FALSE; + + padding_right = 0; + for (chan = 0; chan < z_swizzle; ++chan) + padding_right += format_desc->channel[chan].size; + + padding_left = + total_bits - (padding_right + format_desc->channel[z_swizzle].size); + + if (padding_left || padding_right) { + unsigned long long mask_left = (1ULL << (total_bits - padding_left)) - 1; + unsigned long long mask_right = (1ULL << (padding_right)) - 1; + *mask = mask_left ^ mask_right; } else { - assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED); - assert(format_desc->channel[z_swizzle].normalized); - assert(!type.fixed); - assert(!type.sign); - assert(type.norm); + *mask = 0xffffffff; + } + + *shift = padding_left; + + return TRUE; +} + + +/** + * Compute bitmask and bit shift to apply to the framebuffer pixel values + * to put the stencil bits in the least significant position. + * (i.e. 0x000000ff) + */ +static boolean +get_s_shift_and_mask(const struct util_format_description *format_desc, + unsigned *shift, unsigned *mask) +{ + unsigned s_swizzle; + int chan, sz; + + s_swizzle = format_desc->swizzle[1]; + + if (s_swizzle == UTIL_FORMAT_SWIZZLE_NONE) + return FALSE; + + *shift = 0; + for (chan = 0; chan < s_swizzle; chan++) + *shift += format_desc->channel[chan].size; + + sz = format_desc->channel[s_swizzle].size; + *mask = (1U << sz) - 1U; + + return TRUE; +} + + + +/** + * Generate code for performing depth and/or stencil tests. + * We operate on a vector of values (typically a 2x2 quad). + * + * \param depth the depth test state + * \param stencil the front/back stencil state + * \param type the data type of the fragment depth/stencil values + * \param format_desc description of the depth/stencil surface + * \param mask the alive/dead pixel mask for the quad (vector) + * \param stencil_refs the front/back stencil ref values (scalar) + * \param z_src the incoming depth/stencil values (a 2x2 quad) + * \param zs_dst_ptr pointer to depth/stencil values in framebuffer + * \param facing contains float value indicating front/back facing polygon + */ +void +lp_build_depth_stencil_test(LLVMBuilderRef builder, + const struct pipe_depth_state *depth, + const struct pipe_stencil_state stencil[2], + struct lp_type type, + const struct util_format_description *format_desc, + struct lp_build_mask_context *mask, + LLVMValueRef stencil_refs[2], + LLVMValueRef z_src, + LLVMValueRef zs_dst_ptr, + LLVMValueRef face) +{ + struct lp_build_context bld; + struct lp_build_context sbld; + struct lp_type s_type; + LLVMValueRef zs_dst, z_dst = NULL; + LLVMValueRef stencil_vals = NULL; + LLVMValueRef z_bitmask = NULL, stencil_shift = NULL; + LLVMValueRef z_pass = NULL, s_pass_mask = NULL; + LLVMValueRef orig_mask = mask->value; + + /* Sanity checking */ + { + const unsigned z_swizzle = format_desc->swizzle[0]; + const unsigned s_swizzle = format_desc->swizzle[1]; + + assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE || + s_swizzle != UTIL_FORMAT_SWIZZLE_NONE); + + assert(depth->enabled || stencil[0].enabled); + + assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); + assert(format_desc->block.width == 1); + assert(format_desc->block.height == 1); + + if (stencil[0].enabled) { + assert(format_desc->format == PIPE_FORMAT_Z24S8_UNORM || + format_desc->format == PIPE_FORMAT_S8Z24_UNORM); + } + + assert(z_swizzle < 4); + assert(format_desc->block.bits == type.width); + if (type.floating) { + assert(z_swizzle == 0); + assert(format_desc->channel[z_swizzle].type == + UTIL_FORMAT_TYPE_FLOAT); + assert(format_desc->channel[z_swizzle].size == + format_desc->block.bits); + } + else { + assert(format_desc->channel[z_swizzle].type == + UTIL_FORMAT_TYPE_UNSIGNED); + assert(format_desc->channel[z_swizzle].normalized); + assert(!type.fixed); + assert(!type.sign); + assert(type.norm); + } } - /* Setup build context */ + + /* Setup build context for Z vals */ lp_build_context_init(&bld, builder, type); - dst = LLVMBuildLoad(builder, dst_ptr, ""); + /* Setup build context for stencil vals */ + s_type = lp_type_int_vec(type.width); + lp_build_context_init(&sbld, builder, s_type); + + /* Load current z/stencil value from z/stencil buffer */ + zs_dst = LLVMBuildLoad(builder, zs_dst_ptr, ""); + + lp_build_name(zs_dst, "zsbufval"); - lp_build_name(dst, "zsbuf"); - /* Align the source depth bits with the destination's, and mask out any - * stencil or padding bits from both */ - if(format_desc->channel[z_swizzle].size == format_desc->block.bits) { - assert(z_swizzle == 0); - /* nothing to do */ + /* Compute and apply the Z/stencil bitmasks and shifts. + */ + { + unsigned z_shift, z_mask; + unsigned s_shift, s_mask; + + if (get_z_shift_and_mask(format_desc, &z_shift, &z_mask)) { + if (z_shift) { + LLVMValueRef shift = lp_build_const_int_vec(type, z_shift); + z_src = LLVMBuildLShr(builder, z_src, shift, ""); + } + + if (z_mask != 0xffffffff) { + LLVMValueRef mask = lp_build_const_int_vec(type, z_mask); + z_src = LLVMBuildAnd(builder, z_src, mask, ""); + z_dst = LLVMBuildAnd(builder, zs_dst, mask, ""); + z_bitmask = mask; /* used below */ + } + else { + z_dst = zs_dst; + } + + lp_build_name(z_dst, "zsbuf.z"); + } + + if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) { + if (s_shift) { + LLVMValueRef shift = lp_build_const_int_vec(type, s_shift); + stencil_vals = LLVMBuildLShr(builder, zs_dst, shift, ""); + stencil_shift = shift; /* used below */ + } + else { + stencil_vals = zs_dst; + } + + if (s_mask != 0xffffffff) { + LLVMValueRef mask = lp_build_const_int_vec(type, s_mask); + stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, ""); + } + + lp_build_name(stencil_vals, "stencil"); + } } - else { - unsigned padding_left; - unsigned padding_right; - unsigned chan; - - assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); - assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED); - assert(format_desc->channel[z_swizzle].size <= format_desc->block.bits); - assert(format_desc->channel[z_swizzle].normalized); - - padding_right = 0; - for(chan = 0; chan < z_swizzle; ++chan) - padding_right += format_desc->channel[chan].size; - padding_left = format_desc->block.bits - - (padding_right + format_desc->channel[z_swizzle].size); - - if(padding_left || padding_right) { - const unsigned long long mask_left = ((unsigned long long)1 << (format_desc->block.bits - padding_left)) - 1; - const unsigned long long mask_right = ((unsigned long long)1 << (padding_right)) - 1; - z_bitmask = lp_build_int_const_scalar(type, mask_left ^ mask_right); + + + if (stencil[0].enabled) { + /* convert scalar stencil refs into vectors */ + stencil_refs[0] = lp_build_broadcast_scalar(&bld, stencil_refs[0]); + stencil_refs[1] = lp_build_broadcast_scalar(&bld, stencil_refs[1]); + + s_pass_mask = lp_build_stencil_test(&sbld, stencil, + stencil_refs, stencil_vals, face); + + /* apply stencil-fail operator */ + { + LLVMValueRef s_fail_mask = lp_build_andc(&bld, orig_mask, s_pass_mask); + stencil_vals = lp_build_stencil_op(&sbld, stencil, S_FAIL_OP, + stencil_refs, stencil_vals, + s_fail_mask, face); + } + } + + if (depth->enabled) { + /* compare src Z to dst Z, returning 'pass' mask */ + z_pass = lp_build_cmp(&bld, depth->func, z_src, z_dst); + + if (!stencil[0].enabled) { + /* We can potentially skip all remaining operations here, but only + * if stencil is disabled because we still need to update the stencil + * buffer values. Don't need to update Z buffer values. + */ + lp_build_mask_update(mask, z_pass); + } + + if (depth->writemask) { + if(z_bitmask) + z_bitmask = LLVMBuildAnd(builder, mask->value, z_bitmask, ""); + else + z_bitmask = mask->value; + + z_dst = lp_build_select(&bld, z_bitmask, z_src, z_dst); } - if(padding_left) - src = LLVMBuildLShr(builder, src, lp_build_int_const_scalar(type, padding_left), ""); - if(padding_right) - src = LLVMBuildAnd(builder, src, z_bitmask, ""); - if(padding_left || padding_right) - dst = LLVMBuildAnd(builder, dst, z_bitmask, ""); + if (stencil[0].enabled) { + /* update stencil buffer values according to z pass/fail result */ + LLVMValueRef z_fail_mask, z_pass_mask; + + /* apply Z-fail operator */ + z_fail_mask = lp_build_andc(&bld, orig_mask, z_pass); + stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_FAIL_OP, + stencil_refs, stencil_vals, + z_fail_mask, face); + + /* apply Z-pass operator */ + z_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, z_pass, ""); + stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_PASS_OP, + stencil_refs, stencil_vals, + z_pass_mask, face); + } + } + else { + /* No depth test: apply Z-pass operator to stencil buffer values which + * passed the stencil test. + */ + s_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, s_pass_mask, ""); + stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_PASS_OP, + stencil_refs, stencil_vals, + s_pass_mask, face); } - lp_build_name(dst, "zsbuf.z"); + /* The Z bits are already in the right place but we may need to shift the + * stencil bits before ORing Z with Stencil to make the final pixel value. + */ + if (stencil_vals && stencil_shift) + stencil_vals = LLVMBuildShl(bld.builder, stencil_vals, + stencil_shift, ""); - test = lp_build_cmp(&bld, state->func, src, dst); - lp_build_mask_update(mask, test); + /* Finally, merge/store the z/stencil values */ + if ((depth->enabled && depth->writemask) || + (stencil[0].enabled && stencil[0].writemask)) { - if(state->writemask) { - if(z_bitmask) - z_bitmask = LLVMBuildAnd(builder, mask->value, z_bitmask, ""); + if (z_dst && stencil_vals) + zs_dst = LLVMBuildOr(bld.builder, z_dst, stencil_vals, ""); + else if (z_dst) + zs_dst = z_dst; else - z_bitmask = mask->value; + zs_dst = stencil_vals; - dst = lp_build_select(&bld, z_bitmask, src, dst); - LLVMBuildStore(builder, dst, dst_ptr); + LLVMBuildStore(builder, zs_dst, zs_dst_ptr); } + + if (s_pass_mask) + lp_build_mask_update(mask, s_pass_mask); + + if (depth->enabled && stencil[0].enabled) + lp_build_mask_update(mask, z_pass); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_depth.h b/src/gallium/auxiliary/gallivm/lp_bld_depth.h index 8be80024ae..27dd46b625 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_depth.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_depth.h @@ -36,7 +36,7 @@ #define LP_BLD_DEPTH_H -#include "os/os_llvm.h" +#include "gallivm/lp_bld.h" struct pipe_depth_state; @@ -51,13 +51,16 @@ lp_depth_type(const struct util_format_description *format_desc, void -lp_build_depth_test(LLVMBuilderRef builder, - const struct pipe_depth_state *state, - struct lp_type type, - const struct util_format_description *format_desc, - struct lp_build_mask_context *mask, - LLVMValueRef src, - LLVMValueRef dst_ptr); +lp_build_depth_stencil_test(LLVMBuilderRef builder, + const struct pipe_depth_state *depth, + const struct pipe_stencil_state stencil[2], + struct lp_type type, + const struct util_format_description *format_desc, + struct lp_build_mask_context *mask, + LLVMValueRef stencil_refs[2], + LLVMValueRef zs_src, + LLVMValueRef zs_dst_ptr, + LLVMValueRef facing); #endif /* !LP_BLD_DEPTH_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.h b/src/gallium/auxiliary/gallivm/lp_bld_flow.h index e158836549..c2b50e1b60 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.h @@ -35,7 +35,7 @@ #define LP_BLD_FLOW_H -#include "os/os_llvm.h" +#include "gallivm/lp_bld.h" struct lp_type; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format.h b/src/gallium/auxiliary/gallivm/lp_bld_format.h index 8972c0dc17..73ab6de3f2 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_format.h @@ -34,7 +34,7 @@ * Pixel format helpers. */ -#include "os/os_llvm.h" +#include "gallivm/lp_bld.h" #include "pipe/p_format.h" diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c index abb27e4c32..45ee4b12ce 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c @@ -114,10 +114,10 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, case UTIL_FORMAT_TYPE_UNSIGNED: if(type.floating) { if(start) - input = LLVMBuildLShr(builder, input, lp_build_int_const_scalar(type, start), ""); + input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(type, start), ""); if(stop < format_desc->block.bits) { unsigned mask = ((unsigned long long)1 << width) - 1; - input = LLVMBuildAnd(builder, input, lp_build_int_const_scalar(type, mask), ""); + input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(type, mask), ""); } if(format_desc->channel[chan].normalized) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_interp.c b/src/gallium/auxiliary/gallivm/lp_bld_interp.c index 2fc894017d..09efb16121 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_interp.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_interp.c @@ -289,17 +289,17 @@ pos_update(struct lp_build_interp_soa_context *bld, int quad_index) /* top-right or bottom-right quad in block */ /* build x += xstep */ x = lp_build_add(&bld->base, x, - lp_build_const_scalar(bld->base.type, xstep)); + lp_build_const_vec(bld->base.type, xstep)); } if (quad_index == 2) { /* bottom-left quad in block */ /* build y += ystep */ y = lp_build_add(&bld->base, y, - lp_build_const_scalar(bld->base.type, ystep)); + lp_build_const_vec(bld->base.type, ystep)); /* build x -= xstep */ x = lp_build_sub(&bld->base, x, - lp_build_const_scalar(bld->base.type, xstep)); + lp_build_const_vec(bld->base.type, xstep)); } lp_build_name(x, "pos.x"); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_interp.h b/src/gallium/auxiliary/gallivm/lp_bld_interp.h index 177b5e943e..a4937bbb04 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_interp.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_interp.h @@ -41,7 +41,7 @@ #define LP_BLD_INTERP_H -#include "os/os_llvm.h" +#include "gallivm/lp_bld.h" #include "tgsi/tgsi_exec.h" diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.h b/src/gallium/auxiliary/gallivm/lp_bld_intr.h index 7d5506c733..977f767322 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_intr.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.h @@ -37,7 +37,7 @@ #define LP_BLD_INTR_H -#include "os/os_llvm.h" +#include "gallivm/lp_bld.h" /** diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c index f3df3dd138..a3b6970116 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c @@ -193,7 +193,7 @@ lp_build_compare(LLVMBuilderRef builder, if(table[func].gt && ((type.width == 8 && type.sign) || (type.width != 8 && !type.sign))) { - LLVMValueRef msb = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1)); + LLVMValueRef msb = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1)); a = LLVMBuildXor(builder, a, msb, ""); b = LLVMBuildXor(builder, b, msb, ""); } @@ -483,3 +483,13 @@ lp_build_alloca(struct lp_build_context *bld) return LLVMBuildAlloca(bld->builder, lp_build_elem_type(type), ""); } } + + +/** Return (a & ~b) */ +LLVMValueRef +lp_build_andc(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) +{ + b = LLVMBuildNot(bld->builder, b, ""); + b = LLVMBuildAnd(bld->builder, a, b, ""); + return b; +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.h b/src/gallium/auxiliary/gallivm/lp_bld_logic.h index b54ec13b70..00a8c75019 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.h @@ -37,7 +37,7 @@ #define LP_BLD_LOGIC_H -#include "os/os_llvm.h" +#include "gallivm/lp_bld.h" #include "pipe/p_defines.h" /* For PIPE_FUNC_xxx */ @@ -79,4 +79,9 @@ lp_build_select_aos(struct lp_build_context *bld, LLVMValueRef lp_build_alloca(struct lp_build_context *bld); + +LLVMValueRef +lp_build_andc(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b); + + #endif /* !LP_BLD_LOGIC_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c index 23398f41f9..2daa8a3b58 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c @@ -164,7 +164,7 @@ lp_build_unpack2(LLVMBuilderRef builder, if(dst_type.sign && src_type.sign) { /* Replicate the sign bit in the most significant bits */ - msb = LLVMBuildAShr(builder, src, lp_build_int_const_scalar(src_type, src_type.width - 1), ""); + msb = LLVMBuildAShr(builder, src, lp_build_const_int_vec(src_type, src_type.width - 1), ""); } else /* Most significant bits always zero */ @@ -361,7 +361,7 @@ lp_build_packs2(LLVMBuilderRef builder, if(clamp) { struct lp_build_context bld; unsigned dst_bits = dst_type.sign ? dst_type.width - 1 : dst_type.width; - LLVMValueRef dst_max = lp_build_int_const_scalar(src_type, ((unsigned long long)1 << dst_bits) - 1); + LLVMValueRef dst_max = lp_build_const_int_vec(src_type, ((unsigned long long)1 << dst_bits) - 1); lp_build_context_init(&bld, builder, src_type); lo = lp_build_min(&bld, lo, dst_max); hi = lp_build_min(&bld, hi, dst_max); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.h b/src/gallium/auxiliary/gallivm/lp_bld_pack.h index 346a17d580..41adeed220 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.h @@ -37,7 +37,7 @@ #define LP_BLD_PACK_H -#include "os/os_llvm.h" +#include "gallivm/lp_bld.h" struct lp_type; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c index 672e06c992..0761e35f8e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -173,7 +173,7 @@ lp_build_sample_offset(struct lp_build_context *bld, LLVMValueRef x_stride; LLVMValueRef offset; - x_stride = lp_build_const_scalar(bld->type, format_desc->block.bits/8); + x_stride = lp_build_const_vec(bld->type, format_desc->block.bits/8); if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { LLVMValueRef x_lo, x_hi; @@ -195,9 +195,9 @@ lp_build_sample_offset(struct lp_build_context *bld, y_hi = LLVMBuildLShr(bld->builder, y, bld->one, ""); x_stride_lo = x_stride; - y_stride_lo = lp_build_const_scalar(bld->type, 2*format_desc->block.bits/8); + y_stride_lo = lp_build_const_vec(bld->type, 2*format_desc->block.bits/8); - x_stride_hi = lp_build_const_scalar(bld->type, 4*format_desc->block.bits/8); + x_stride_hi = lp_build_const_vec(bld->type, 4*format_desc->block.bits/8); y_stride_hi = LLVMBuildShl(bld->builder, y_stride, bld->one, ""); x_offset_lo = lp_build_mul(bld, x_lo, x_stride_lo); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h index d35fdbb78d..fcbf084baf 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h @@ -36,7 +36,7 @@ #define LP_BLD_SAMPLE_H -#include "os/os_llvm.h" +#include "gallivm/lp_bld.h" struct pipe_resource; struct pipe_sampler_state; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 72018737a8..995c016b9d 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -292,7 +292,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, int chan; for (chan = 0; chan < 4; chan++) { LLVMValueRef border_chan = - lp_build_const_scalar(bld->texel_type, + lp_build_const_vec(bld->texel_type, bld->static_state->border_color[chan]); texel[chan] = lp_build_select(&bld->texel_bld, use_border, border_chan, texel[chan]); @@ -457,8 +457,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, struct lp_build_context *coord_bld = &bld->coord_bld; struct lp_build_context *int_coord_bld = &bld->int_coord_bld; struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld; - LLVMValueRef two = lp_build_const_scalar(coord_bld->type, 2.0); - LLVMValueRef half = lp_build_const_scalar(coord_bld->type, 0.5); + LLVMValueRef two = lp_build_const_vec(coord_bld->type, 2.0); + LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5); LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length); LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one); LLVMValueRef length_f_minus_one = lp_build_sub(coord_bld, length_f, coord_bld->one); @@ -512,7 +512,7 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, else { LLVMValueRef min, max; /* clamp to [0.5, length - 0.5] */ - min = lp_build_const_scalar(coord_bld->type, 0.5F); + min = lp_build_const_vec(coord_bld->type, 0.5F); max = lp_build_sub(coord_bld, length_f, min); coord = lp_build_clamp(coord_bld, coord, min, max); } @@ -533,7 +533,7 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, if (bld->static_state->normalized_coords) { /* min = -1.0 / (2 * length) = -0.5 / length */ min = lp_build_mul(coord_bld, - lp_build_const_scalar(coord_bld->type, -0.5F), + lp_build_const_vec(coord_bld->type, -0.5F), lp_build_rcp(coord_bld, length_f)); /* max = 1.0 - min */ max = lp_build_sub(coord_bld, coord_bld->one, min); @@ -545,7 +545,7 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, } else { /* clamp to [-0.5, length + 0.5] */ - min = lp_build_const_scalar(coord_bld->type, -0.5F); + min = lp_build_const_vec(coord_bld->type, -0.5F); max = lp_build_sub(coord_bld, length_f, min); coord = lp_build_clamp(coord_bld, coord, min, max); coord = lp_build_sub(coord_bld, coord, half); @@ -620,7 +620,7 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, LLVMValueRef min, max; /* min = -1.0 / (2 * length) = -0.5 / length */ min = lp_build_mul(coord_bld, - lp_build_const_scalar(coord_bld->type, -0.5F), + lp_build_const_vec(coord_bld->type, -0.5F), lp_build_rcp(coord_bld, length_f)); /* max = 1.0 - min */ max = lp_build_sub(coord_bld, coord_bld->one, min); @@ -665,7 +665,7 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, struct lp_build_context *coord_bld = &bld->coord_bld; struct lp_build_context *int_coord_bld = &bld->int_coord_bld; struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld; - LLVMValueRef two = lp_build_const_scalar(coord_bld->type, 2.0); + LLVMValueRef two = lp_build_const_vec(coord_bld->type, 2.0); LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length); LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one); LLVMValueRef length_f_minus_one = lp_build_sub(coord_bld, length_f, coord_bld->one); @@ -708,7 +708,7 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, } else { /* clamp to [0.5, length - 0.5] */ - min = lp_build_const_scalar(coord_bld->type, 0.5F); + min = lp_build_const_vec(coord_bld->type, 0.5F); max = lp_build_sub(coord_bld, length_f, min); } /* coord = clamp(coord, min, max) */ @@ -724,7 +724,7 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, if (bld->static_state->normalized_coords) { /* min = -1.0 / (2 * length) = -0.5 / length */ min = lp_build_mul(coord_bld, - lp_build_const_scalar(coord_bld->type, -0.5F), + lp_build_const_vec(coord_bld->type, -0.5F), lp_build_rcp(coord_bld, length_f)); /* max = length - min */ max = lp_build_sub(coord_bld, length_f, min); @@ -733,7 +733,7 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, } else { /* clamp to [-0.5, length + 0.5] */ - min = lp_build_const_scalar(coord_bld->type, -0.5F); + min = lp_build_const_vec(coord_bld->type, -0.5F); max = lp_build_sub(coord_bld, length_f, min); } /* coord = clamp(coord, min, max) */ @@ -843,87 +843,98 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, LLVMValueRef depth) { - const int dims = texture_dims(bld->static_state->target); - struct lp_build_context *float_bld = &bld->float_bld; - LLVMValueRef lod_bias = LLVMConstReal(LLVMFloatType(), bld->static_state->lod_bias); - LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod); - LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(), bld->static_state->max_lod); - - LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0); - LLVMValueRef index1 = LLVMConstInt(LLVMInt32Type(), 1, 0); - LLVMValueRef index2 = LLVMConstInt(LLVMInt32Type(), 2, 0); - - LLVMValueRef s0, s1, s2; - LLVMValueRef t0, t1, t2; - LLVMValueRef r0, r1, r2; - LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy; - LLVMValueRef rho, lod; - - /* - * dsdx = abs(s[1] - s[0]); - * dsdy = abs(s[2] - s[0]); - * dtdx = abs(t[1] - t[0]); - * dtdy = abs(t[2] - t[0]); - * drdx = abs(r[1] - r[0]); - * drdy = abs(r[2] - r[0]); - * XXX we're assuming a four-element quad in 2x2 layout here. - */ - s0 = LLVMBuildExtractElement(bld->builder, s, index0, "s0"); - s1 = LLVMBuildExtractElement(bld->builder, s, index1, "s1"); - s2 = LLVMBuildExtractElement(bld->builder, s, index2, "s2"); - dsdx = LLVMBuildSub(bld->builder, s1, s0, ""); - dsdx = lp_build_abs(float_bld, dsdx); - dsdy = LLVMBuildSub(bld->builder, s2, s0, ""); - dsdy = lp_build_abs(float_bld, dsdy); - if (dims > 1) { - t0 = LLVMBuildExtractElement(bld->builder, t, index0, "t0"); - t1 = LLVMBuildExtractElement(bld->builder, t, index1, "t1"); - t2 = LLVMBuildExtractElement(bld->builder, t, index2, "t2"); - dtdx = LLVMBuildSub(bld->builder, t1, t0, ""); - dtdx = lp_build_abs(float_bld, dtdx); - dtdy = LLVMBuildSub(bld->builder, t2, t0, ""); - dtdy = lp_build_abs(float_bld, dtdy); - if (dims > 2) { - r0 = LLVMBuildExtractElement(bld->builder, r, index0, "r0"); - r1 = LLVMBuildExtractElement(bld->builder, r, index1, "r1"); - r2 = LLVMBuildExtractElement(bld->builder, r, index2, "r2"); - drdx = LLVMBuildSub(bld->builder, r1, r0, ""); - drdx = lp_build_abs(float_bld, drdx); - drdy = LLVMBuildSub(bld->builder, r2, r0, ""); - drdy = lp_build_abs(float_bld, drdy); - } + if (bld->static_state->min_lod == bld->static_state->max_lod) { + /* User is forcing sampling from a particular mipmap level. + * This is hit during mipmap generation. + */ + return LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod); } + else { + const int dims = texture_dims(bld->static_state->target); + struct lp_build_context *float_bld = &bld->float_bld; + LLVMValueRef lod_bias = LLVMConstReal(LLVMFloatType(), + bld->static_state->lod_bias); + LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(), + bld->static_state->min_lod); + LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(), + bld->static_state->max_lod); + + LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0); + LLVMValueRef index1 = LLVMConstInt(LLVMInt32Type(), 1, 0); + LLVMValueRef index2 = LLVMConstInt(LLVMInt32Type(), 2, 0); + + LLVMValueRef s0, s1, s2; + LLVMValueRef t0, t1, t2; + LLVMValueRef r0, r1, r2; + LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy; + LLVMValueRef rho, lod; + + /* + * dsdx = abs(s[1] - s[0]); + * dsdy = abs(s[2] - s[0]); + * dtdx = abs(t[1] - t[0]); + * dtdy = abs(t[2] - t[0]); + * drdx = abs(r[1] - r[0]); + * drdy = abs(r[2] - r[0]); + * XXX we're assuming a four-element quad in 2x2 layout here. + */ + s0 = LLVMBuildExtractElement(bld->builder, s, index0, "s0"); + s1 = LLVMBuildExtractElement(bld->builder, s, index1, "s1"); + s2 = LLVMBuildExtractElement(bld->builder, s, index2, "s2"); + dsdx = LLVMBuildSub(bld->builder, s1, s0, ""); + dsdx = lp_build_abs(float_bld, dsdx); + dsdy = LLVMBuildSub(bld->builder, s2, s0, ""); + dsdy = lp_build_abs(float_bld, dsdy); + if (dims > 1) { + t0 = LLVMBuildExtractElement(bld->builder, t, index0, "t0"); + t1 = LLVMBuildExtractElement(bld->builder, t, index1, "t1"); + t2 = LLVMBuildExtractElement(bld->builder, t, index2, "t2"); + dtdx = LLVMBuildSub(bld->builder, t1, t0, ""); + dtdx = lp_build_abs(float_bld, dtdx); + dtdy = LLVMBuildSub(bld->builder, t2, t0, ""); + dtdy = lp_build_abs(float_bld, dtdy); + if (dims > 2) { + r0 = LLVMBuildExtractElement(bld->builder, r, index0, "r0"); + r1 = LLVMBuildExtractElement(bld->builder, r, index1, "r1"); + r2 = LLVMBuildExtractElement(bld->builder, r, index2, "r2"); + drdx = LLVMBuildSub(bld->builder, r1, r0, ""); + drdx = lp_build_abs(float_bld, drdx); + drdy = LLVMBuildSub(bld->builder, r2, r0, ""); + drdy = lp_build_abs(float_bld, drdy); + } + } - /* Compute rho = max of all partial derivatives scaled by texture size. - * XXX this could be vectorized somewhat - */ - rho = LLVMBuildMul(bld->builder, - lp_build_max(float_bld, dsdx, dsdy), - lp_build_int_to_float(float_bld, width), ""); - if (dims > 1) { - LLVMValueRef max; - max = LLVMBuildMul(bld->builder, - lp_build_max(float_bld, dtdx, dtdy), - lp_build_int_to_float(float_bld, height), ""); - rho = lp_build_max(float_bld, rho, max); - if (dims > 2) { + /* Compute rho = max of all partial derivatives scaled by texture size. + * XXX this could be vectorized somewhat + */ + rho = LLVMBuildMul(bld->builder, + lp_build_max(float_bld, dsdx, dsdy), + lp_build_int_to_float(float_bld, width), ""); + if (dims > 1) { + LLVMValueRef max; max = LLVMBuildMul(bld->builder, - lp_build_max(float_bld, drdx, drdy), - lp_build_int_to_float(float_bld, depth), ""); + lp_build_max(float_bld, dtdx, dtdy), + lp_build_int_to_float(float_bld, height), ""); rho = lp_build_max(float_bld, rho, max); + if (dims > 2) { + max = LLVMBuildMul(bld->builder, + lp_build_max(float_bld, drdx, drdy), + lp_build_int_to_float(float_bld, depth), ""); + rho = lp_build_max(float_bld, rho, max); + } } - } - /* compute lod = log2(rho) */ - lod = lp_build_log2(float_bld, rho); + /* compute lod = log2(rho) */ + lod = lp_build_log2(float_bld, rho); - /* add lod bias */ - lod = LLVMBuildAdd(bld->builder, lod, lod_bias, "LOD bias"); + /* add lod bias */ + lod = LLVMBuildAdd(bld->builder, lod, lod_bias, "LOD bias"); - /* clamp lod */ - lod = lp_build_clamp(float_bld, lod, min_lod, max_lod); + /* clamp lod */ + lod = lp_build_clamp(float_bld, lod, min_lod, max_lod); - return lod; + return lod; + } } @@ -986,7 +997,7 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld, last_level); /* compute level 1 and clamp to legal range of levels */ *level1_out = lp_build_add(int_bld, *level0_out, int_bld->one); - *level1_out = lp_build_min(int_bld, *level1_out, int_bld->zero); + *level1_out = lp_build_min(int_bld, *level1_out, last_level); *weight_out = lp_build_fract(float_bld, lod); } @@ -1215,7 +1226,7 @@ static LLVMValueRef lp_build_cube_ima(struct lp_build_context *coord_bld, LLVMValueRef coord) { /* ima = -0.5 / abs(coord); */ - LLVMValueRef negHalf = lp_build_const_scalar(coord_bld->type, -0.5); + LLVMValueRef negHalf = lp_build_const_vec(coord_bld->type, -0.5); LLVMValueRef absCoord = lp_build_abs(coord_bld, coord); LLVMValueRef ima = lp_build_mul(coord_bld, negHalf, lp_build_rcp(coord_bld, absCoord)); @@ -1235,7 +1246,7 @@ lp_build_cube_coord(struct lp_build_context *coord_bld, LLVMValueRef coord, LLVMValueRef ima) { /* return negate(coord) * ima * sign + 0.5; */ - LLVMValueRef half = lp_build_const_scalar(coord_bld->type, 0.5); + LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5); LLVMValueRef res; assert(negate_coord == +1 || negate_coord == -1); @@ -1414,6 +1425,85 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld, /** + * Sample the texture/mipmap using given image filter and mip filter. + * data0_ptr and data1_ptr point to the two mipmap levels to sample + * from. width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes. + * If we're using nearest miplevel sampling the '1' values will be null/unused. + */ +static void +lp_build_sample_mipmap(struct lp_build_sample_context *bld, + unsigned img_filter, + unsigned mip_filter, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef r, + LLVMValueRef lod_fpart, + LLVMValueRef width0_vec, + LLVMValueRef width1_vec, + LLVMValueRef height0_vec, + LLVMValueRef height1_vec, + LLVMValueRef depth0_vec, + LLVMValueRef depth1_vec, + LLVMValueRef row_stride0_vec, + LLVMValueRef row_stride1_vec, + LLVMValueRef img_stride0_vec, + LLVMValueRef img_stride1_vec, + LLVMValueRef data_ptr0, + LLVMValueRef data_ptr1, + LLVMValueRef *colors_out) +{ + LLVMValueRef colors0[4], colors1[4]; + int chan; + + if (img_filter == PIPE_TEX_FILTER_NEAREST) { + lp_build_sample_image_nearest(bld, + width0_vec, height0_vec, depth0_vec, + row_stride0_vec, img_stride0_vec, + data_ptr0, s, t, r, colors0); + + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { + /* sample the second mipmap level, and interp */ + lp_build_sample_image_nearest(bld, + width1_vec, height1_vec, depth1_vec, + row_stride1_vec, img_stride1_vec, + data_ptr1, s, t, r, colors1); + } + } + else { + assert(img_filter == PIPE_TEX_FILTER_LINEAR); + + lp_build_sample_image_linear(bld, + width0_vec, height0_vec, depth0_vec, + row_stride0_vec, img_stride0_vec, + data_ptr0, s, t, r, colors0); + + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { + /* sample the second mipmap level, and interp */ + lp_build_sample_image_linear(bld, + width1_vec, height1_vec, depth1_vec, + row_stride1_vec, img_stride1_vec, + data_ptr1, s, t, r, colors1); + } + } + + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { + /* interpolate samples from the two mipmap levels */ + for (chan = 0; chan < 4; chan++) { + colors_out[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart, + colors0[chan], colors1[chan]); + } + } + else { + /* use first/only level's colors */ + for (chan = 0; chan < 4; chan++) { + colors_out[chan] = colors0[chan]; + } + } +} + + + +/** * General texture sampling codegen. * This function handles texture sampling for all texture targets (1D, * 2D, 3D, cube) and all filtering modes. @@ -1435,6 +1525,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld, LLVMValueRef data_array, LLVMValueRef *colors_out) { + struct lp_build_context *float_bld = &bld->float_bld; const unsigned mip_filter = bld->static_state->min_mip_filter; const unsigned min_filter = bld->static_state->min_img_filter; const unsigned mag_filter = bld->static_state->mag_img_filter; @@ -1446,7 +1537,6 @@ lp_build_sample_general(struct lp_build_sample_context *bld, LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL; LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL; LLVMValueRef data_ptr0, data_ptr1; - int chan; /* printf("%s mip %d min %d mag %d\n", __FUNCTION__, @@ -1454,16 +1544,24 @@ lp_build_sample_general(struct lp_build_sample_context *bld, */ /* - * Compute the level of detail (mipmap level index(es)). + * Compute the level of detail (float). + */ + if (min_filter != mag_filter || + mip_filter != PIPE_TEX_MIPFILTER_NONE) { + /* Need to compute lod either to choose mipmap levels or to + * distinguish between minification/magnification with one mipmap level. + */ + lod = lp_build_lod_selector(bld, s, t, r, width, height, depth); + } + + /* + * Compute integer mipmap level(s) to fetch texels from. */ if (mip_filter == PIPE_TEX_MIPFILTER_NONE) { /* always use mip level 0 */ ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0); } else { - /* compute float LOD */ - lod = lp_build_lod_selector(bld, s, t, r, width, height, depth); - if (mip_filter == PIPE_TEX_MIPFILTER_NEAREST) { lp_build_nearest_mip_level(bld, unit, lod, &ilevel0); } @@ -1499,7 +1597,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld, } } if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - /* compute width, height, depth for second mipmap level at ilevel1 */ + /* compute width, height, depth for second mipmap level at 'ilevel1' */ width1_vec = lp_build_minify(bld, width_vec, ilevel1_vec); if (dims >= 2) { height1_vec = lp_build_minify(bld, height_vec, ilevel1_vec); @@ -1516,7 +1614,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld, } /* - * Choose cube face, recompute texcoords. + * Choose cube face, recompute per-face texcoords. */ if (bld->static_state->target == PIPE_TEXTURE_CUBE) { LLVMValueRef face, face_s, face_t; @@ -1538,62 +1636,67 @@ lp_build_sample_general(struct lp_build_sample_context *bld, /* * Get/interpolate texture colors. */ - /* XXX temporarily force this path: */ - if (1 /*min_filter == mag_filter*/) { - /* same filter for minification or magnification */ - LLVMValueRef colors0[4], colors1[4]; - - if (min_filter == PIPE_TEX_FILTER_NEAREST) { - lp_build_sample_image_nearest(bld, - width0_vec, height0_vec, depth0_vec, - row_stride0_vec, img_stride0_vec, - data_ptr0, s, t, r, colors0); - - if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - /* sample the second mipmap level, and interp */ - lp_build_sample_image_nearest(bld, - width1_vec, height1_vec, depth1_vec, - row_stride1_vec, img_stride1_vec, - data_ptr1, s, t, r, colors1); - } - } - else { - assert(min_filter == PIPE_TEX_FILTER_LINEAR); + if (min_filter == mag_filter) { + /* no need to distinquish between minification and magnification */ + lp_build_sample_mipmap(bld, min_filter, mip_filter, s, t, r, lod_fpart, + width0_vec, width1_vec, + height0_vec, height1_vec, + depth0_vec, depth1_vec, + row_stride0_vec, row_stride1_vec, + img_stride0_vec, img_stride1_vec, + data_ptr0, data_ptr1, + colors_out); + } + else { + /* Emit conditional to choose min image filter or mag image filter + * depending on the lod being >0 or <= 0, respectively. + */ + struct lp_build_flow_context *flow_ctx; + struct lp_build_if_state if_ctx; + LLVMValueRef minify; - lp_build_sample_image_linear(bld, - width0_vec, height0_vec, depth0_vec, - row_stride0_vec, img_stride0_vec, - data_ptr0, s, t, r, colors0); + flow_ctx = lp_build_flow_create(bld->builder); + lp_build_flow_scope_begin(flow_ctx); + lp_build_flow_scope_declare(flow_ctx, &colors_out[0]); + lp_build_flow_scope_declare(flow_ctx, &colors_out[1]); + lp_build_flow_scope_declare(flow_ctx, &colors_out[2]); + lp_build_flow_scope_declare(flow_ctx, &colors_out[3]); - if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - /* sample the second mipmap level, and interp */ - lp_build_sample_image_linear(bld, - width1_vec, height1_vec, depth1_vec, - row_stride1_vec, img_stride1_vec, - data_ptr1, s, t, r, colors1); - } - } + /* minify = lod > 0.0 */ + minify = LLVMBuildFCmp(bld->builder, LLVMRealUGE, + lod, float_bld->zero, ""); - if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - /* interpolate samples from the two mipmap levels */ - for (chan = 0; chan < 4; chan++) { - colors_out[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart, - colors0[chan], colors1[chan]); - } + lp_build_if(&if_ctx, flow_ctx, bld->builder, minify); + { + /* Use the minification filter */ + lp_build_sample_mipmap(bld, min_filter, mip_filter, + s, t, r, lod_fpart, + width0_vec, width1_vec, + height0_vec, height1_vec, + depth0_vec, depth1_vec, + row_stride0_vec, row_stride1_vec, + img_stride0_vec, img_stride1_vec, + data_ptr0, data_ptr1, + colors_out); } - else { - /* use first/only level's colors */ - for (chan = 0; chan < 4; chan++) { - colors_out[chan] = colors0[chan]; - } + lp_build_else(&if_ctx); + { + /* Use the magnification filter */ + lp_build_sample_mipmap(bld, mag_filter, mip_filter, + s, t, r, lod_fpart, + width0_vec, width1_vec, + height0_vec, height1_vec, + depth0_vec, depth1_vec, + row_stride0_vec, row_stride1_vec, + img_stride0_vec, img_stride1_vec, + data_ptr0, data_ptr1, + colors_out); } - } - else { - /* emit conditional to choose min image filter or mag image filter - * depending on the lod being >0 or <= 0, respectively. - */ - abort(); + lp_build_endif(&if_ctx); + + lp_build_flow_scope_end(flow_ctx); + lp_build_flow_destroy(flow_ctx); } } @@ -1605,7 +1708,7 @@ lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder, LLVMValueRef packed, LLVMValueRef *rgba) { - LLVMValueRef mask = lp_build_int_const_scalar(dst_type, 0xff); + LLVMValueRef mask = lp_build_const_int_vec(dst_type, 0xff); unsigned chan; /* Decode the input vector components */ @@ -1617,7 +1720,7 @@ lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder, input = packed; if(start) - input = LLVMBuildLShr(builder, input, lp_build_int_const_scalar(dst_type, start), ""); + input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(dst_type, start), ""); if(stop < 32) input = LLVMBuildAnd(builder, input, mask, ""); @@ -1679,17 +1782,17 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, t = LLVMBuildFPToSI(builder, t, i32_vec_type, ""); /* subtract 0.5 (add -128) */ - i32_c128 = lp_build_int_const_scalar(i32.type, -128); + i32_c128 = lp_build_const_int_vec(i32.type, -128); s = LLVMBuildAdd(builder, s, i32_c128, ""); t = LLVMBuildAdd(builder, t, i32_c128, ""); /* compute floor (shift right 8) */ - i32_c8 = lp_build_int_const_scalar(i32.type, 8); + i32_c8 = lp_build_const_int_vec(i32.type, 8); s_ipart = LLVMBuildAShr(builder, s, i32_c8, ""); t_ipart = LLVMBuildAShr(builder, t, i32_c8, ""); /* compute fractional part (AND with 0xff) */ - i32_c255 = lp_build_int_const_scalar(i32.type, 255); + i32_c255 = lp_build_const_int_vec(i32.type, 255); s_fpart = LLVMBuildAnd(builder, s, i32_c255, ""); t_fpart = LLVMBuildAnd(builder, t, i32_c255, ""); @@ -1856,7 +1959,7 @@ lp_build_sample_compare(struct lp_build_sample_context *bld, } assert(res); - res = lp_build_mul(texel_bld, res, lp_build_const_scalar(texel_bld->type, 0.25)); + res = lp_build_mul(texel_bld, res, lp_build_const_vec(texel_bld->type, 0.25)); /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */ for(chan = 0; chan < 3; ++chan) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_struct.h b/src/gallium/auxiliary/gallivm/lp_bld_struct.h index 34478c10f5..147336edb4 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_struct.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_struct.h @@ -37,7 +37,7 @@ #define LP_BLD_STRUCT_H -#include "os/os_llvm.h" +#include "gallivm/lp_bld.h" #include <llvm-c/Target.h> #include "util/u_debug.h" diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c index 64e81f7b1f..278c838eac 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c @@ -144,9 +144,9 @@ lp_build_broadcast_aos(struct lp_build_context *bld, #endif if(shift > 0) - tmp = LLVMBuildLShr(bld->builder, a, lp_build_int_const_scalar(type4, shift*type.width), ""); + tmp = LLVMBuildLShr(bld->builder, a, lp_build_const_int_vec(type4, shift*type.width), ""); if(shift < 0) - tmp = LLVMBuildShl(bld->builder, a, lp_build_int_const_scalar(type4, -shift*type.width), ""); + tmp = LLVMBuildShl(bld->builder, a, lp_build_const_int_vec(type4, -shift*type.width), ""); assert(tmp); if(tmp) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h index 57b5cc079f..138ca620e6 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h @@ -37,7 +37,7 @@ #define LP_BLD_SWIZZLE_H -#include "os/os_llvm.h" +#include "gallivm/lp_bld.h" struct lp_type; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index 0f2f8a65b1..63b938bfa9 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -35,7 +35,7 @@ #ifndef LP_BLD_TGSI_H #define LP_BLD_TGSI_H -#include "os/os_llvm.h" +#include "gallivm/lp_bld.h" struct tgsi_token; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 5ec59d636c..f160be878f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -475,7 +475,7 @@ emit_store( break; case TGSI_SAT_MINUS_PLUS_ONE: - value = lp_build_max(&bld->base, value, lp_build_const_scalar(bld->base.type, -1.0)); + value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0)); value = lp_build_min(&bld->base, value, bld->base.one); break; @@ -996,7 +996,7 @@ emit_instruction( src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); src2 = emit_fetch( bld, inst, 2, chan_index ); - tmp1 = lp_build_const_scalar(bld->base.type, 0.5); + tmp1 = lp_build_const_vec(bld->base.type, 0.5); tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1); dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 ); } @@ -1713,7 +1713,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, assert(num_immediates < LP_MAX_IMMEDIATES); for( i = 0; i < size; ++i ) bld.immediates[num_immediates][i] = - lp_build_const_scalar(type, parse.FullToken.FullImmediate.u[i].Float); + lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float); for( i = size; i < 4; ++i ) bld.immediates[num_immediates][i] = bld.base.undef; num_immediates++; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.h b/src/gallium/auxiliary/gallivm/lp_bld_type.h index 5b351476ac..cd59d2faa6 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_type.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_type.h @@ -37,9 +37,9 @@ #define LP_BLD_TYPE_H -#include "os/os_llvm.h" +#include "pipe/p_compiler.h" +#include "gallivm/lp_bld.h" -#include <pipe/p_compiler.h> /** |