diff options
author | Tim Rowley <timothy.o.rowley@intel.com> | 2016-04-05 16:33:02 -0600 |
---|---|---|
committer | Tim Rowley <timothy.o.rowley@intel.com> | 2016-04-22 18:48:38 -0500 |
commit | ef293ee9c0034bce980c978e0e41a8ab2a9730d7 (patch) | |
tree | 5157579cbb38f118423976f3bc3f4a0ef308fa93 /src | |
parent | 27cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4e (diff) |
swr: [rasterizer] Interpolation utility functions
v2: use _mm_cmpunord_ps for vIsNaN
Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/common/simdintrin.h | 51 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/frontend.h | 8 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/state.h | 2 |
3 files changed, 55 insertions, 6 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdintrin.h b/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
index fa792b42e1a..72fe15a3c7a 100644
--- a/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
+++ b/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
@@ -915,16 +915,25 @@ INLINE simdscalar vplaneps(simdscalar vA, simdscalar vB, simdscalar vC, simdscal
 }
 
 //////////////////////////////////////////////////////////////////////////
+/// @brief Compute plane equation vA * vX + vB * vY + vC
+INLINE __m128 vplaneps128(__m128 vA, __m128 vB, __m128 vC, __m128 &vX, __m128 &vY)
+{
+    __m128 vOut = _simd128_fmadd_ps(vA, vX, vC);
+    vOut = _simd128_fmadd_ps(vB, vY, vOut);
+    return vOut;
+}
+
+//////////////////////////////////////////////////////////////////////////
 /// @brief Interpolates a single component.
 /// @param vI - barycentric I
 /// @param vJ - barycentric J
 /// @param pInterpBuffer - pointer to attribute barycentric coeffs
-template<UINT Attrib, UINT Comp>
+template<UINT Attrib, UINT Comp, UINT numComponents = 4>
 static INLINE simdscalar InterpolateComponent(simdscalar vI, simdscalar vJ, const float *pInterpBuffer)
 {
-    const float *pInterpA = &pInterpBuffer[Attrib * 12 + 0 + Comp];
-    const float *pInterpB = &pInterpBuffer[Attrib * 12 + 4 + Comp];
-    const float *pInterpC = &pInterpBuffer[Attrib * 12 + 8 + Comp];
+    const float *pInterpA = &pInterpBuffer[Attrib * 3 * numComponents + 0 + Comp];
+    const float *pInterpB = &pInterpBuffer[Attrib * 3 * numComponents + numComponents + Comp];
+    const float *pInterpC = &pInterpBuffer[Attrib * 3 * numComponents + numComponents * 2 + Comp];
 
     simdscalar vA = _simd_broadcast_ss(pInterpA);
     simdscalar vB = _simd_broadcast_ss(pInterpB);
@@ -936,6 +945,40 @@ static INLINE simdscalar InterpolateComponent(simdscalar vI, simdscalar vJ, cons
     return vplaneps(vA, vB, vC, vI, vJ);
 }
 
+//////////////////////////////////////////////////////////////////////////
+/// @brief Interpolates a single component.
+/// @param vI - barycentric I
+/// @param vJ - barycentric J
+/// @param pInterpBuffer - pointer to attribute barycentric coeffs
+template<UINT Attrib, UINT Comp, UINT numComponents = 4>
+static INLINE __m128 InterpolateComponent(__m128 vI, __m128 vJ, const float *pInterpBuffer)
+{
+    const float *pInterpA = &pInterpBuffer[Attrib * 3 * numComponents + 0 + Comp];
+    const float *pInterpB = &pInterpBuffer[Attrib * 3 * numComponents + numComponents + Comp];
+    const float *pInterpC = &pInterpBuffer[Attrib * 3 * numComponents + numComponents * 2 + Comp];
+
+    __m128 vA = _mm_broadcast_ss(pInterpA);
+    __m128 vB = _mm_broadcast_ss(pInterpB);
+    __m128 vC = _mm_broadcast_ss(pInterpC);
+
+    __m128 vk = _mm_sub_ps(_mm_sub_ps(_mm_set1_ps(1.0f), vI), vJ);
+    vC = _mm_mul_ps(vk, vC);
+
+    return vplaneps128(vA, vB, vC, vI, vJ);
+}
+
+static INLINE __m128 _simd128_abs_ps(__m128 a)
+{
+    __m128i ai = _mm_castps_si128(a);
+    return _mm_castsi128_ps(_mm_and_si128(ai, _mm_set1_epi32(0x7fffffff)));
+}
+
+static INLINE simdscalar _simd_abs_ps(simdscalar a)
+{
+    simdscalari ai = _simd_castps_si(a);
+    return _simd_castsi_ps(_simd_and_si(ai, _simd_set1_epi32(0x7fffffff)));
+}
+
 INLINE
 UINT pdep_u32(UINT a, UINT mask)
 {
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.h b/src/gallium/drivers/swr/rasterizer/core/frontend.h
index 8307c0bd2a7..b637785dcc5 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.h
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.h
@@ -307,6 +307,14 @@ bool CanUseSimplePoints(DRAW_CONTEXT *pDC)
             !state.rastState.pointSpriteEnable);
 }
 
+INLINE
+bool vIsNaN(const __m128& vec)
+{
+    const __m128 result = _mm_cmpunord_ps(vec, vec);
+    const int32_t mask = _mm_movemask_ps(result);
+    return (mask != 0);
+}
+
 uint32_t GetNumPrims(PRIMITIVE_TOPOLOGY mode, uint32_t numElements);
 uint32_t NumVertsPerPrim(PRIMITIVE_TOPOLOGY topology, bool includeAdjVerts);
 
diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h
index 50361068025..88ec4b02033 100644
--- a/src/gallium/drivers/swr/rasterizer/core/state.h
+++ b/src/gallium/drivers/swr/rasterizer/core/state.h
@@ -197,8 +197,6 @@ enum SWR_OUTER_TESSFACTOR_ID
 #define VERTEX_CLIPCULL_DIST_LO_SLOT 35 // VS writes lower 4 clip/cull dist
 #define VERTEX_CLIPCULL_DIST_HI_SLOT 36 // VS writes upper 4 clip/cull dist
 #define VERTEX_POINT_SIZE_SLOT 37 // VS writes point size here
-static_assert(VERTEX_POINT_SIZE_SLOT < KNOB_NUM_ATTRIBUTES, "Mismatched attribute slot size");
-
 // SoAoSoA
 struct simdvertex
 {