diff options
author | Jose Fonseca <jfonseca@vmware.com> | 2015-08-07 17:16:52 +0100 |
---|---|---|
committer | Jose Fonseca <jfonseca@vmware.com> | 2015-08-08 23:20:49 +0100 |
commit | 1eb9029d752b12c3bacb6399bd78c2c63dfdda3c (patch) | |
tree | a2692193ecef52e9e3fe6f72c9e4a19ef6a00897 /thirdparty | |
parent | 735805df70bc64af1e5b709133afb76499a92ee1 (diff) |
directxtex: Update to Jul 2015 release.
With several local modifications for the sake of MinGW compatibility, including
XNAMath fallback support.
The main driver for this upgrade is to fix build errors with VS 2015.
Diffstat (limited to 'thirdparty')
32 files changed, 10896 insertions, 2362 deletions
diff --git a/thirdparty/directxtex/CMakeLists.txt b/thirdparty/directxtex/CMakeLists.txt index e971a963..6ed1d7e5 100644 --- a/thirdparty/directxtex/CMakeLists.txt +++ b/thirdparty/directxtex/CMakeLists.txt @@ -15,8 +15,11 @@ if (DirectX_D3D11_INCLUDE_FOUND) endif () if (MINGW) - # MinGW wincodec.h is incomplete. Use our own. - include_directories (BEFORE ${CMAKE_SOURCE_DIR}/thirdparty/mingw/wincodec) + # Several MinGW headers are lacking. Use our own. + include_directories (BEFORE + ${CMAKE_SOURCE_DIR}/thirdparty/mingw/wincodec + ${CMAKE_SOURCE_DIR}/thirdparty/mingw/wrl + ) include_directories (BEFORE ${CMAKE_SOURCE_DIR}/dispatch # for compat.h @@ -61,8 +64,8 @@ if (DirectX_D3D11_INCLUDE_FOUND) ) install ( - FILES "Microsoft Public License.rtf" + FILES MIT.txt DESTINATION ${DOC_INSTALL_DIR} - RENAME LICENSE-directxtex.rtf + RENAME LICENSE-directxtex.txt ) endif () diff --git a/thirdparty/directxtex/DirectXTex/BC.cpp b/thirdparty/directxtex/DirectXTex/BC.cpp index c47296df..2d03d2b2 100644 --- a/thirdparty/directxtex/DirectXTex/BC.cpp +++ b/thirdparty/directxtex/DirectXTex/BC.cpp @@ -63,7 +63,7 @@ inline static uint16_t Encode565(_In_ const HDRColorA *pColor) //------------------------------------------------------------------------------------- static void OptimizeRGB(_Out_ HDRColorA *pX, _Out_ HDRColorA *pY, - _In_count_c_(NUM_PIXELS_PER_BLOCK) const HDRColorA *pPoints, _In_ size_t cSteps, _In_ DWORD flags) + _In_reads_(NUM_PIXELS_PER_BLOCK) const HDRColorA *pPoints, _In_ size_t cSteps, _In_ DWORD flags) { static const float fEpsilon = (0.25f / 64.0f) * (0.25f / 64.0f); static const float pC3[] = { 2.0f/2.0f, 1.0f/2.0f, 0.0f/2.0f }; @@ -254,7 +254,7 @@ static void OptimizeRGB(_Out_ HDRColorA *pX, _Out_ HDRColorA *pY, size_t iStep; if(fDot <= 0.0f) iStep = 0; - if(fDot >= fSteps) + else if(fDot >= fSteps) iStep = cSteps - 1; else iStep = static_cast<size_t>(fDot + 0.5f); @@ -317,7 +317,7 @@ static void OptimizeRGB(_Out_ HDRColorA *pX, _Out_ HDRColorA *pY, 
//------------------------------------------------------------------------------------- -inline static void DecodeBC1( _Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_ const D3DX_BC1 *pBC ) +inline static void DecodeBC1( _Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_ const D3DX_BC1 *pBC, _In_ bool isbc1 ) { assert( pColor && pBC ); static_assert( sizeof(D3DX_BC1) == 8, "D3DX_BC1 should be 8 bytes" ); @@ -330,14 +330,14 @@ inline static void DecodeBC1( _Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor clr0 = XMVectorMultiply( clr0, s_Scale ); clr1 = XMVectorMultiply( clr1, s_Scale ); - clr0 = XMVectorSwizzle( clr0, 2, 1, 0, 3 ); - clr1 = XMVectorSwizzle( clr1, 2, 1, 0, 3 ); + clr0 = XMVectorSwizzle<2, 1, 0, 3>( clr0 ); + clr1 = XMVectorSwizzle<2, 1, 0, 3>( clr1 ); clr0 = XMVectorSelect( g_XMIdentityR3, clr0, g_XMSelect1110 ); clr1 = XMVectorSelect( g_XMIdentityR3, clr1, g_XMSelect1110 ); XMVECTOR clr2, clr3; - if(pBC->rgb[0] <= pBC->rgb[1]) + if ( isbc1 && (pBC->rgb[0] <= pBC->rgb[1]) ) { clr2 = XMVectorLerp( clr0, clr1, 0.5f ); clr3 = XMVectorZero(); // Alpha of 0 @@ -366,9 +366,8 @@ inline static void DecodeBC1( _Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor //------------------------------------------------------------------------------------- -#pragma warning(disable: 4616 6001 6201) -static void EncodeBC1(_Out_ D3DX_BC1 *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const HDRColorA *pColor, +static void EncodeBC1(_Out_ D3DX_BC1 *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const HDRColorA *pColor, _In_ bool bColorKey, _In_ float alphaRef, _In_ DWORD flags) { assert( pBC && pColor ); @@ -446,7 +445,7 @@ static void EncodeBC1(_Out_ D3DX_BC1 *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) co if(3 != (i & 3)) { assert( i < 15 ); - __analysis_assume( i < 15 ); + _Analysis_assume_( i < 15 ); Error[i + 1].r += Diff.r * (7.0f / 16.0f); Error[i + 1].g += Diff.g * (7.0f / 16.0f); Error[i + 1].b += Diff.b * (7.0f / 16.0f); @@ -468,7 +467,7 @@ static void 
EncodeBC1(_Out_ D3DX_BC1 *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) co if(3 != (i & 3)) { assert( i < 11 ); - __analysis_assume(i < 11 ); + _Analysis_assume_( i < 11 ); Error[i + 5].r += Diff.r * (1.0f / 16.0f); Error[i + 5].g += Diff.g * (1.0f / 16.0f); Error[i + 5].b += Diff.b * (1.0f / 16.0f); @@ -677,7 +676,7 @@ static void EncodeBC1(_Out_ D3DX_BC1 *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) co //------------------------------------------------------------------------------------- #ifdef COLOR_WEIGHTS -static void EncodeSolidBC1(_Out_ D3DX_BC1 *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const HDRColorA *pColor) +static void EncodeSolidBC1(_Out_ D3DX_BC1 *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const HDRColorA *pColor) { #ifdef COLOR_AVG_0WEIGHTS // Compute avg color @@ -717,12 +716,14 @@ static void EncodeSolidBC1(_Out_ D3DX_BC1 *pBC, _In_count_c_(NUM_PIXELS_PER_BLOC //------------------------------------------------------------------------------------- // BC1 Compression //------------------------------------------------------------------------------------- +_Use_decl_annotations_ void D3DXDecodeBC1(XMVECTOR *pColor, const uint8_t *pBC) { - const D3DX_BC1 *pBC1 = reinterpret_cast<const D3DX_BC1 *>(pBC); - DecodeBC1( pColor, pBC1 ); + auto pBC1 = reinterpret_cast<const D3DX_BC1 *>(pBC); + DecodeBC1( pColor, pBC1, true ); } +_Use_decl_annotations_ void D3DXEncodeBC1(uint8_t *pBC, const XMVECTOR *pColor, float alphaRef, DWORD flags) { assert( pBC && pColor ); @@ -751,7 +752,7 @@ void D3DXEncodeBC1(uint8_t *pBC, const XMVECTOR *pColor, float alphaRef, DWORD f if(3 != (i & 3)) { assert( i < 15 ); - __analysis_assume( i < 15 ); + _Analysis_assume_( i < 15 ); fError[i + 1] += fDiff * (7.0f / 16.0f); } @@ -765,7 +766,7 @@ void D3DXEncodeBC1(uint8_t *pBC, const XMVECTOR *pColor, float alphaRef, DWORD f if(3 != (i & 3)) { assert( i < 11 ); - __analysis_assume( i < 11 ); + _Analysis_assume_( i < 11 ); fError[i + 5] += fDiff * (1.0f / 16.0f); } } @@ -779,7 +780,7 @@ void 
D3DXEncodeBC1(uint8_t *pBC, const XMVECTOR *pColor, float alphaRef, DWORD f } } - D3DX_BC1 *pBC1 = reinterpret_cast<D3DX_BC1 *>(pBC); + auto pBC1 = reinterpret_cast<D3DX_BC1 *>(pBC); EncodeBC1(pBC1, Color, true, alphaRef, flags); } @@ -787,21 +788,25 @@ void D3DXEncodeBC1(uint8_t *pBC, const XMVECTOR *pColor, float alphaRef, DWORD f //------------------------------------------------------------------------------------- // BC2 Compression //------------------------------------------------------------------------------------- +_Use_decl_annotations_ void D3DXDecodeBC2(XMVECTOR *pColor, const uint8_t *pBC) { assert( pColor && pBC ); static_assert( sizeof(D3DX_BC2) == 16, "D3DX_BC2 should be 16 bytes" ); - const D3DX_BC2 *pBC2 = reinterpret_cast<const D3DX_BC2 *>(pBC); + auto pBC2 = reinterpret_cast<const D3DX_BC2 *>(pBC); // RGB part - DecodeBC1(pColor, &pBC2->bc1); + DecodeBC1(pColor, &pBC2->bc1, false); // 4-bit alpha part DWORD dw = pBC2->bitmap[0]; for(size_t i = 0; i < 8; ++i, dw >>= 4) + { + #pragma prefast(suppress:22103, "writing blocks in two halves confuses tool") pColor[i] = XMVectorSetW( pColor[i], (float) (dw & 0xf) * (1.0f / 15.0f) ); + } dw = pBC2->bitmap[1]; @@ -809,6 +814,7 @@ void D3DXDecodeBC2(XMVECTOR *pColor, const uint8_t *pBC) pColor[i] = XMVectorSetW( pColor[i], (float) (dw & 0xf) * (1.0f / 15.0f) ); } +_Use_decl_annotations_ void D3DXEncodeBC2(uint8_t *pBC, const XMVECTOR *pColor, DWORD flags) { assert( pBC && pColor ); @@ -820,7 +826,7 @@ void D3DXEncodeBC2(uint8_t *pBC, const XMVECTOR *pColor, DWORD flags) XMStoreFloat4( reinterpret_cast<XMFLOAT4*>( &Color[i] ), pColor[i] ); } - D3DX_BC2 *pBC2 = reinterpret_cast<D3DX_BC2 *>(pBC); + auto pBC2 = reinterpret_cast<D3DX_BC2 *>(pBC); // 4-bit alpha part. Dithered using Floyd Stienberg error diffusion. 
pBC2->bitmap[0] = 0; @@ -848,7 +854,7 @@ void D3DXEncodeBC2(uint8_t *pBC, const XMVECTOR *pColor, DWORD flags) if(3 != (i & 3)) { assert( i < 15 ); - __analysis_assume( i < 15 ); + _Analysis_assume_( i < 15 ); fError[i + 1] += fDiff * (7.0f / 16.0f); } @@ -862,7 +868,7 @@ void D3DXEncodeBC2(uint8_t *pBC, const XMVECTOR *pColor, DWORD flags) if(3 != (i & 3)) { assert( i < 11 ); - __analysis_assume( i < 11 ); + _Analysis_assume_( i < 11 ); fError[i + 5] += fDiff * (1.0f / 16.0f); } } @@ -885,15 +891,16 @@ void D3DXEncodeBC2(uint8_t *pBC, const XMVECTOR *pColor, DWORD flags) //------------------------------------------------------------------------------------- // BC3 Compression //------------------------------------------------------------------------------------- +_Use_decl_annotations_ void D3DXDecodeBC3(XMVECTOR *pColor, const uint8_t *pBC) { assert( pColor && pBC ); static_assert( sizeof(D3DX_BC3) == 16, "D3DX_BC3 should be 16 bytes" ); - const D3DX_BC3 *pBC3 = reinterpret_cast<const D3DX_BC3 *>(pBC); + auto pBC3 = reinterpret_cast<const D3DX_BC3 *>(pBC); // RGB part - DecodeBC1(pColor, &pBC3->bc1); + DecodeBC1(pColor, &pBC3->bc1, false); // Adaptive 3-bit alpha part float fAlpha[8]; @@ -926,6 +933,7 @@ void D3DXDecodeBC3(XMVECTOR *pColor, const uint8_t *pBC) pColor[i] = XMVectorSetW( pColor[i], fAlpha[dw & 0x7] ); } +_Use_decl_annotations_ void D3DXEncodeBC3(uint8_t *pBC, const XMVECTOR *pColor, DWORD flags) { assert( pBC && pColor ); @@ -937,7 +945,7 @@ void D3DXEncodeBC3(uint8_t *pBC, const XMVECTOR *pColor, DWORD flags) XMStoreFloat4( reinterpret_cast<XMFLOAT4*>( &Color[i] ), pColor[i] ); } - D3DX_BC3 *pBC3 = reinterpret_cast<D3DX_BC3 *>(pBC); + auto pBC3 = reinterpret_cast<D3DX_BC3 *>(pBC); // Quantize block to A8, using Floyd Stienberg error diffusion. 
This // increases the chance that colors will map directly to the quantized @@ -971,7 +979,7 @@ void D3DXEncodeBC3(uint8_t *pBC, const XMVECTOR *pColor, DWORD flags) if(3 != (i & 3)) { assert( i < 15 ); - __analysis_assume( i < 15 ); + _Analysis_assume_( i < 15 ); fError[i + 1] += fDiff * (7.0f / 16.0f); } @@ -985,7 +993,7 @@ void D3DXEncodeBC3(uint8_t *pBC, const XMVECTOR *pColor, DWORD flags) if(3 != (i & 3)) { assert( i < 11 ); - __analysis_assume( i < 11 ); + _Analysis_assume_( i < 11 ); fError[i + 5] += fDiff * (1.0f / 16.0f); } } @@ -1128,4 +1136,4 @@ void D3DXEncodeBC3(uint8_t *pBC, const XMVECTOR *pColor, DWORD flags) } } -} // namespace
\ No newline at end of file +} // namespace diff --git a/thirdparty/directxtex/DirectXTex/BC.h b/thirdparty/directxtex/DirectXTex/BC.h index 638058ea..26f999a3 100644 --- a/thirdparty/directxtex/DirectXTex/BC.h +++ b/thirdparty/directxtex/DirectXTex/BC.h @@ -13,9 +13,7 @@ // http://go.microsoft.com/fwlink/?LinkId=248926 //------------------------------------------------------------------------------------- -#if defined(_MSC_VER) && (_MSC_VER > 1000) #pragma once -#endif #include <assert.h> @@ -28,11 +26,6 @@ #include <float.h> -#pragma warning(push) -#pragma warning(disable : 4005) -#include <stdint.h> -#pragma warning(pop) - namespace DirectX { @@ -50,7 +43,7 @@ const uint16_t F16S_MASK = 0x8000; // f16 sign mask const uint16_t F16EM_MASK = 0x7fff; // f16 exp & mantissa mask const uint16_t F16MAX = 0x7bff; // MAXFLT bit pattern for XMHALF -#define SIGN_EXTEND(x,nb) ((((x)&(1<<((nb)-1)))?((~0)<<(nb)):0)|(x)) +#define SIGN_EXTEND(x,nb) ((((x)&(1<<((nb)-1)))?((~0)<<(nb)):0)|(x)) // Because these are used in SAL annotations, they need to remain macros rather than const values #define NUM_PIXELS_PER_BLOCK 16 @@ -65,9 +58,9 @@ const size_t BC6H_MAX_SHAPES = 32; const size_t BC7_NUM_CHANNELS = 4; const size_t BC7_MAX_SHAPES = 64; -const uint32_t BC67_WEIGHT_MAX = 64; +const int32_t BC67_WEIGHT_MAX = 64; const uint32_t BC67_WEIGHT_SHIFT = 6; -const uint32_t BC67_WEIGHT_ROUND = 32; +const int32_t BC67_WEIGHT_ROUND = 32; extern const int g_aWeights2[4]; extern const int g_aWeights3[8]; @@ -79,6 +72,7 @@ enum BC_FLAGS BC_FLAGS_DITHER_RGB = 0x10000, // Enables dithering for RGB colors for BC1-3 BC_FLAGS_DITHER_A = 0x20000, // Enables dithering for Alpha channel for BC1-3 BC_FLAGS_UNIFORM = 0x40000, // By default, uses perceptual weighting for BC1-3; this flag makes it a uniform weighting + BC_FLAGS_USE_3SUBSETS = 0x80000,// By default, BC7 skips mode 0 & 2; this flag adds those modes back }; 
//------------------------------------------------------------------------------------- @@ -91,7 +85,7 @@ class LDRColorA public: uint8_t r, g, b, a; - LDRColorA() {} + LDRColorA() DIRECTX_CTOR_DEFAULT LDRColorA(uint8_t _r, uint8_t _g, uint8_t _b, uint8_t _a) : r(_r), g(_g), b(_b), a(_a) {} const uint8_t& operator [] (_In_range_(0,3) size_t uElement) const @@ -120,14 +114,14 @@ public: LDRColorA operator = (_In_ const HDRColorA& c); - static void InterpolateRGB(_In_ const LDRColorA& c0, _In_ const LDRColorA& c1, _In_ size_t wc, _In_ size_t wcprec, _Out_ LDRColorA& out) + static void InterpolateRGB(_In_ const LDRColorA& c0, _In_ const LDRColorA& c1, _In_ size_t wc, _In_ _In_range_(2, 4) size_t wcprec, _Out_ LDRColorA& out) { const int* aWeights = nullptr; switch(wcprec) { - case 2: aWeights = g_aWeights2; assert( wc < 4 ); __analysis_assume( wc < 4 ); break; - case 3: aWeights = g_aWeights3; assert( wc < 8 ); __analysis_assume( wc < 8 ); break; - case 4: aWeights = g_aWeights4; assert( wc < 16 ); __analysis_assume( wc < 16 ); break; + case 2: aWeights = g_aWeights2; assert( wc < 4 ); _Analysis_assume_( wc < 4 ); break; + case 3: aWeights = g_aWeights3; assert( wc < 8 ); _Analysis_assume_( wc < 8 ); break; + case 4: aWeights = g_aWeights4; assert( wc < 16 ); _Analysis_assume_( wc < 16 ); break; default: assert(false); out.r = out.g = out.b = 0; return; } out.r = uint8_t((uint32_t(c0.r) * uint32_t(BC67_WEIGHT_MAX - aWeights[wc]) + uint32_t(c1.r) * uint32_t(aWeights[wc]) + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT); @@ -135,33 +129,35 @@ public: out.b = uint8_t((uint32_t(c0.b) * uint32_t(BC67_WEIGHT_MAX - aWeights[wc]) + uint32_t(c1.b) * uint32_t(aWeights[wc]) + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT); } - static void InterpolateA(_In_ const LDRColorA& c0, _In_ const LDRColorA& c1, _In_ size_t wa, _In_ size_t waprec, _Out_ LDRColorA& out) + static void InterpolateA(_In_ const LDRColorA& c0, _In_ const LDRColorA& c1, _In_ size_t wa, _In_range_(2, 4) _In_ size_t waprec, 
_Out_ LDRColorA& out) { const int* aWeights = nullptr; switch(waprec) { - case 2: aWeights = g_aWeights2; assert( wa < 4 ); __analysis_assume( wa < 4 ); break; - case 3: aWeights = g_aWeights3; assert( wa < 8 ); __analysis_assume( wa < 8 ); break; - case 4: aWeights = g_aWeights4; assert( wa < 16 ); __analysis_assume( wa < 16 ); break; + case 2: aWeights = g_aWeights2; assert( wa < 4 ); _Analysis_assume_( wa < 4 ); break; + case 3: aWeights = g_aWeights3; assert( wa < 8 ); _Analysis_assume_( wa < 8 ); break; + case 4: aWeights = g_aWeights4; assert( wa < 16 ); _Analysis_assume_( wa < 16 ); break; default: assert(false); out.a = 0; return; } out.a = uint8_t((uint32_t(c0.a) * uint32_t(BC67_WEIGHT_MAX - aWeights[wa]) + uint32_t(c1.a) * uint32_t(aWeights[wa]) + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT); } - static void Interpolate(_In_ const LDRColorA& c0, _In_ const LDRColorA& c1, _In_ size_t wc, _In_ size_t wa, _In_ size_t wcprec, _In_ size_t waprec, _Out_ LDRColorA& out) + static void Interpolate(_In_ const LDRColorA& c0, _In_ const LDRColorA& c1, _In_ size_t wc, _In_ size_t wa, _In_ _In_range_(2, 4) size_t wcprec, _In_ _In_range_(2, 4) size_t waprec, _Out_ LDRColorA& out) { InterpolateRGB(c0, c1, wc, wcprec, out); InterpolateA(c0, c1, wa, waprec, out); } }; +static_assert( sizeof(LDRColorA) == 4, "Unexpected packing"); + class HDRColorA { public: float r, g, b, a; public: - HDRColorA() {} + HDRColorA() DIRECTX_CTOR_DEFAULT HDRColorA(float _r, float _g, float _b, float _a) : r(_r), g(_g), b(_b), a(_a) {} HDRColorA(const HDRColorA& c) : r(c.r), g(c.g), b(c.b), a(c.a) {} HDRColorA(const LDRColorA& c) @@ -322,9 +318,10 @@ class INTColor { public: int r, g, b; + int pad; public: - INTColor() {} + INTColor() DIRECTX_CTOR_DEFAULT INTColor(int nr, int ng, int nb) {r = nr; g = ng; b = nb;} INTColor(const INTColor& c) {r = c.r; g = c.g; b = c.b;} @@ -360,7 +357,7 @@ public: int& operator [] ( _In_ uint8_t i ) { assert(i < sizeof(INTColor) / sizeof(int)); - __analysis_assume(i 
< sizeof(INTColor) / sizeof(int)); + _Analysis_assume_(i < sizeof(INTColor) / sizeof(int)); return ((int*) this)[i]; } @@ -392,7 +389,7 @@ public: return *this; } - void ToF16(_Out_cap_c_(3) HALF aF16[3], _In_ bool bSigned) const + void ToF16(_Out_writes_(3) HALF aF16[3], _In_ bool bSigned) const { aF16[0] = INT2F16(r, bSigned); aF16[1] = INT2F16(g, bSigned); @@ -445,6 +442,8 @@ private: } }; +static_assert( sizeof(INTColor) == 16, "Unexpected packing"); + struct INTEndPntPair { INTColor A; @@ -458,7 +457,7 @@ public: uint8_t GetBit(_Inout_ size_t& uStartBit) const { assert(uStartBit < 128); - __analysis_assume(uStartBit < 128); + _Analysis_assume_(uStartBit < 128); size_t uIndex = uStartBit >> 3; uint8_t ret = (m_uBits[uIndex] >> (uStartBit - (uIndex << 3))) & 0x01; uStartBit++; @@ -469,7 +468,7 @@ public: { if(uNumBits == 0) return 0; assert(uStartBit + uNumBits <= 128 && uNumBits <= 8); - __analysis_assume(uStartBit + uNumBits <= 128 && uNumBits <= 8); + _Analysis_assume_(uStartBit + uNumBits <= 128 && uNumBits <= 8); uint8_t ret; size_t uIndex = uStartBit >> 3; size_t uBase = uStartBit - (uIndex << 3); @@ -491,7 +490,7 @@ public: void SetBit(_Inout_ size_t& uStartBit, _In_ uint8_t uValue) { assert(uStartBit < 128 && uValue < 2); - __analysis_assume(uStartBit < 128 && uValue < 2); + _Analysis_assume_(uStartBit < 128 && uValue < 2); size_t uIndex = uStartBit >> 3; size_t uBase = uStartBit - (uIndex << 3); m_uBits[uIndex] &= ~(1 << uBase); @@ -504,7 +503,7 @@ public: if(uNumBits == 0) return; assert(uStartBit + uNumBits <= 128 && uNumBits <= 8); - __analysis_assume(uStartBit + uNumBits <= 128 && uNumBits <= 8); + _Analysis_assume_(uStartBit + uNumBits <= 128 && uNumBits <= 8); assert(uValue < (1 << uNumBits)); size_t uIndex = uStartBit >> 3; size_t uBase = uStartBit - (uIndex << 3); @@ -529,17 +528,16 @@ private: uint8_t m_uBits[ SizeInBytes ]; }; -#pragma warning(push) -#pragma warning(disable : 4127 4480 4512) - // BC6H compression (16 bits per texel) class 
D3DX_BC6H : private CBits< 16 > { public: - void Decode(_In_ bool bSigned, _Out_cap_c_(NUM_PIXELS_PER_BLOCK) HDRColorA* pOut) const; - void Encode(_In_ bool bSigned, _In_count_c_(NUM_PIXELS_PER_BLOCK) const HDRColorA* const pIn); + void Decode(_In_ bool bSigned, _Out_writes_(NUM_PIXELS_PER_BLOCK) HDRColorA* pOut) const; + void Encode(_In_ bool bSigned, _In_reads_(NUM_PIXELS_PER_BLOCK) const HDRColorA* const pIn); private: +#pragma warning(push) +#pragma warning(disable : 4480) enum EField : uint8_t { NA, // N/A @@ -558,6 +556,7 @@ private: BY, BZ, }; +#pragma warning(pop) struct ModeDescriptor { @@ -574,6 +573,8 @@ private: LDRColorA RGBAPrec[BC6H_MAX_REGIONS][2]; }; +#pragma warning(push) +#pragma warning(disable : 4512) struct EncodeParams { float fBestErr; @@ -593,35 +594,36 @@ private: } } }; +#pragma warning(pop) static int Quantize(_In_ int iValue, _In_ int prec, _In_ bool bSigned); static int Unquantize(_In_ int comp, _In_ uint8_t uBitsPerComp, _In_ bool bSigned); static int FinishUnquantize(_In_ int comp, _In_ bool bSigned); - static bool EndPointsFit(_In_ const EncodeParams* pEP, _In_count_c_(BC6H_MAX_REGIONS) const INTEndPntPair aEndPts[]); + static bool EndPointsFit(_In_ const EncodeParams* pEP, _In_reads_(BC6H_MAX_REGIONS) const INTEndPntPair aEndPts[]); void GeneratePaletteQuantized(_In_ const EncodeParams* pEP, _In_ const INTEndPntPair& endPts, - _Out_cap_c_(BC6H_MAX_INDICES) INTColor aPalette[]) const; - float MapColorsQuantized(_In_ const EncodeParams* pEP, _In_count_(np) const INTColor aColors[], _In_ size_t np, _In_ const INTEndPntPair &endPts) const; - float PerturbOne(_In_ const EncodeParams* pEP, _In_count_(np) const INTColor aColors[], _In_ size_t np, _In_ uint8_t ch, + _Out_writes_(BC6H_MAX_INDICES) INTColor aPalette[]) const; + float MapColorsQuantized(_In_ const EncodeParams* pEP, _In_reads_(np) const INTColor aColors[], _In_ size_t np, _In_ const INTEndPntPair &endPts) const; + float PerturbOne(_In_ const EncodeParams* pEP, _In_reads_(np) 
const INTColor aColors[], _In_ size_t np, _In_ uint8_t ch, _In_ const INTEndPntPair& oldEndPts, _Out_ INTEndPntPair& newEndPts, _In_ float fOldErr, _In_ int do_b) const; - void OptimizeOne(_In_ const EncodeParams* pEP, _In_count_(np) const INTColor aColors[], _In_ size_t np, _In_ float aOrgErr, + void OptimizeOne(_In_ const EncodeParams* pEP, _In_reads_(np) const INTColor aColors[], _In_ size_t np, _In_ float aOrgErr, _In_ const INTEndPntPair &aOrgEndPts, _Out_ INTEndPntPair &aOptEndPts) const; - void OptimizeEndPoints(_In_ const EncodeParams* pEP, _In_count_c_(BC6H_MAX_REGIONS) const float aOrgErr[], - _In_count_c_(BC6H_MAX_REGIONS) const INTEndPntPair aOrgEndPts[], - _Inout_count_c_(BC6H_MAX_REGIONS) INTEndPntPair aOptEndPts[]) const; - static void SwapIndices(_In_ const EncodeParams* pEP, _Inout_count_c_(BC6H_MAX_REGIONS) INTEndPntPair aEndPts[], - _In_count_c_(NUM_PIXELS_PER_BLOCK) size_t aIndices[]); - void AssignIndices(_In_ const EncodeParams* pEP, _In_count_c_(BC6H_MAX_REGIONS) const INTEndPntPair aEndPts[], - _Out_cap_c_(NUM_PIXELS_PER_BLOCK) size_t aIndices[], - _Out_cap_c_(BC6H_MAX_REGIONS) float aTotErr[]) const; - void QuantizeEndPts(_In_ const EncodeParams* pEP, _Out_cap_c_(BC6H_MAX_REGIONS) INTEndPntPair* qQntEndPts) const; - void EmitBlock(_In_ const EncodeParams* pEP, _In_count_c_(BC6H_MAX_REGIONS) const INTEndPntPair aEndPts[], - _In_count_c_(NUM_PIXELS_PER_BLOCK) const size_t aIndices[]); + void OptimizeEndPoints(_In_ const EncodeParams* pEP, _In_reads_(BC6H_MAX_REGIONS) const float aOrgErr[], + _In_reads_(BC6H_MAX_REGIONS) const INTEndPntPair aOrgEndPts[], + _Out_writes_all_(BC6H_MAX_REGIONS) INTEndPntPair aOptEndPts[]) const; + static void SwapIndices(_In_ const EncodeParams* pEP, _Inout_updates_all_(BC6H_MAX_REGIONS) INTEndPntPair aEndPts[], + _In_reads_(NUM_PIXELS_PER_BLOCK) size_t aIndices[]); + void AssignIndices(_In_ const EncodeParams* pEP, _In_reads_(BC6H_MAX_REGIONS) const INTEndPntPair aEndPts[], + _Out_writes_(NUM_PIXELS_PER_BLOCK) 
size_t aIndices[], + _Out_writes_(BC6H_MAX_REGIONS) float aTotErr[]) const; + void QuantizeEndPts(_In_ const EncodeParams* pEP, _Out_writes_(BC6H_MAX_REGIONS) INTEndPntPair* qQntEndPts) const; + void EmitBlock(_In_ const EncodeParams* pEP, _In_reads_(BC6H_MAX_REGIONS) const INTEndPntPair aEndPts[], + _In_reads_(NUM_PIXELS_PER_BLOCK) const size_t aIndices[]); void Refine(_Inout_ EncodeParams* pEP); - static void GeneratePaletteUnquantized(_In_ const EncodeParams* pEP, _In_ size_t uRegion, _Out_cap_c_(BC6H_MAX_INDICES) INTColor aPalette[]); - float MapColors(_In_ const EncodeParams* pEP, _In_ size_t uRegion, _In_ size_t np, _In_count_(np) const size_t* auIndex) const; + static void GeneratePaletteUnquantized(_In_ const EncodeParams* pEP, _In_ size_t uRegion, _Out_writes_(BC6H_MAX_INDICES) INTColor aPalette[]); + float MapColors(_In_ const EncodeParams* pEP, _In_ size_t uRegion, _In_ size_t np, _In_reads_(np) const size_t* auIndex) const; float RoughMSE(_Inout_ EncodeParams* pEP) const; private: @@ -634,8 +636,8 @@ private: class D3DX_BC7 : private CBits< 16 > { public: - void Decode(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) HDRColorA* pOut) const; - void Encode(_In_count_c_(NUM_PIXELS_PER_BLOCK) const HDRColorA* const pIn); + void Decode(_Out_writes_(NUM_PIXELS_PER_BLOCK) HDRColorA* pOut) const; + void Encode(bool skip3subsets, _In_reads_(NUM_PIXELS_PER_BLOCK) const HDRColorA* const pIn); private: struct ModeInfo @@ -651,6 +653,8 @@ private: LDRColorA RGBAPrecWithP; }; +#pragma warning(push) +#pragma warning(disable : 4512) struct EncodeParams { uint8_t uMode; @@ -660,6 +664,7 @@ private: EncodeParams(const HDRColorA* const aOriginal) : aHDRPixels(aOriginal) {} }; +#pragma warning(pop) static uint8_t Quantize(_In_ uint8_t comp, _In_ uint8_t uPrec) { @@ -699,29 +704,29 @@ private: } void GeneratePaletteQuantized(_In_ const EncodeParams* pEP, _In_ size_t uIndexMode, _In_ const LDREndPntPair& endpts, - _Out_cap_c_(BC7_MAX_INDICES) LDRColorA aPalette[]) const; - float 
PerturbOne(_In_ const EncodeParams* pEP, _In_count_(np) const LDRColorA colors[], _In_ size_t np, _In_ size_t uIndexMode, + _Out_writes_(BC7_MAX_INDICES) LDRColorA aPalette[]) const; + float PerturbOne(_In_ const EncodeParams* pEP, _In_reads_(np) const LDRColorA colors[], _In_ size_t np, _In_ size_t uIndexMode, _In_ size_t ch, _In_ const LDREndPntPair &old_endpts, _Out_ LDREndPntPair &new_endpts, _In_ float old_err, _In_ uint8_t do_b) const; - void Exhaustive(_In_ const EncodeParams* pEP, _In_count_(np) const LDRColorA aColors[], _In_ size_t np, _In_ size_t uIndexMode, + void Exhaustive(_In_ const EncodeParams* pEP, _In_reads_(np) const LDRColorA aColors[], _In_ size_t np, _In_ size_t uIndexMode, _In_ size_t ch, _Inout_ float& fOrgErr, _Inout_ LDREndPntPair& optEndPt) const; - void OptimizeOne(_In_ const EncodeParams* pEP, _In_count_(np) const LDRColorA colors[], _In_ size_t np, _In_ size_t uIndexMode, + void OptimizeOne(_In_ const EncodeParams* pEP, _In_reads_(np) const LDRColorA colors[], _In_ size_t np, _In_ size_t uIndexMode, _In_ float orig_err, _In_ const LDREndPntPair &orig_endpts, _Out_ LDREndPntPair &opt_endpts) const; void OptimizeEndPoints(_In_ const EncodeParams* pEP, _In_ size_t uShape, _In_ size_t uIndexMode, - _In_count_c_(BC7_MAX_REGIONS) const float orig_err[], - _In_count_c_(BC7_MAX_REGIONS) const LDREndPntPair orig_endpts[], - _Out_cap_c_(BC7_MAX_REGIONS) LDREndPntPair opt_endpts[]) const; + _In_reads_(BC7_MAX_REGIONS) const float orig_err[], + _In_reads_(BC7_MAX_REGIONS) const LDREndPntPair orig_endpts[], + _Out_writes_(BC7_MAX_REGIONS) LDREndPntPair opt_endpts[]) const; void AssignIndices(_In_ const EncodeParams* pEP, _In_ size_t uShape, _In_ size_t uIndexMode, - _In_count_c_(BC7_MAX_REGIONS) LDREndPntPair endpts[], - _Out_cap_c_(NUM_PIXELS_PER_BLOCK) size_t aIndices[], _Out_cap_c_(NUM_PIXELS_PER_BLOCK) size_t aIndices2[], - _Out_cap_c_(BC7_MAX_REGIONS) float afTotErr[]) const; + _In_reads_(BC7_MAX_REGIONS) LDREndPntPair endpts[], + 
_Out_writes_(NUM_PIXELS_PER_BLOCK) size_t aIndices[], _Out_writes_(NUM_PIXELS_PER_BLOCK) size_t aIndices2[], + _Out_writes_(BC7_MAX_REGIONS) float afTotErr[]) const; void EmitBlock(_In_ const EncodeParams* pEP, _In_ size_t uShape, _In_ size_t uRotation, _In_ size_t uIndexMode, - _In_count_c_(BC7_MAX_REGIONS) const LDREndPntPair aEndPts[], - _In_count_c_(NUM_PIXELS_PER_BLOCK) const size_t aIndex[], - _In_count_c_(NUM_PIXELS_PER_BLOCK) const size_t aIndex2[]); + _In_reads_(BC7_MAX_REGIONS) const LDREndPntPair aEndPts[], + _In_reads_(NUM_PIXELS_PER_BLOCK) const size_t aIndex[], + _In_reads_(NUM_PIXELS_PER_BLOCK) const size_t aIndex2[]); float Refine(_In_ const EncodeParams* pEP, _In_ size_t uShape, _In_ size_t uRotation, _In_ size_t uIndexMode); - float MapColors(_In_ const EncodeParams* pEP, _In_count_(np) const LDRColorA aColors[], _In_ size_t np, _In_ size_t uIndexMode, + float MapColors(_In_ const EncodeParams* pEP, _In_reads_(np) const LDRColorA aColors[], _In_ size_t np, _In_ size_t uIndexMode, _In_ const LDREndPntPair& endPts, _In_ float fMinErr) const; static float RoughMSE(_Inout_ EncodeParams* pEP, _In_ size_t uShape, _In_ size_t uIndexMode); @@ -730,6 +735,8 @@ private: }; //------------------------------------------------------------------------------------- +#pragma warning(push) +#pragma warning(disable : 4127) template <bool bRange> void OptimizeAlpha(float *pX, float *pY, const float *pPoints, size_t cSteps) { static const float pC6[] = { 5.0f/5.0f, 4.0f/5.0f, 3.0f/5.0f, 2.0f/5.0f, 1.0f/5.0f, 0.0f/5.0f }; @@ -870,28 +877,28 @@ template <bool bRange> void OptimizeAlpha(float *pX, float *pY, const float *pPo typedef void (*BC_DECODE)(XMVECTOR *pColor, const uint8_t *pBC); typedef void (*BC_ENCODE)(uint8_t *pDXT, const XMVECTOR *pColor, DWORD flags); -void D3DXDecodeBC1(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_count_c_(8) const uint8_t *pBC); -void D3DXDecodeBC2(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_count_c_(16) const 
uint8_t *pBC); -void D3DXDecodeBC3(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_count_c_(16) const uint8_t *pBC); -void D3DXDecodeBC4U(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_count_c_(8) const uint8_t *pBC); -void D3DXDecodeBC4S(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_count_c_(8) const uint8_t *pBC); -void D3DXDecodeBC5U(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_count_c_(16) const uint8_t *pBC); -void D3DXDecodeBC5S(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_count_c_(16) const uint8_t *pBC); -void D3DXDecodeBC6HU(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_count_c_(16) const uint8_t *pBC); -void D3DXDecodeBC6HS(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_count_c_(16) const uint8_t *pBC); -void D3DXDecodeBC7(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_count_c_(16) const uint8_t *pBC); - -void D3DXEncodeBC1(_Out_cap_c_(8) uint8_t *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ float alphaRef, _In_ DWORD flags); +void D3DXDecodeBC1(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(8) const uint8_t *pBC); +void D3DXDecodeBC2(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC); +void D3DXDecodeBC3(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC); +void D3DXDecodeBC4U(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(8) const uint8_t *pBC); +void D3DXDecodeBC4S(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(8) const uint8_t *pBC); +void D3DXDecodeBC5U(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC); +void D3DXDecodeBC5S(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC); +void D3DXDecodeBC6HU(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC); +void D3DXDecodeBC6HS(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR 
*pColor, _In_reads_(16) const uint8_t *pBC); +void D3DXDecodeBC7(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC); + +void D3DXEncodeBC1(_Out_writes_(8) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ float alphaRef, _In_ DWORD flags); // BC1 requires one additional parameter, so it doesn't match signature of BC_ENCODE above -void D3DXEncodeBC2(_Out_cap_c_(16) uint8_t *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags); -void D3DXEncodeBC3(_Out_cap_c_(16) uint8_t *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags); -void D3DXEncodeBC4U(_Out_cap_c_(8) uint8_t *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags); -void D3DXEncodeBC4S(_Out_cap_c_(8) uint8_t *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags); -void D3DXEncodeBC5U(_Out_cap_c_(16) uint8_t *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags); -void D3DXEncodeBC5S(_Out_cap_c_(16) uint8_t *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags); -void D3DXEncodeBC6HU(_Out_cap_c_(16) uint8_t *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags); -void D3DXEncodeBC6HS(_Out_cap_c_(16) uint8_t *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags); -void D3DXEncodeBC7(_Out_cap_c_(16) uint8_t *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags); +void D3DXEncodeBC2(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags); +void D3DXEncodeBC3(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags); +void D3DXEncodeBC4U(_Out_writes_(8) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags); +void D3DXEncodeBC4S(_Out_writes_(8) uint8_t *pBC, 
_In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags); +void D3DXEncodeBC5U(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags); +void D3DXEncodeBC5S(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags); +void D3DXEncodeBC6HU(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags); +void D3DXEncodeBC6HS(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags); +void D3DXEncodeBC7(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags); }; // namespace diff --git a/thirdparty/directxtex/DirectXTex/BC4BC5.cpp b/thirdparty/directxtex/DirectXTex/BC4BC5.cpp index 9cf40c7f..443adb0b 100644 --- a/thirdparty/directxtex/DirectXTex/BC4BC5.cpp +++ b/thirdparty/directxtex/DirectXTex/BC4BC5.cpp @@ -17,8 +17,6 @@ #include "BC.h" -#pragma warning(disable : 4201) - namespace DirectX { @@ -37,6 +35,9 @@ namespace DirectX // Structures //------------------------------------------------------------------------------------- +#pragma warning(push) +#pragma warning(disable : 4201) + // BC4U/BC5U struct BC4_UNORM { @@ -152,6 +153,7 @@ struct BC4_SNORM }; }; +#pragma warning(pop) //------------------------------------------------------------------------------------- // Convert a floating point value to an 8-bit SNORM @@ -164,10 +166,10 @@ static void inline FloatToSNorm( _In_ float fVal, _Out_ int8_t *piSNorm ) fVal = 0; else if( fVal > 1 ) - fVal = 1; // Clamp to 1 + fVal = 1; // Clamp to 1 else if( fVal < -1 ) - fVal = -1; // Clamp to -1 + fVal = -1; // Clamp to -1 fVal = fVal * (int8_t) ( dwMostNeg - 1 ); @@ -181,7 +183,7 @@ static void inline FloatToSNorm( _In_ float fVal, _Out_ int8_t *piSNorm ) //------------------------------------------------------------------------------ -static void FindEndPointsBC4U( 
_In_count_c_(BLOCK_SIZE) const float theTexelsU[], _Out_ uint8_t &endpointU_0, _Out_ uint8_t &endpointU_1) +static void FindEndPointsBC4U( _In_reads_(BLOCK_SIZE) const float theTexelsU[], _Out_ uint8_t &endpointU_0, _Out_ uint8_t &endpointU_1) { // The boundary of codec for signed/unsigned format float MIN_NORM; @@ -235,7 +237,7 @@ static void FindEndPointsBC4U( _In_count_c_(BLOCK_SIZE) const float theTexelsU[] } } -static void FindEndPointsBC4S(_In_count_c_(BLOCK_SIZE) const float theTexelsU[], _Out_ int8_t &endpointU_0, _Out_ int8_t &endpointU_1) +static void FindEndPointsBC4S(_In_reads_(BLOCK_SIZE) const float theTexelsU[], _Out_ int8_t &endpointU_0, _Out_ int8_t &endpointU_1) { // The boundary of codec for signed/unsigned format float MIN_NORM; @@ -291,7 +293,7 @@ static void FindEndPointsBC4S(_In_count_c_(BLOCK_SIZE) const float theTexelsU[], //------------------------------------------------------------------------------ -static inline void FindEndPointsBC5U( _In_count_c_(BLOCK_SIZE) const float theTexelsU[], _In_count_c_(BLOCK_SIZE) const float theTexelsV[], +static inline void FindEndPointsBC5U( _In_reads_(BLOCK_SIZE) const float theTexelsU[], _In_reads_(BLOCK_SIZE) const float theTexelsV[], _Out_ uint8_t &endpointU_0, _Out_ uint8_t &endpointU_1, _Out_ uint8_t &endpointV_0, _Out_ uint8_t &endpointV_1) { //Encoding the U and V channel by BC4 codec separately. @@ -299,7 +301,7 @@ static inline void FindEndPointsBC5U( _In_count_c_(BLOCK_SIZE) const float theTe FindEndPointsBC4U( theTexelsV, endpointV_0, endpointV_1); } -static inline void FindEndPointsBC5S( _In_count_c_(BLOCK_SIZE) const float theTexelsU[], _In_count_c_(BLOCK_SIZE) const float theTexelsV[], +static inline void FindEndPointsBC5S( _In_reads_(BLOCK_SIZE) const float theTexelsU[], _In_reads_(BLOCK_SIZE) const float theTexelsV[], _Out_ int8_t &endpointU_0, _Out_ int8_t &endpointU_1, _Out_ int8_t &endpointV_0, _Out_ int8_t &endpointV_1) { //Encoding the U and V channel by BC4 codec separately. 
@@ -309,7 +311,7 @@ static inline void FindEndPointsBC5S( _In_count_c_(BLOCK_SIZE) const float theTe //------------------------------------------------------------------------------ -static void FindClosestUNORM(_Inout_ BC4_UNORM* pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const float theTexelsU[]) +static void FindClosestUNORM(_Inout_ BC4_UNORM* pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const float theTexelsU[]) { float rGradient[8]; int i; @@ -334,7 +336,7 @@ static void FindClosestUNORM(_Inout_ BC4_UNORM* pBC, _In_count_c_(NUM_PIXELS_PER } } -static void FindClosestSNORM(_Inout_ BC4_SNORM* pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const float theTexelsU[]) +static void FindClosestSNORM(_Inout_ BC4_SNORM* pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const float theTexelsU[]) { float rGradient[8]; int i; @@ -367,32 +369,37 @@ static void FindClosestSNORM(_Inout_ BC4_SNORM* pBC, _In_count_c_(NUM_PIXELS_PER //------------------------------------------------------------------------------------- // BC4 Compression //------------------------------------------------------------------------------------- +_Use_decl_annotations_ void D3DXDecodeBC4U( XMVECTOR *pColor, const uint8_t *pBC ) { assert( pColor && pBC ); static_assert( sizeof(BC4_UNORM) == 8, "BC4_UNORM should be 8 bytes" ); - const BC4_UNORM * pBC4 = reinterpret_cast<const BC4_UNORM*>(pBC); + auto pBC4 = reinterpret_cast<const BC4_UNORM*>(pBC); for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i) { + #pragma prefast(suppress:22103, "writing blocks in two halves confuses tool") pColor[i] = XMVectorSet( pBC4->R(i), 0, 0, 1.0f); } } +_Use_decl_annotations_ void D3DXDecodeBC4S(XMVECTOR *pColor, const uint8_t *pBC) { assert( pColor && pBC ); static_assert( sizeof(BC4_SNORM) == 8, "BC4_SNORM should be 8 bytes" ); - const BC4_SNORM * pBC4 = reinterpret_cast<const BC4_SNORM*>(pBC); + auto pBC4 = reinterpret_cast<const BC4_SNORM*>(pBC); for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i) { + #pragma prefast(suppress:22103, "writing blocks in 
two halves confuses tool") pColor[i] = XMVectorSet( pBC4->R(i), 0, 0, 1.0f); } } +_Use_decl_annotations_ void D3DXEncodeBC4U( uint8_t *pBC, const XMVECTOR *pColor, DWORD flags ) { UNREFERENCED_PARAMETER( flags ); @@ -401,7 +408,7 @@ void D3DXEncodeBC4U( uint8_t *pBC, const XMVECTOR *pColor, DWORD flags ) static_assert( sizeof(BC4_UNORM) == 8, "BC4_UNORM should be 8 bytes" ); memset(pBC, 0, sizeof(BC4_UNORM)); - BC4_UNORM * pBC4 = reinterpret_cast<BC4_UNORM*>(pBC); + auto pBC4 = reinterpret_cast<BC4_UNORM*>(pBC); float theTexelsU[NUM_PIXELS_PER_BLOCK]; for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i) @@ -413,6 +420,7 @@ void D3DXEncodeBC4U( uint8_t *pBC, const XMVECTOR *pColor, DWORD flags ) FindClosestUNORM(pBC4, theTexelsU); } +_Use_decl_annotations_ void D3DXEncodeBC4S( uint8_t *pBC, const XMVECTOR *pColor, DWORD flags ) { UNREFERENCED_PARAMETER( flags ); @@ -421,7 +429,7 @@ void D3DXEncodeBC4S( uint8_t *pBC, const XMVECTOR *pColor, DWORD flags ) static_assert( sizeof(BC4_SNORM) == 8, "BC4_SNORM should be 8 bytes" ); memset(pBC, 0, sizeof(BC4_UNORM)); - BC4_SNORM * pBC4 = reinterpret_cast<BC4_SNORM*>(pBC); + auto pBC4 = reinterpret_cast<BC4_SNORM*>(pBC); float theTexelsU[NUM_PIXELS_PER_BLOCK]; for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i) @@ -437,34 +445,39 @@ void D3DXEncodeBC4S( uint8_t *pBC, const XMVECTOR *pColor, DWORD flags ) //------------------------------------------------------------------------------------- // BC5 Compression //------------------------------------------------------------------------------------- +_Use_decl_annotations_ void D3DXDecodeBC5U(XMVECTOR *pColor, const uint8_t *pBC) { assert( pColor && pBC ); static_assert( sizeof(BC4_UNORM) == 8, "BC4_UNORM should be 8 bytes" ); - const BC4_UNORM * pBCR = reinterpret_cast<const BC4_UNORM*>(pBC); - const BC4_UNORM * pBCG = reinterpret_cast<const BC4_UNORM*>(pBC+sizeof(BC4_UNORM)); + auto pBCR = reinterpret_cast<const BC4_UNORM*>(pBC); + auto pBCG = reinterpret_cast<const 
BC4_UNORM*>(pBC+sizeof(BC4_UNORM)); for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i) { + #pragma prefast(suppress:22103, "writing blocks in two halves confuses tool") pColor[i] = XMVectorSet(pBCR->R(i), pBCG->R(i), 0, 1.0f); } } +_Use_decl_annotations_ void D3DXDecodeBC5S(XMVECTOR *pColor, const uint8_t *pBC) { assert( pColor && pBC ); static_assert( sizeof(BC4_SNORM) == 8, "BC4_SNORM should be 8 bytes" ); - const BC4_SNORM * pBCR = reinterpret_cast<const BC4_SNORM*>(pBC); - const BC4_SNORM * pBCG = reinterpret_cast<const BC4_SNORM*>(pBC+sizeof(BC4_SNORM)); + auto pBCR = reinterpret_cast<const BC4_SNORM*>(pBC); + auto pBCG = reinterpret_cast<const BC4_SNORM*>(pBC+sizeof(BC4_SNORM)); for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i) { + #pragma prefast(suppress:22103, "writing blocks in two halves confuses tool") pColor[i] = XMVectorSet(pBCR->R(i), pBCG->R(i), 0, 1.0f); } } +_Use_decl_annotations_ void D3DXEncodeBC5U( uint8_t *pBC, const XMVECTOR *pColor, DWORD flags ) { UNREFERENCED_PARAMETER( flags ); @@ -473,8 +486,8 @@ void D3DXEncodeBC5U( uint8_t *pBC, const XMVECTOR *pColor, DWORD flags ) static_assert( sizeof(BC4_UNORM) == 8, "BC4_UNORM should be 8 bytes" ); memset(pBC, 0, sizeof(BC4_UNORM)*2); - BC4_UNORM * pBCR = reinterpret_cast<BC4_UNORM*>(pBC); - BC4_UNORM * pBCG = reinterpret_cast<BC4_UNORM*>(pBC+sizeof(BC4_UNORM)); + auto pBCR = reinterpret_cast<BC4_UNORM*>(pBC); + auto pBCG = reinterpret_cast<BC4_UNORM*>(pBC+sizeof(BC4_UNORM)); float theTexelsU[NUM_PIXELS_PER_BLOCK]; float theTexelsV[NUM_PIXELS_PER_BLOCK]; @@ -498,6 +511,7 @@ void D3DXEncodeBC5U( uint8_t *pBC, const XMVECTOR *pColor, DWORD flags ) FindClosestUNORM(pBCG, theTexelsV); } +_Use_decl_annotations_ void D3DXEncodeBC5S( uint8_t *pBC, const XMVECTOR *pColor, DWORD flags ) { UNREFERENCED_PARAMETER( flags ); @@ -506,8 +520,8 @@ void D3DXEncodeBC5S( uint8_t *pBC, const XMVECTOR *pColor, DWORD flags ) static_assert( sizeof(BC4_SNORM) == 8, "BC4_SNORM should be 8 bytes" ); memset(pBC, 0, 
sizeof(BC4_UNORM)*2); - BC4_SNORM * pBCR = reinterpret_cast<BC4_SNORM*>(pBC); - BC4_SNORM * pBCG = reinterpret_cast<BC4_SNORM*>(pBC+sizeof(BC4_SNORM)); + auto pBCR = reinterpret_cast<BC4_SNORM*>(pBC); + auto pBCG = reinterpret_cast<BC4_SNORM*>(pBC+sizeof(BC4_SNORM)); float theTexelsU[NUM_PIXELS_PER_BLOCK]; float theTexelsV[NUM_PIXELS_PER_BLOCK]; diff --git a/thirdparty/directxtex/DirectXTex/BC6HBC7.cpp b/thirdparty/directxtex/DirectXTex/BC6HBC7.cpp index 2e607543..c6d8fc87 100644 --- a/thirdparty/directxtex/DirectXTex/BC6HBC7.cpp +++ b/thirdparty/directxtex/DirectXTex/BC6HBC7.cpp @@ -311,7 +311,7 @@ static const uint8_t g_aFixUp[3][64][3] = // BC6H Compression const D3DX_BC6H::ModeDescriptor D3DX_BC6H::ms_aDesc[14][82] = { - { // 0x00 - 10 5 5 5 + { // Mode 1 (0x00) - 10 5 5 5 { M, 0}, { M, 1}, {GY, 4}, {BY, 4}, {BZ, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4}, {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4}, {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4}, @@ -323,7 +323,7 @@ const D3DX_BC6H::ModeDescriptor D3DX_BC6H::ms_aDesc[14][82] = { D, 3}, { D, 4}, }, - { // 0x01 - 7 6 6 6 + { // Mode 2 (0x01) - 7 6 6 6 { M, 0}, { M, 1}, {GY, 5}, {GZ, 4}, {GZ, 5}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4}, {RW, 5}, {RW, 6}, {BZ, 0}, {BZ, 1}, {BY, 4}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4}, {GW, 5}, {GW, 6}, {BY, 5}, {BZ, 2}, {GY, 4}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4}, @@ -335,7 +335,7 @@ const D3DX_BC6H::ModeDescriptor D3DX_BC6H::ms_aDesc[14][82] = { D, 3}, { D, 4}, }, - { // 0x02 - 11 5 4 4 + { // Mode 3 (0x02) - 11 5 4 4 { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4}, {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4}, {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4}, @@ -347,7 +347,7 @@ const D3DX_BC6H::ModeDescriptor D3DX_BC6H::ms_aDesc[14][82] = { D, 
3}, { D, 4}, }, - { // 0x06 - 11 4 5 4 + { // Mode 4 (0x06) - 11 4 5 4 { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4}, {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4}, {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4}, @@ -359,7 +359,7 @@ const D3DX_BC6H::ModeDescriptor D3DX_BC6H::ms_aDesc[14][82] = { D, 3}, { D, 4}, }, - { // 0x0a - 11 4 4 5 + { // Mode 5 (0x0a) - 11 4 4 5 { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4}, {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4}, {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4}, @@ -371,7 +371,7 @@ const D3DX_BC6H::ModeDescriptor D3DX_BC6H::ms_aDesc[14][82] = { D, 3}, { D, 4}, }, - { // 0x0e - 9 5 5 5 + { // Mode 6 (0x0e) - 9 5 5 5 { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4}, {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {BY, 4}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4}, {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GY, 4}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4}, @@ -383,7 +383,7 @@ const D3DX_BC6H::ModeDescriptor D3DX_BC6H::ms_aDesc[14][82] = { D, 3}, { D, 4}, }, - { // 0x12 - 8 6 5 5 + { // Mode 7 (0x12) - 8 6 5 5 { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4}, {RW, 5}, {RW, 6}, {RW, 7}, {GZ, 4}, {BY, 4}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4}, {GW, 5}, {GW, 6}, {GW, 7}, {BZ, 2}, {GY, 4}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4}, @@ -395,7 +395,7 @@ const D3DX_BC6H::ModeDescriptor D3DX_BC6H::ms_aDesc[14][82] = { D, 3}, { D, 4}, }, - { // 0x16 - 8 5 6 5 + { // Mode 8 (0x16) - 8 5 6 5 { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4}, {RW, 5}, {RW, 6}, {RW, 7}, {BZ, 0}, {BY, 4}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4}, {GW, 5}, {GW, 6}, {GW, 7}, {GY, 5}, {GY, 
4}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4}, @@ -407,7 +407,7 @@ const D3DX_BC6H::ModeDescriptor D3DX_BC6H::ms_aDesc[14][82] = { D, 3}, { D, 4}, }, - { // 0x1a - 8 5 5 6 + { // Mode 9 (0x1a) - 8 5 5 6 { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4}, {RW, 5}, {RW, 6}, {RW, 7}, {BZ, 1}, {BY, 4}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4}, {GW, 5}, {GW, 6}, {GW, 7}, {BY, 5}, {GY, 4}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4}, @@ -419,7 +419,7 @@ const D3DX_BC6H::ModeDescriptor D3DX_BC6H::ms_aDesc[14][82] = { D, 3}, { D, 4}, }, - { // 0x1e - 6 6 6 6 + { // Mode 10 (0x1e) - 6 6 6 6 { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4}, {RW, 5}, {GZ, 4}, {BZ, 0}, {BZ, 1}, {BY, 4}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4}, {GW, 5}, {GY, 5}, {BY, 5}, {BZ, 2}, {GY, 4}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4}, @@ -431,7 +431,7 @@ const D3DX_BC6H::ModeDescriptor D3DX_BC6H::ms_aDesc[14][82] = { D, 3}, { D, 4}, }, - { // 0x03 - 10 10 + { // Mode 11 (0x03) - 10 10 { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4}, {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4}, {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4}, @@ -443,7 +443,7 @@ const D3DX_BC6H::ModeDescriptor D3DX_BC6H::ms_aDesc[14][82] = {NA, 0}, {NA, 0}, }, - { // 0x07 - 11 9 + { // Mode 12 (0x07) - 11 9 { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4}, {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4}, {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4}, @@ -455,7 +455,7 @@ const D3DX_BC6H::ModeDescriptor D3DX_BC6H::ms_aDesc[14][82] = {NA, 0}, {NA, 0}, }, - { // 0x0b - 12 8 + { // Mode 13 (0x0b) - 12 8 { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4}, {RW, 5}, 
{RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4}, {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4}, @@ -467,7 +467,7 @@ const D3DX_BC6H::ModeDescriptor D3DX_BC6H::ms_aDesc[14][82] = {NA, 0}, {NA, 0}, }, - { // 0x0f - 16 4 + { // Mode 14 (0x0f) - 16 4 { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4}, {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4}, {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4}, @@ -483,56 +483,56 @@ const D3DX_BC6H::ModeDescriptor D3DX_BC6H::ms_aDesc[14][82] = // Mode, Partitions, Transformed, IndexPrec, RGBAPrec const D3DX_BC6H::ModeInfo D3DX_BC6H::ms_aInfo[] = { - {0x00, 1, true, 3, LDRColorA(10,10,10,0), LDRColorA( 5, 5, 5,0), LDRColorA(5,5,5,0), LDRColorA(5,5,5,0)}, // Mode 0 - {0x01, 1, true, 3, LDRColorA( 7, 7, 7,0), LDRColorA( 6, 6, 6,0), LDRColorA(6,6,6,0), LDRColorA(6,6,6,0)}, // Mode 1 - {0x02, 1, true, 3, LDRColorA(11,11,11,0), LDRColorA( 5, 4, 4,0), LDRColorA(5,4,4,0), LDRColorA(5,4,4,0)}, // Mode 2 - {0x06, 1, true, 3, LDRColorA(11,11,11,0), LDRColorA( 4, 5, 4,0), LDRColorA(4,5,4,0), LDRColorA(4,5,4,0)}, // Mode 3 - {0x0a, 1, true, 3, LDRColorA(11,11,11,0), LDRColorA( 4, 4, 5,0), LDRColorA(4,4,5,0), LDRColorA(4,4,5,0)}, // Mode 4 - {0x0e, 1, true, 3, LDRColorA( 9, 9, 9,0), LDRColorA( 5, 5, 5,0), LDRColorA(5,5,5,0), LDRColorA(5,5,5,0)}, // Mode 5 - {0x12, 1, true, 3, LDRColorA( 8, 8, 8,0), LDRColorA( 6, 5, 5,0), LDRColorA(6,5,5,0), LDRColorA(6,5,5,0)}, // Mode 6 - {0x16, 1, true, 3, LDRColorA( 8, 8, 8,0), LDRColorA( 5, 6, 5,0), LDRColorA(5,6,5,0), LDRColorA(5,6,5,0)}, // Mode 7 - {0x1a, 1, true, 3, LDRColorA( 8, 8, 8,0), LDRColorA( 5, 5, 6,0), LDRColorA(5,5,6,0), LDRColorA(5,5,6,0)}, // Mode 8 - {0x1e, 1, false, 3, LDRColorA( 6, 6, 6,0), LDRColorA( 6, 6, 6,0), LDRColorA(6,6,6,0), LDRColorA(6,6,6,0)}, // Mode 9 - {0x03, 0, false, 4, 
LDRColorA(10,10,10,0), LDRColorA(10,10,10,0), LDRColorA(0,0,0,0), LDRColorA(0,0,0,0)}, // Mode 10 - {0x07, 0, true, 4, LDRColorA(11,11,11,0), LDRColorA( 9, 9, 9,0), LDRColorA(0,0,0,0), LDRColorA(0,0,0,0)}, // Mode 11 - {0x0b, 0, true, 4, LDRColorA(12,12,12,0), LDRColorA( 8, 8, 8,0), LDRColorA(0,0,0,0), LDRColorA(0,0,0,0)}, // Mode 12 - {0x0f, 0, true, 4, LDRColorA(16,16,16,0), LDRColorA( 4, 4, 4,0), LDRColorA(0,0,0,0), LDRColorA(0,0,0,0)}, // Mode 13 + {0x00, 1, true, 3, LDRColorA(10,10,10,0), LDRColorA( 5, 5, 5,0), LDRColorA(5,5,5,0), LDRColorA(5,5,5,0)}, // Mode 1 + {0x01, 1, true, 3, LDRColorA( 7, 7, 7,0), LDRColorA( 6, 6, 6,0), LDRColorA(6,6,6,0), LDRColorA(6,6,6,0)}, // Mode 2 + {0x02, 1, true, 3, LDRColorA(11,11,11,0), LDRColorA( 5, 4, 4,0), LDRColorA(5,4,4,0), LDRColorA(5,4,4,0)}, // Mode 3 + {0x06, 1, true, 3, LDRColorA(11,11,11,0), LDRColorA( 4, 5, 4,0), LDRColorA(4,5,4,0), LDRColorA(4,5,4,0)}, // Mode 4 + {0x0a, 1, true, 3, LDRColorA(11,11,11,0), LDRColorA( 4, 4, 5,0), LDRColorA(4,4,5,0), LDRColorA(4,4,5,0)}, // Mode 5 + {0x0e, 1, true, 3, LDRColorA( 9, 9, 9,0), LDRColorA( 5, 5, 5,0), LDRColorA(5,5,5,0), LDRColorA(5,5,5,0)}, // Mode 6 + {0x12, 1, true, 3, LDRColorA( 8, 8, 8,0), LDRColorA( 6, 5, 5,0), LDRColorA(6,5,5,0), LDRColorA(6,5,5,0)}, // Mode 7 + {0x16, 1, true, 3, LDRColorA( 8, 8, 8,0), LDRColorA( 5, 6, 5,0), LDRColorA(5,6,5,0), LDRColorA(5,6,5,0)}, // Mode 8 + {0x1a, 1, true, 3, LDRColorA( 8, 8, 8,0), LDRColorA( 5, 5, 6,0), LDRColorA(5,5,6,0), LDRColorA(5,5,6,0)}, // Mode 9 + {0x1e, 1, false, 3, LDRColorA( 6, 6, 6,0), LDRColorA( 6, 6, 6,0), LDRColorA(6,6,6,0), LDRColorA(6,6,6,0)}, // Mode 10 + {0x03, 0, false, 4, LDRColorA(10,10,10,0), LDRColorA(10,10,10,0), LDRColorA(0,0,0,0), LDRColorA(0,0,0,0)}, // Mode 11 + {0x07, 0, true, 4, LDRColorA(11,11,11,0), LDRColorA( 9, 9, 9,0), LDRColorA(0,0,0,0), LDRColorA(0,0,0,0)}, // Mode 12 + {0x0b, 0, true, 4, LDRColorA(12,12,12,0), LDRColorA( 8, 8, 8,0), LDRColorA(0,0,0,0), LDRColorA(0,0,0,0)}, // Mode 13 + 
{0x0f, 0, true, 4, LDRColorA(16,16,16,0), LDRColorA( 4, 4, 4,0), LDRColorA(0,0,0,0), LDRColorA(0,0,0,0)}, // Mode 14 }; const int D3DX_BC6H::ms_aModeToInfo[] = { - 0, // 0x00 - 1, // 0x01 - 2, // 0x02 - 10, // 0x03 - -1, // 0x04 - -1, // 0x05 - 3, // 0x06 - 11, // 0x07 - -1, // 0x08 - -1, // 0x09 - 4, // 0x0a - 12, // 0x0b - -1, // 0x0c - -1, // 0x0d - 5, // 0x0e - 13, // 0x0f - -1, // 0x10 - -1, // 0x11 - 6, // 0x12 - -1, // 0x13 - -1, // 0x14 - -1, // 0x15 - 7, // 0x16 - -1, // 0x17 - -1, // 0x18 - -1, // 0x19 - 8, // 0x1a - -1, // 0x1b - -1, // 0x1c - -1, // 0x1d - 9, // 0x1e - -1, // 0x1f + 0, // Mode 1 - 0x00 + 1, // Mode 2 - 0x01 + 2, // Mode 3 - 0x02 + 10, // Mode 11 - 0x03 + -1, // Invalid - 0x04 + -1, // Invalid - 0x05 + 3, // Mode 4 - 0x06 + 11, // Mode 12 - 0x07 + -1, // Invalid - 0x08 + -1, // Invalid - 0x09 + 4, // Mode 5 - 0x0a + 12, // Mode 13 - 0x0b + -1, // Invalid - 0x0c + -1, // Invalid - 0x0d + 5, // Mode 6 - 0x0e + 13, // Mode 14 - 0x0f + -1, // Invalid - 0x10 + -1, // Invalid - 0x11 + 6, // Mode 7 - 0x12 + -1, // Reserved - 0x13 + -1, // Invalid - 0x14 + -1, // Invalid - 0x15 + 7, // Mode 8 - 0x16 + -1, // Reserved - 0x17 + -1, // Invalid - 0x18 + -1, // Invalid - 0x19 + 8, // Mode 9 - 0x1a + -1, // Reserved - 0x1b + -1, // Invalid - 0x1c + -1, // Invalid - 0x1d + 9, // Mode 10 - 0x1e + -1, // Reserved - 0x1f }; // BC7 compression: uPartitions, uPartitionBits, uPBits, uRotationBits, uIndexModeBits, uIndexPrec, uIndexPrec2, RGBAPrec, RGBAPrecWithP @@ -560,18 +560,10 @@ const D3DX_BC7::ModeInfo D3DX_BC7::ms_aInfo[] = //------------------------------------------------------------------------------------- // Helper functions //------------------------------------------------------------------------------------- -template< class T > -inline static void Swap( T& a, T& b ) -{ - T temp = a; - a = b; - b = temp; -} - inline static bool IsFixUpOffset(_In_range_(0,2) size_t uPartitions, _In_range_(0,63) size_t uShape, _In_range_(0,15) size_t uOffset) { 
assert(uPartitions < 3 && uShape < 64 && uOffset < 16); - __analysis_assume(uPartitions < 3 && uShape < 64 && uOffset < 16); + _Analysis_assume_(uPartitions < 3 && uShape < 64 && uOffset < 16); for(size_t p = 0; p <= uPartitions; p++) { if(uOffset == g_aFixUp[uPartitions][uShape][p]) @@ -582,37 +574,14 @@ inline static bool IsFixUpOffset(_In_range_(0,2) size_t uPartitions, _In_range_( return false; } -inline static float ErrorMetricRGB(_In_ const LDRColorA& a, _In_ const LDRColorA& b) -{ - float er = float(a.r) - float(b.r); - float eg = float(a.g) - float(b.g); - float eb = float(a.b) - float(b.b); - // weigh the components nonuniformly - //er *= 0.299; - //eg *= 0.587; - //eb *= 0.114; - return er*er + eg*eg + eb*eb; -} - -inline static float ErrorMetricAlpha(_In_ const LDRColorA& a, _In_ const LDRColorA& b) -{ - float ea = float(a.a) - float(b.a); - return ea*ea; -} - -inline static float ErrorMetric(_In_ const LDRColorA& a, _In_ const LDRColorA& b) -{ - return ErrorMetricRGB(a, b) + ErrorMetricAlpha(a, b); -} - -inline static void TransformForward(_Inout_count_c_(BC6H_MAX_REGIONS) INTEndPntPair aEndPts[]) +inline static void TransformForward(_Inout_updates_all_(BC6H_MAX_REGIONS) INTEndPntPair aEndPts[]) { aEndPts[0].B -= aEndPts[0].A; aEndPts[1].A -= aEndPts[0].A; aEndPts[1].B -= aEndPts[0].A; } -inline static void TransformInverse(_Inout_count_c_(BC6H_MAX_REGIONS) INTEndPntPair aEndPts[], _In_ const LDRColorA& Prec, _In_ bool bSigned) +inline static void TransformInverse(_Inout_updates_all_(BC6H_MAX_REGIONS) INTEndPntPair aEndPts[], _In_ const LDRColorA& Prec, _In_ bool bSigned) { INTColor WrapMask((1 << Prec.r) - 1, (1 << Prec.g) - 1, (1 << Prec.b) - 1); aEndPts[0].B += aEndPts[0].A; aEndPts[0].B &= WrapMask; @@ -657,9 +626,9 @@ inline static int NBits(_In_ int n, _In_ bool bIsSigned) //------------------------------------------------------------------------------------- -static float OptimizeRGB(_In_count_c_(NUM_PIXELS_PER_BLOCK) const HDRColorA* const 
pPoints, +static float OptimizeRGB(_In_reads_(NUM_PIXELS_PER_BLOCK) const HDRColorA* const pPoints, _Out_ HDRColorA* pX, _Out_ HDRColorA* pY, - _In_ size_t cSteps, _In_ size_t cPixels, _In_count_(cPixels) const size_t* pIndex) + _In_ size_t cSteps, _In_ size_t cPixels, _In_reads_(cPixels) const size_t* pIndex) { float fError = FLT_MAX; const float *pC = (3 == cSteps) ? pC3 : pC4; @@ -737,8 +706,8 @@ static float OptimizeRGB(_In_count_c_(NUM_PIXELS_PER_BLOCK) const HDRColorA* con } } - if(iDirMax & 2) Swap( X.g, Y.g ); - if(iDirMax & 1) Swap( X.b, Y.b ); + if(iDirMax & 2) std::swap( X.g, Y.g ); + if(iDirMax & 1) std::swap( X.b, Y.b ); // Two color block.. no need to root-find if(fAB < 1.0f / 4096.0f) @@ -754,7 +723,7 @@ static float OptimizeRGB(_In_count_c_(NUM_PIXELS_PER_BLOCK) const HDRColorA* con for(size_t iIteration = 0; iIteration < 8; iIteration++) { // Calculate new steps - HDRColorA pSteps[4]; + HDRColorA pSteps[4] = {}; for(size_t iStep = 0; iStep < cSteps; iStep++) { @@ -849,9 +818,9 @@ static float OptimizeRGB(_In_count_c_(NUM_PIXELS_PER_BLOCK) const HDRColorA* con //------------------------------------------------------------------------------------- -static float OptimizeRGBA(_In_count_c_(NUM_PIXELS_PER_BLOCK) const HDRColorA* const pPoints, +static float OptimizeRGBA(_In_reads_(NUM_PIXELS_PER_BLOCK) const HDRColorA* const pPoints, _Out_ HDRColorA* pX, _Out_ HDRColorA* pY, - _In_ size_t cSteps, _In_ size_t cPixels, _In_count_(cPixels) const size_t* pIndex) + _In_ size_t cSteps, _In_ size_t cPixels, _In_reads_(cPixels) const size_t* pIndex) { float fError = FLT_MAX; const float *pC = (3 == cSteps) ? pC3 : pC4; @@ -924,9 +893,9 @@ static float OptimizeRGBA(_In_count_c_(NUM_PIXELS_PER_BLOCK) const HDRColorA* co } } - if(iDirMax & 4) Swap(X.g, Y.g); - if(iDirMax & 2) Swap(X.b, Y.b); - if(iDirMax & 1) Swap(X.a, Y.a); + if(iDirMax & 4) std::swap(X.g, Y.g); + if(iDirMax & 2) std::swap(X.b, Y.b); + if(iDirMax & 1) std::swap(X.a, Y.a); // Two color block.. 
no need to root-find if(fAB < 1.0f / 4096.0f) @@ -944,7 +913,6 @@ static float OptimizeRGBA(_In_count_c_(NUM_PIXELS_PER_BLOCK) const HDRColorA* co // Calculate new steps HDRColorA pSteps[BC7_MAX_INDICES]; - LDRColorA aSteps[BC7_MAX_INDICES]; LDRColorA lX, lY; lX = (X * 255.0f).ToLDRColorA(); lY = (Y * 255.0f).ToLDRColorA(); @@ -1014,13 +982,12 @@ static float OptimizeRGBA(_In_count_c_(NUM_PIXELS_PER_BLOCK) const HDRColorA* co //------------------------------------------------------------------------------------- -#pragma warning(disable: 4616 6001 6297) -static float ComputeError(_Inout_ const LDRColorA& pixel, _In_count_x_(1 << uIndexPrec) const LDRColorA aPalette[], +static float ComputeError(_Inout_ const LDRColorA& pixel, _In_reads_(1 << uIndexPrec) const LDRColorA aPalette[], _In_ uint8_t uIndexPrec, _In_ uint8_t uIndexPrec2, _Out_opt_ size_t* pBestIndex = nullptr, _Out_opt_ size_t* pBestIndex2 = nullptr) { - const size_t uNumIndices = 1 << uIndexPrec; - const size_t uNumIndices2 = 1 << uIndexPrec2; + const size_t uNumIndices = size_t(1) << uIndexPrec; + const size_t uNumIndices2 = size_t(1) << uIndexPrec2; float fTotalErr = 0; float fBestErr = FLT_MAX; @@ -1029,11 +996,16 @@ static float ComputeError(_Inout_ const LDRColorA& pixel, _In_count_x_(1 << uInd if(pBestIndex2) *pBestIndex2 = 0; + XMVECTOR vpixel = XMLoadUByte4( reinterpret_cast<const XMUBYTE4*>( &pixel ) ); + if(uIndexPrec2 == 0) { for(register size_t i = 0; i < uNumIndices && fBestErr > 0; i++) { - float fErr = ErrorMetric(pixel, aPalette[i]); + XMVECTOR tpixel = XMLoadUByte4( reinterpret_cast<const XMUBYTE4*>( &aPalette[i] ) ); + // Compute ErrorMetric + tpixel = XMVectorSubtract( vpixel, tpixel ); + float fErr = XMVectorGetX( XMVector4Dot( tpixel, tpixel ) ); if(fErr > fBestErr) // error increased, so we're done searching break; if(fErr < fBestErr) @@ -1049,7 +1021,10 @@ static float ComputeError(_Inout_ const LDRColorA& pixel, _In_count_x_(1 << uInd { for(register size_t i = 0; i < uNumIndices 
&& fBestErr > 0; i++) { - float fErr = ErrorMetricRGB(pixel, aPalette[i]); + XMVECTOR tpixel = XMLoadUByte4( reinterpret_cast<const XMUBYTE4*>( &aPalette[i] ) ); + // Compute ErrorMetricRGB + tpixel = XMVectorSubtract( vpixel, tpixel ); + float fErr = XMVectorGetX( XMVector3Dot( tpixel, tpixel ) ); if(fErr > fBestErr) // error increased, so we're done searching break; if(fErr < fBestErr) @@ -1063,7 +1038,9 @@ static float ComputeError(_Inout_ const LDRColorA& pixel, _In_count_x_(1 << uInd fBestErr = FLT_MAX; for(register size_t i = 0; i < uNumIndices2 && fBestErr > 0; i++) { - float fErr = ErrorMetricAlpha(pixel, aPalette[i]); + // Compute ErrorMetricAlpha + float ea = float(pixel.a) - float(aPalette[i].a); + float fErr = ea*ea; if(fErr > fBestErr) // error increased, so we're done searching break; if(fErr < fBestErr) @@ -1080,7 +1057,7 @@ static float ComputeError(_Inout_ const LDRColorA& pixel, _In_count_x_(1 << uInd } -inline static void FillWithErrorColors( _Out_cap_c_(NUM_PIXELS_PER_BLOCK) HDRColorA* pOut ) +inline static void FillWithErrorColors( _Out_writes_(NUM_PIXELS_PER_BLOCK) HDRColorA* pOut ) { for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i) { @@ -1098,6 +1075,7 @@ inline static void FillWithErrorColors( _Out_cap_c_(NUM_PIXELS_PER_BLOCK) HDRCol //------------------------------------------------------------------------------------- // BC6H Compression //------------------------------------------------------------------------------------- +_Use_decl_annotations_ void D3DX_BC6H::Decode(bool bSigned, HDRColorA* pOut) const { assert(pOut ); @@ -1110,16 +1088,16 @@ void D3DX_BC6H::Decode(bool bSigned, HDRColorA* pOut) const } assert( uMode < 32 ); - __analysis_assume( uMode < 32 ); + _Analysis_assume_( uMode < 32 ); if ( ms_aModeToInfo[uMode] >= 0 ) { assert(ms_aModeToInfo[uMode] < ARRAYSIZE(ms_aInfo)); - __analysis_assume(ms_aModeToInfo[uMode] < ARRAYSIZE(ms_aInfo)); + _Analysis_assume_(ms_aModeToInfo[uMode] < ARRAYSIZE(ms_aInfo)); const ModeDescriptor* 
desc = ms_aDesc[ms_aModeToInfo[uMode]]; assert(ms_aModeToInfo[uMode] < ARRAYSIZE(ms_aDesc)); - __analysis_assume(ms_aModeToInfo[uMode] < ARRAYSIZE(ms_aDesc)); + _Analysis_assume_(ms_aModeToInfo[uMode] < ARRAYSIZE(ms_aDesc)); const ModeInfo& info = ms_aInfo[ms_aModeToInfo[uMode]]; INTEndPntPair aEndPts[BC6H_MAX_REGIONS]; @@ -1161,7 +1139,7 @@ void D3DX_BC6H::Decode(bool bSigned, HDRColorA* pOut) const } assert( uShape < 64 ); - __analysis_assume( uShape < 64 ); + _Analysis_assume_( uShape < 64 ); // Sign extend necessary end points if(bSigned) @@ -1171,7 +1149,7 @@ void D3DX_BC6H::Decode(bool bSigned, HDRColorA* pOut) const if(bSigned || info.bTransformed) { assert( info.uPartitions < BC6H_MAX_REGIONS ); - __analysis_assume( info.uPartitions < BC6H_MAX_REGIONS ); + _Analysis_assume_( info.uPartitions < BC6H_MAX_REGIONS ); for(size_t p = 0; p <= info.uPartitions; ++p) { if(p != 0) @@ -1213,7 +1191,7 @@ void D3DX_BC6H::Decode(bool bSigned, HDRColorA* pOut) const size_t uRegion = g_aPartitionTable[info.uPartitions][uShape][i]; assert( uRegion < BC6H_MAX_REGIONS ); - __analysis_assume( uRegion < BC6H_MAX_REGIONS ); + _Analysis_assume_( uRegion < BC6H_MAX_REGIONS ); // Unquantize endpoints and interpolate int r1 = Unquantize(aEndPts[uRegion].A.r, info.RGBAPrec[0][0].r, bSigned); @@ -1240,12 +1218,25 @@ void D3DX_BC6H::Decode(bool bSigned, HDRColorA* pOut) const else { #ifdef _DEBUG - OutputDebugStringA( "BC6H: Invalid mode encountered during decoding\n" ); + const char* warnstr = "BC6H: Invalid mode encountered during decoding\n"; + switch( uMode ) + { + case 0x13: warnstr = "BC6H: Reserved mode 10011 encountered during decoding\n"; break; + case 0x17: warnstr = "BC6H: Reserved mode 10111 encountered during decoding\n"; break; + case 0x1B: warnstr = "BC6H: Reserved mode 11011 encountered during decoding\n"; break; + case 0x1F: warnstr = "BC6H: Reserved mode 11111 encountered during decoding\n"; break; + } + OutputDebugStringA( warnstr ); #endif - FillWithErrorColors( 
pOut ); + // Per the BC6H format spec, we must return opaque black + for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i) + { + pOut[i] = HDRColorA(0.0f, 0.0f, 0.0f, 1.0f); + } } } +_Use_decl_annotations_ void D3DX_BC6H::Encode(bool bSigned, const HDRColorA* const pIn) { assert( pIn ); @@ -1276,8 +1267,8 @@ void D3DX_BC6H::Encode(bool bSigned, const HDRColorA* const pIn) { if(afRoughMSE[i] > afRoughMSE[j]) { - Swap(afRoughMSE[i], afRoughMSE[j]); - Swap(auShape[i], auShape[j]); + std::swap(afRoughMSE[i], afRoughMSE[j]); + std::swap(auShape[i], auShape[j]); } } } @@ -1292,6 +1283,7 @@ void D3DX_BC6H::Encode(bool bSigned, const HDRColorA* const pIn) //------------------------------------------------------------------------------------- +_Use_decl_annotations_ int D3DX_BC6H::Quantize(int iValue, int prec, bool bSigned) { assert(prec > 1); // didn't bother to make it work for 1 @@ -1319,6 +1311,7 @@ int D3DX_BC6H::Quantize(int iValue, int prec, bool bSigned) return q; } +_Use_decl_annotations_ int D3DX_BC6H::Unquantize(int comp, uint8_t uBitsPerComp, bool bSigned) { int unq = 0, s = 0; @@ -1354,6 +1347,7 @@ int D3DX_BC6H::Unquantize(int comp, uint8_t uBitsPerComp, bool bSigned) return unq; } +_Use_decl_annotations_ int D3DX_BC6H::FinishUnquantize(int comp, bool bSigned) { if(bSigned) @@ -1368,6 +1362,7 @@ int D3DX_BC6H::FinishUnquantize(int comp, bool bSigned) //------------------------------------------------------------------------------------- +_Use_decl_annotations_ bool D3DX_BC6H::EndPointsFit(const EncodeParams* pEP, const INTEndPntPair aEndPts[]) { assert( pEP ); @@ -1408,13 +1403,14 @@ bool D3DX_BC6H::EndPointsFit(const EncodeParams* pEP, const INTEndPntPair aEndPt return true; } +_Use_decl_annotations_ void D3DX_BC6H::GeneratePaletteQuantized(const EncodeParams* pEP, const INTEndPntPair& endPts, INTColor aPalette[]) const { assert( pEP ); const size_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec; - const size_t uNumIndices = 1 << uIndexPrec; + const size_t 
uNumIndices = size_t(1) << uIndexPrec; assert( uNumIndices > 0 ); - __analysis_assume( uNumIndices > 0 ); + _Analysis_assume_( uNumIndices > 0 ); const LDRColorA& Prec = ms_aInfo[pEP->uMode].RGBAPrec[0][0]; // scale endpoints @@ -1430,9 +1426,16 @@ void D3DX_BC6H::GeneratePaletteQuantized(const EncodeParams* pEP, const INTEndPn const int* aWeights = nullptr; switch(uIndexPrec) { - case 3: aWeights = g_aWeights3; assert(uNumIndices <= 8); __analysis_assume(uNumIndices <= 8); break; - case 4: aWeights = g_aWeights4; assert(uNumIndices <= 16); __analysis_assume(uNumIndices <= 16); break; - default: assert(false); for(size_t i=0; i < uNumIndices; ++i) aPalette[i] = INTColor(0,0,0); return; + case 3: aWeights = g_aWeights3; assert(uNumIndices <= 8); _Analysis_assume_(uNumIndices <= 8); break; + case 4: aWeights = g_aWeights4; assert(uNumIndices <= 16); _Analysis_assume_(uNumIndices <= 16); break; + default: + assert(false); + for(size_t i = 0; i < uNumIndices; ++i) + { + #pragma prefast(suppress:22102 22103, "writing blocks in two halves confuses tool") + aPalette[i] = INTColor(0,0,0); + } + return; } for (size_t i = 0; i < uNumIndices; ++i) @@ -1450,6 +1453,7 @@ void D3DX_BC6H::GeneratePaletteQuantized(const EncodeParams* pEP, const INTEndPn } // given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr +_Use_decl_annotations_ float D3DX_BC6H::MapColorsQuantized(const EncodeParams* pEP, const INTColor aColors[], size_t np, const INTEndPntPair &endPts) const { assert( pEP ); @@ -1462,10 +1466,19 @@ float D3DX_BC6H::MapColorsQuantized(const EncodeParams* pEP, const INTColor aCol float fTotErr = 0; for(size_t i = 0; i < np; ++i) { - float fBestErr = Norm(aColors[i], aPalette[0]); + XMVECTOR vcolors = XMLoadSInt4( reinterpret_cast<const XMINT4*>( &aColors[i] ) ); + + // Compute ErrorMetricRGB + XMVECTOR tpal = XMLoadSInt4( reinterpret_cast<const XMINT4*>( &aPalette[0] ) ); + tpal = XMVectorSubtract( vcolors, 
tpal ); + float fBestErr = XMVectorGetX( XMVector3Dot( tpal, tpal ) ); + for(int j = 1; j < uNumIndices && fBestErr > 0; ++j) { - float fErr = Norm(aColors[i], aPalette[j]); + // Compute ErrorMetricRGB + tpal = XMLoadSInt4( reinterpret_cast<const XMINT4*>( &aPalette[j] ) ); + tpal = XMVectorSubtract( vcolors, tpal ); + float fErr = XMVectorGetX( XMVector3Dot( tpal, tpal ) ); if(fErr > fBestErr) break; // error increased, so we're done searching if(fErr < fBestErr) fBestErr = fErr; } @@ -1474,6 +1487,7 @@ float D3DX_BC6H::MapColorsQuantized(const EncodeParams* pEP, const INTColor aCol return fTotErr; } +_Use_decl_annotations_ float D3DX_BC6H::PerturbOne(const EncodeParams* pEP, const INTColor aColors[], size_t np, uint8_t ch, const INTEndPntPair& oldEndPts, INTEndPntPair& newEndPts, float fOldErr, int do_b) const { @@ -1533,6 +1547,7 @@ float D3DX_BC6H::PerturbOne(const EncodeParams* pEP, const INTColor aColors[], s return fMinErr; } +_Use_decl_annotations_ void D3DX_BC6H::OptimizeOne(const EncodeParams* pEP, const INTColor aColors[], size_t np, float aOrgErr, const INTEndPntPair &aOrgEndPts, INTEndPntPair &aOptEndPts) const { @@ -1584,12 +1599,13 @@ void D3DX_BC6H::OptimizeOne(const EncodeParams* pEP, const INTColor aColors[], s } } +_Use_decl_annotations_ void D3DX_BC6H::OptimizeEndPoints(const EncodeParams* pEP, const float aOrgErr[], const INTEndPntPair aOrgEndPts[], INTEndPntPair aOptEndPts[]) const { assert( pEP ); const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions; assert( uPartitions < BC6H_MAX_REGIONS ); - __analysis_assume( uPartitions < BC6H_MAX_REGIONS ); + _Analysis_assume_( uPartitions < BC6H_MAX_REGIONS ); INTColor aPixels[NUM_PIXELS_PER_BLOCK]; for(size_t p = 0; p <= uPartitions; ++p) @@ -1609,15 +1625,16 @@ void D3DX_BC6H::OptimizeEndPoints(const EncodeParams* pEP, const float aOrgErr[] } // Swap endpoints as needed to ensure that the indices at fix up have a 0 high-order bit +_Use_decl_annotations_ void D3DX_BC6H::SwapIndices(const 
EncodeParams* pEP, INTEndPntPair aEndPts[], size_t aIndices[]) { assert( pEP ); const size_t uPartitions = ms_aInfo[pEP->uMode].uPartitions; - const size_t uNumIndices = 1 << ms_aInfo[pEP->uMode].uIndexPrec; + const size_t uNumIndices = size_t(1) << ms_aInfo[pEP->uMode].uIndexPrec; const size_t uHighIndexBit = uNumIndices >> 1; assert( uPartitions < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES ); - __analysis_assume( uPartitions < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES ); + _Analysis_assume_( uPartitions < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES ); for(size_t p = 0; p <= uPartitions; ++p) { @@ -1626,7 +1643,7 @@ void D3DX_BC6H::SwapIndices(const EncodeParams* pEP, INTEndPntPair aEndPts[], si if(aIndices[i] & uHighIndexBit) { // high bit is set, swap the aEndPts and indices for this region - Swap(aEndPts[p].A, aEndPts[p].B); + std::swap(aEndPts[p].A, aEndPts[p].B); for(size_t j = 0; j < NUM_PIXELS_PER_BLOCK; ++j) if(g_aPartitionTable[uPartitions][pEP->uShape][j] == p) @@ -1636,6 +1653,7 @@ void D3DX_BC6H::SwapIndices(const EncodeParams* pEP, INTEndPntPair aEndPts[], si } // assign indices given a tile, shape, and quantized endpoints, return toterr for each region +_Use_decl_annotations_ void D3DX_BC6H::AssignIndices(const EncodeParams* pEP, const INTEndPntPair aEndPts[], size_t aIndices[], float aTotErr[]) const { assert( pEP ); @@ -1643,7 +1661,7 @@ void D3DX_BC6H::AssignIndices(const EncodeParams* pEP, const INTEndPntPair aEndP const uint8_t uNumIndices = 1 << ms_aInfo[pEP->uMode].uIndexPrec; assert( uPartitions < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES ); - __analysis_assume( uPartitions < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES ); + _Analysis_assume_( uPartitions < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES ); // build list of possibles INTColor aPalette[BC6H_MAX_REGIONS][BC6H_MAX_INDICES]; @@ -1658,7 +1676,7 @@ void D3DX_BC6H::AssignIndices(const EncodeParams* pEP, const INTEndPntPair aEndP { const uint8_t 
uRegion = g_aPartitionTable[uPartitions][pEP->uShape][i]; assert( uRegion < BC6H_MAX_REGIONS ); - __analysis_assume( uRegion < BC6H_MAX_REGIONS ); + _Analysis_assume_( uRegion < BC6H_MAX_REGIONS ); float fBestErr = Norm(pEP->aIPixels[i], aPalette[uRegion][0]); aIndices[i] = 0; @@ -1676,6 +1694,7 @@ void D3DX_BC6H::AssignIndices(const EncodeParams* pEP, const INTEndPntPair aEndP } } +_Use_decl_annotations_ void D3DX_BC6H::QuantizeEndPts(const EncodeParams* pEP, INTEndPntPair* aQntEndPts) const { assert( pEP && aQntEndPts ); @@ -1683,7 +1702,7 @@ void D3DX_BC6H::QuantizeEndPts(const EncodeParams* pEP, INTEndPntPair* aQntEndPt const LDRColorA& Prec = ms_aInfo[pEP->uMode].RGBAPrec[0][0]; const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions; assert( uPartitions < BC6H_MAX_REGIONS ); - __analysis_assume( uPartitions < BC6H_MAX_REGIONS ); + _Analysis_assume_( uPartitions < BC6H_MAX_REGIONS ); for(size_t p = 0; p <= uPartitions; ++p) { @@ -1696,6 +1715,7 @@ void D3DX_BC6H::QuantizeEndPts(const EncodeParams* pEP, INTEndPntPair* aQntEndPt } } +_Use_decl_annotations_ void D3DX_BC6H::EmitBlock(const EncodeParams* pEP, const INTEndPntPair aEndPts[], const size_t aIndices[]) { assert( pEP ); @@ -1738,12 +1758,13 @@ void D3DX_BC6H::EmitBlock(const EncodeParams* pEP, const INTEndPntPair aEndPts[] assert(uStartBit == 128); } +_Use_decl_annotations_ void D3DX_BC6H::Refine(EncodeParams* pEP) { assert( pEP ); const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions; assert( uPartitions < BC6H_MAX_REGIONS ); - __analysis_assume( uPartitions < BC6H_MAX_REGIONS ); + _Analysis_assume_( uPartitions < BC6H_MAX_REGIONS ); const bool bTransformed = ms_aInfo[pEP->uMode].bTransformed; float aOrgErr[BC6H_MAX_REGIONS], aOptErr[BC6H_MAX_REGIONS]; @@ -1786,23 +1807,31 @@ void D3DX_BC6H::Refine(EncodeParams* pEP) } } +_Use_decl_annotations_ void D3DX_BC6H::GeneratePaletteUnquantized(const EncodeParams* pEP, size_t uRegion, INTColor aPalette[]) { assert( pEP ); assert( uRegion < 
BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES ); - __analysis_assume( uRegion < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES ); + _Analysis_assume_( uRegion < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES ); const INTEndPntPair& endPts = pEP->aUnqEndPts[pEP->uShape][uRegion]; const uint8_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec; const uint8_t uNumIndices = 1 << uIndexPrec; - assert( uNumIndices > 0 ); - __analysis_assume( uNumIndices > 0 ); + assert(uNumIndices > 0); + _Analysis_assume_(uNumIndices > 0); const int* aWeights = nullptr; switch(uIndexPrec) { - case 3: aWeights = g_aWeights3; assert(uNumIndices <= 8); __analysis_assume(uNumIndices <= 8); break; - case 4: aWeights = g_aWeights4; assert(uNumIndices <= 16); __analysis_assume(uNumIndices <= 16); break; - default: assert(false); for(size_t i = 0; i < uNumIndices; ++i) aPalette[i] = INTColor(0,0,0); return; + case 3: aWeights = g_aWeights3; assert(uNumIndices <= 8); _Analysis_assume_(uNumIndices <= 8); break; + case 4: aWeights = g_aWeights4; assert(uNumIndices <= 16); _Analysis_assume_(uNumIndices <= 16); break; + default: + assert(false); + for(size_t i = 0; i < uNumIndices; ++i) + { + #pragma prefast(suppress:22102 22103, "writing blocks in two halves confuses tool") + aPalette[i] = INTColor(0,0,0); + } + return; } for(register size_t i = 0; i < uNumIndices; ++i) @@ -1813,6 +1842,7 @@ void D3DX_BC6H::GeneratePaletteUnquantized(const EncodeParams* pEP, size_t uRegi } } +_Use_decl_annotations_ float D3DX_BC6H::MapColors(const EncodeParams* pEP, size_t uRegion, size_t np, const size_t* auIndex) const { assert( pEP ); @@ -1837,17 +1867,18 @@ float D3DX_BC6H::MapColors(const EncodeParams* pEP, size_t uRegion, size_t np, c return fTotalErr; } +_Use_decl_annotations_ float D3DX_BC6H::RoughMSE(EncodeParams* pEP) const { assert( pEP ); assert( pEP->uShape < BC6H_MAX_SHAPES); - __analysis_assume( pEP->uShape < BC6H_MAX_SHAPES); + _Analysis_assume_( pEP->uShape < BC6H_MAX_SHAPES); INTEndPntPair* 
aEndPts = pEP->aUnqEndPts[pEP->uShape]; const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions; assert( uPartitions < BC6H_MAX_REGIONS ); - __analysis_assume( uPartitions < BC6H_MAX_REGIONS ); + _Analysis_assume_( uPartitions < BC6H_MAX_REGIONS ); size_t auPixIdx[NUM_PIXELS_PER_BLOCK]; @@ -1904,6 +1935,7 @@ float D3DX_BC6H::RoughMSE(EncodeParams* pEP) const //------------------------------------------------------------------------------------- // BC7 Compression //------------------------------------------------------------------------------------- +_Use_decl_annotations_ void D3DX_BC7::Decode(HDRColorA* pOut) const { assert( pOut ); @@ -1916,7 +1948,7 @@ void D3DX_BC7::Decode(HDRColorA* pOut) const { const uint8_t uPartitions = ms_aInfo[uMode].uPartitions; assert( uPartitions < BC7_MAX_REGIONS ); - __analysis_assume( uPartitions < BC7_MAX_REGIONS ); + _Analysis_assume_( uPartitions < BC7_MAX_REGIONS ); const uint8_t uNumEndPts = (uPartitions + 1) << 1; const uint8_t uIndexPrec = ms_aInfo[uMode].uIndexPrec; @@ -1926,7 +1958,7 @@ void D3DX_BC7::Decode(HDRColorA* pOut) const uint8_t P[6]; uint8_t uShape = GetBits(uStartBit, ms_aInfo[uMode].uPartitionBits); assert( uShape < BC7_MAX_SHAPES ); - __analysis_assume( uShape < BC7_MAX_SHAPES ); + _Analysis_assume_( uShape < BC7_MAX_SHAPES ); uint8_t uRotation = GetBits(uStartBit, ms_aInfo[uMode].uRotationBits); assert( uRotation < 4 ); @@ -2002,7 +2034,7 @@ void D3DX_BC7::Decode(HDRColorA* pOut) const // P-bits assert( ms_aInfo[uMode].uPBits <= 6 ); - __analysis_assume( ms_aInfo[uMode].uPBits <= 6 ); + _Analysis_assume_( ms_aInfo[uMode].uPBits <= 6 ); for(i = 0; i < ms_aInfo[uMode].uPBits; i++) { if ( uStartBit > 127 ) @@ -2094,9 +2126,9 @@ void D3DX_BC7::Decode(HDRColorA* pOut) const switch(uRotation) { - case 1: Swap(outPixel.r, outPixel.a); break; - case 2: Swap(outPixel.g, outPixel.a); break; - case 3: Swap(outPixel.b, outPixel.a); break; + case 1: std::swap(outPixel.r, outPixel.a); break; + case 2: 
std::swap(outPixel.g, outPixel.a); break; + case 3: std::swap(outPixel.b, outPixel.a); break; } pOut[i] = HDRColorA(outPixel); @@ -2105,13 +2137,15 @@ void D3DX_BC7::Decode(HDRColorA* pOut) const else { #ifdef _DEBUG - OutputDebugStringA( "BC7: Invalid mode encountered during decoding\n" ); + OutputDebugStringA( "BC7: Reserved mode 8 encountered during decoding\n" ); #endif - FillWithErrorColors( pOut ); + // Per the BC7 format spec, we must return transparent black + memset( pOut, 0, sizeof(HDRColorA) * NUM_PIXELS_PER_BLOCK ); } } -void D3DX_BC7::Encode(const HDRColorA* const pIn) +_Use_decl_annotations_ +void D3DX_BC7::Encode(bool skip3subsets, const HDRColorA* const pIn) { assert( pIn ); @@ -2129,12 +2163,18 @@ void D3DX_BC7::Encode(const HDRColorA* const pIn) for(EP.uMode = 0; EP.uMode < 8 && fMSEBest > 0; ++EP.uMode) { - const size_t uShapes = 1 << ms_aInfo[EP.uMode].uPartitionBits; + if ( skip3subsets && (EP.uMode == 0 || EP.uMode == 2) ) + { + // 3 subset modes tend to be used rarely and add significant compression time + continue; + } + + const size_t uShapes = size_t(1) << ms_aInfo[EP.uMode].uPartitionBits; assert( uShapes <= BC7_MAX_SHAPES ); - __analysis_assume( uShapes <= BC7_MAX_SHAPES ); + _Analysis_assume_( uShapes <= BC7_MAX_SHAPES ); - const size_t uNumRots = 1 << ms_aInfo[EP.uMode].uRotationBits; - const size_t uNumIdxMode = 1 << ms_aInfo[EP.uMode].uIndexModeBits; + const size_t uNumRots = size_t(1) << ms_aInfo[EP.uMode].uRotationBits; + const size_t uNumIdxMode = size_t(1) << ms_aInfo[EP.uMode].uIndexModeBits; // Number of rough cases to look at. 
reasonable values of this are 1, uShapes/4, and uShapes // uShapes/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out const size_t uItems = std::max<size_t>(1, uShapes >> 2); @@ -2145,9 +2185,9 @@ void D3DX_BC7::Encode(const HDRColorA* const pIn) { switch(r) { - case 1: for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) Swap(EP.aLDRPixels[i].r, EP.aLDRPixels[i].a); break; - case 2: for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) Swap(EP.aLDRPixels[i].g, EP.aLDRPixels[i].a); break; - case 3: for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) Swap(EP.aLDRPixels[i].b, EP.aLDRPixels[i].a); break; + case 1: for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) std::swap(EP.aLDRPixels[i].r, EP.aLDRPixels[i].a); break; + case 2: for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) std::swap(EP.aLDRPixels[i].g, EP.aLDRPixels[i].a); break; + case 3: for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) std::swap(EP.aLDRPixels[i].b, EP.aLDRPixels[i].a); break; } for(size_t im = 0; im < uNumIdxMode && fMSEBest > 0; ++im) @@ -2166,8 +2206,8 @@ void D3DX_BC7::Encode(const HDRColorA* const pIn) { if(afRoughMSE[i] > afRoughMSE[j]) { - Swap(afRoughMSE[i], afRoughMSE[j]); - Swap(auShape[i], auShape[j]); + std::swap(afRoughMSE[i], afRoughMSE[j]); + std::swap(auShape[i], auShape[j]); } } } @@ -2185,9 +2225,9 @@ void D3DX_BC7::Encode(const HDRColorA* const pIn) switch(r) { - case 1: for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) Swap(EP.aLDRPixels[i].r, EP.aLDRPixels[i].a); break; - case 2: for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) Swap(EP.aLDRPixels[i].g, EP.aLDRPixels[i].a); break; - case 3: for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) Swap(EP.aLDRPixels[i].b, EP.aLDRPixels[i].a); break; + case 1: for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) std::swap(EP.aLDRPixels[i].r, EP.aLDRPixels[i].a); break; + case 2: for(register 
size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) std::swap(EP.aLDRPixels[i].g, EP.aLDRPixels[i].a); break; + case 3: for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) std::swap(EP.aLDRPixels[i].b, EP.aLDRPixels[i].a); break; } } } @@ -2197,17 +2237,18 @@ void D3DX_BC7::Encode(const HDRColorA* const pIn) //------------------------------------------------------------------------------------- +_Use_decl_annotations_ void D3DX_BC7::GeneratePaletteQuantized(const EncodeParams* pEP, size_t uIndexMode, const LDREndPntPair& endPts, LDRColorA aPalette[]) const { assert( pEP ); const size_t uIndexPrec = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec2 : ms_aInfo[pEP->uMode].uIndexPrec; const size_t uIndexPrec2 = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec : ms_aInfo[pEP->uMode].uIndexPrec2; - const size_t uNumIndices = 1 << uIndexPrec; - const size_t uNumIndices2 = 1 << uIndexPrec2; + const size_t uNumIndices = size_t(1) << uIndexPrec; + const size_t uNumIndices2 = size_t(1) << uIndexPrec2; assert( uNumIndices > 0 && uNumIndices2 > 0 ); - __analysis_assume( uNumIndices > 0 && uNumIndices2 > 0 ); + _Analysis_assume_( uNumIndices > 0 && uNumIndices2 > 0 ); assert( (uNumIndices <= BC7_MAX_INDICES) && (uNumIndices2 <= BC7_MAX_INDICES) ); - __analysis_assume( (uNumIndices <= BC7_MAX_INDICES) && (uNumIndices2 <= BC7_MAX_INDICES) ); + _Analysis_assume_( (uNumIndices <= BC7_MAX_INDICES) && (uNumIndices2 <= BC7_MAX_INDICES) ); LDRColorA a = Unquantize(endPts.A, ms_aInfo[pEP->uMode].RGBAPrecWithP); LDRColorA b = Unquantize(endPts.B, ms_aInfo[pEP->uMode].RGBAPrecWithP); @@ -2225,6 +2266,7 @@ void D3DX_BC7::GeneratePaletteQuantized(const EncodeParams* pEP, size_t uIndexMo } } +_Use_decl_annotations_ float D3DX_BC7::PerturbOne(const EncodeParams* pEP, const LDRColorA aColors[], size_t np, size_t uIndexMode, size_t ch, const LDREndPntPair &oldEndPts, LDREndPntPair &newEndPts, float fOldErr, uint8_t do_b) const { @@ -2266,6 +2308,7 @@ float D3DX_BC7::PerturbOne(const EncodeParams* pEP, 
const LDRColorA aColors[], s // perturb the endpoints at least -3 to 3. // always ensure endpoint ordering is preserved (no need to overlap the scan) +_Use_decl_annotations_ void D3DX_BC7::Exhaustive(const EncodeParams* pEP, const LDRColorA aColors[], size_t np, size_t uIndexMode, size_t ch, float& fOrgErr, LDREndPntPair& optEndPt) const { @@ -2336,6 +2379,7 @@ void D3DX_BC7::Exhaustive(const EncodeParams* pEP, const LDRColorA aColors[], si } } +_Use_decl_annotations_ void D3DX_BC7::OptimizeOne(const EncodeParams* pEP, const LDRColorA aColors[], size_t np, size_t uIndexMode, float fOrgErr, const LDREndPntPair& org, LDREndPntPair& opt) const { @@ -2401,13 +2445,14 @@ void D3DX_BC7::OptimizeOne(const EncodeParams* pEP, const LDRColorA aColors[], s Exhaustive(pEP, aColors, np, uIndexMode, ch, fOptErr, opt); } +_Use_decl_annotations_ void D3DX_BC7::OptimizeEndPoints(const EncodeParams* pEP, size_t uShape, size_t uIndexMode, const float afOrgErr[], const LDREndPntPair aOrgEndPts[], LDREndPntPair aOptEndPts[]) const { assert( pEP ); const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions; assert( uPartitions < BC7_MAX_REGIONS && uShape < BC7_MAX_SHAPES ); - __analysis_assume( uPartitions < BC7_MAX_REGIONS && uShape < BC7_MAX_SHAPES ); + _Analysis_assume_( uPartitions < BC7_MAX_REGIONS && uShape < BC7_MAX_SHAPES ); LDRColorA aPixels[NUM_PIXELS_PER_BLOCK]; @@ -2423,16 +2468,17 @@ void D3DX_BC7::OptimizeEndPoints(const EncodeParams* pEP, size_t uShape, size_t } } +_Use_decl_annotations_ void D3DX_BC7::AssignIndices(const EncodeParams* pEP, size_t uShape, size_t uIndexMode, LDREndPntPair endPts[], size_t aIndices[], size_t aIndices2[], float afTotErr[]) const { assert( pEP ); assert( uShape < BC7_MAX_SHAPES ); - __analysis_assume( uShape < BC7_MAX_SHAPES ); + _Analysis_assume_( uShape < BC7_MAX_SHAPES ); const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions; assert( uPartitions < BC7_MAX_REGIONS ); - __analysis_assume( uPartitions < BC7_MAX_REGIONS ); + 
_Analysis_assume_( uPartitions < BC7_MAX_REGIONS ); const uint8_t uIndexPrec = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec2 : ms_aInfo[pEP->uMode].uIndexPrec; const uint8_t uIndexPrec2 = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec : ms_aInfo[pEP->uMode].uIndexPrec2; @@ -2440,14 +2486,13 @@ void D3DX_BC7::AssignIndices(const EncodeParams* pEP, size_t uShape, size_t uInd const uint8_t uNumIndices2 = 1 << uIndexPrec2; assert( (uNumIndices <= BC7_MAX_INDICES) && (uNumIndices2 <= BC7_MAX_INDICES) ); - __analysis_assume( (uNumIndices <= BC7_MAX_INDICES) && (uNumIndices2 <= BC7_MAX_INDICES) ); + _Analysis_assume_( (uNumIndices <= BC7_MAX_INDICES) && (uNumIndices2 <= BC7_MAX_INDICES) ); const uint8_t uHighestIndexBit = uNumIndices >> 1; const uint8_t uHighestIndexBit2 = uNumIndices2 >> 1; LDRColorA aPalette[BC7_MAX_REGIONS][BC7_MAX_INDICES]; // build list of possibles - LDREndPntPair adjusted_endPts; for(size_t p = 0; p <= uPartitions; p++) { GeneratePaletteQuantized(pEP, uIndexMode, endPts[p], aPalette[p]); @@ -2458,7 +2503,7 @@ void D3DX_BC7::AssignIndices(const EncodeParams* pEP, size_t uShape, size_t uInd { uint8_t uRegion = g_aPartitionTable[uPartitions][uShape][i]; assert( uRegion < BC7_MAX_REGIONS ); - __analysis_assume( uRegion < BC7_MAX_REGIONS ); + _Analysis_assume_( uRegion < BC7_MAX_REGIONS ); afTotErr[uRegion] += ComputeError(pEP->aLDRPixels[i], aPalette[uRegion], uIndexPrec, uIndexPrec2, &(aIndices[i]), &(aIndices2[i])); } @@ -2469,7 +2514,7 @@ void D3DX_BC7::AssignIndices(const EncodeParams* pEP, size_t uShape, size_t uInd { if(aIndices[g_aFixUp[uPartitions][uShape][p]] & uHighestIndexBit) { - Swap(endPts[p].A, endPts[p].B); + std::swap(endPts[p].A, endPts[p].B); for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) if(g_aPartitionTable[uPartitions][uShape][i] == p) aIndices[i] = uNumIndices - 1 - aIndices[i]; @@ -2483,9 +2528,9 @@ void D3DX_BC7::AssignIndices(const EncodeParams* pEP, size_t uShape, size_t uInd { 
if(aIndices[g_aFixUp[uPartitions][uShape][p]] & uHighestIndexBit) { - Swap(endPts[p].A.r, endPts[p].B.r); - Swap(endPts[p].A.g, endPts[p].B.g); - Swap(endPts[p].A.b, endPts[p].B.b); + std::swap(endPts[p].A.r, endPts[p].B.r); + std::swap(endPts[p].A.g, endPts[p].B.g); + std::swap(endPts[p].A.b, endPts[p].B.b); for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) if(g_aPartitionTable[uPartitions][uShape][i] == p) aIndices[i] = uNumIndices - 1 - aIndices[i]; @@ -2494,7 +2539,7 @@ void D3DX_BC7::AssignIndices(const EncodeParams* pEP, size_t uShape, size_t uInd if(aIndices2[0] & uHighestIndexBit2) { - Swap(endPts[p].A.a, endPts[p].B.a); + std::swap(endPts[p].A.a, endPts[p].B.a); for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) aIndices2[i] = uNumIndices2 - 1 - aIndices2[i]; } @@ -2503,12 +2548,13 @@ void D3DX_BC7::AssignIndices(const EncodeParams* pEP, size_t uShape, size_t uInd } } +_Use_decl_annotations_ void D3DX_BC7::EmitBlock(const EncodeParams* pEP, size_t uShape, size_t uRotation, size_t uIndexMode, const LDREndPntPair aEndPts[], const size_t aIndex[], const size_t aIndex2[]) { assert( pEP ); const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions; assert( uPartitions < BC7_MAX_REGIONS ); - __analysis_assume( uPartitions < BC7_MAX_REGIONS ); + _Analysis_assume_( uPartitions < BC7_MAX_REGIONS ); const size_t uPBits = ms_aInfo[pEP->uMode].uPBits; const size_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec; @@ -2525,7 +2571,7 @@ void D3DX_BC7::EmitBlock(const EncodeParams* pEP, size_t uShape, size_t uRotatio if(uPBits) { - const size_t uNumEP = (1 + uPartitions) << 1; + const size_t uNumEP = size_t(1 + uPartitions) << 1; uint8_t aPVote[BC7_MAX_REGIONS << 1] = {0,0,0,0,0,0}; uint8_t aCount[BC7_MAX_REGIONS << 1] = {0,0,0,0,0,0}; for(uint8_t ch = 0; ch < BC7_NUM_CHANNELS; ch++) @@ -2544,12 +2590,12 @@ void D3DX_BC7::EmitBlock(const EncodeParams* pEP, size_t uShape, size_t uRotatio SetBits(uStartBit, RGBAPrec[ch], aEndPts[i].B[ch] >> 1); size_t idx = 
ep++ * uPBits / uNumEP; assert(idx < (BC7_MAX_REGIONS << 1)); - __analysis_assume(idx < (BC7_MAX_REGIONS << 1)); + _Analysis_assume_(idx < (BC7_MAX_REGIONS << 1)); aPVote[idx] += aEndPts[i].A[ch] & 0x01; aCount[idx]++; idx = ep++ * uPBits / uNumEP; assert(idx < (BC7_MAX_REGIONS << 1)); - __analysis_assume(idx < (BC7_MAX_REGIONS << 1)); + _Analysis_assume_(idx < (BC7_MAX_REGIONS << 1)); aPVote[idx] += aEndPts[i].B[ch] & 0x01; aCount[idx]++; } @@ -2589,16 +2635,17 @@ void D3DX_BC7::EmitBlock(const EncodeParams* pEP, size_t uShape, size_t uRotatio assert(uStartBit == 128); } +_Use_decl_annotations_ float D3DX_BC7::Refine(const EncodeParams* pEP, size_t uShape, size_t uRotation, size_t uIndexMode) { assert( pEP ); assert( uShape < BC7_MAX_SHAPES ); - __analysis_assume( uShape < BC7_MAX_SHAPES ); + _Analysis_assume_( uShape < BC7_MAX_SHAPES ); const LDREndPntPair* aEndPts = pEP->aEndPts[uShape]; const size_t uPartitions = ms_aInfo[pEP->uMode].uPartitions; assert( uPartitions < BC7_MAX_REGIONS ); - __analysis_assume( uPartitions < BC7_MAX_REGIONS ); + _Analysis_assume_( uPartitions < BC7_MAX_REGIONS ); LDREndPntPair aOrgEndPts[BC7_MAX_REGIONS]; LDREndPntPair aOptEndPts[BC7_MAX_REGIONS]; @@ -2637,6 +2684,7 @@ float D3DX_BC7::Refine(const EncodeParams* pEP, size_t uShape, size_t uRotation, } } +_Use_decl_annotations_ float D3DX_BC7::MapColors(const EncodeParams* pEP, const LDRColorA aColors[], size_t np, size_t uIndexMode, const LDREndPntPair& endPts, float fMinErr) const { assert( pEP ); @@ -2659,16 +2707,17 @@ float D3DX_BC7::MapColors(const EncodeParams* pEP, const LDRColorA aColors[], si return fTotalErr; } +_Use_decl_annotations_ float D3DX_BC7::RoughMSE(EncodeParams* pEP, size_t uShape, size_t uIndexMode) { assert( pEP ); assert( uShape < BC7_MAX_SHAPES ); - __analysis_assume( uShape < BC7_MAX_SHAPES ); + _Analysis_assume_( uShape < BC7_MAX_SHAPES ); LDREndPntPair* aEndPts = pEP->aEndPts[uShape]; const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions; assert( 
uPartitions < BC7_MAX_REGIONS ); - __analysis_assume( uPartitions < BC7_MAX_REGIONS ); + _Analysis_assume_( uPartitions < BC7_MAX_REGIONS ); const uint8_t uIndexPrec = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec2 : ms_aInfo[pEP->uMode].uIndexPrec; const uint8_t uIndexPrec2 = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec : ms_aInfo[pEP->uMode].uIndexPrec2; @@ -2770,6 +2819,7 @@ float D3DX_BC7::RoughMSE(EncodeParams* pEP, size_t uShape, size_t uIndexMode) //------------------------------------------------------------------------------------- // BC6H Compression //------------------------------------------------------------------------------------- +_Use_decl_annotations_ void D3DXDecodeBC6HU(XMVECTOR *pColor, const uint8_t *pBC) { assert( pColor && pBC ); @@ -2777,6 +2827,7 @@ void D3DXDecodeBC6HU(XMVECTOR *pColor, const uint8_t *pBC) reinterpret_cast< const D3DX_BC6H* >( pBC )->Decode(false, reinterpret_cast<HDRColorA*>(pColor)); } +_Use_decl_annotations_ void D3DXDecodeBC6HS(XMVECTOR *pColor, const uint8_t *pBC) { assert( pColor && pBC ); @@ -2784,6 +2835,7 @@ void D3DXDecodeBC6HS(XMVECTOR *pColor, const uint8_t *pBC) reinterpret_cast< const D3DX_BC6H* >( pBC )->Decode(true, reinterpret_cast<HDRColorA*>(pColor)); } +_Use_decl_annotations_ void D3DXEncodeBC6HU(uint8_t *pBC, const XMVECTOR *pColor, DWORD flags) { UNREFERENCED_PARAMETER(flags); @@ -2792,6 +2844,7 @@ void D3DXEncodeBC6HU(uint8_t *pBC, const XMVECTOR *pColor, DWORD flags) reinterpret_cast< D3DX_BC6H* >( pBC )->Encode(false, reinterpret_cast<const HDRColorA*>(pColor)); } +_Use_decl_annotations_ void D3DXEncodeBC6HS(uint8_t *pBC, const XMVECTOR *pColor, DWORD flags) { UNREFERENCED_PARAMETER(flags); @@ -2804,6 +2857,7 @@ void D3DXEncodeBC6HS(uint8_t *pBC, const XMVECTOR *pColor, DWORD flags) //------------------------------------------------------------------------------------- // BC7 Compression //------------------------------------------------------------------------------------- +_Use_decl_annotations_ 
void D3DXDecodeBC7(XMVECTOR *pColor, const uint8_t *pBC) { assert( pColor && pBC ); @@ -2811,12 +2865,12 @@ void D3DXDecodeBC7(XMVECTOR *pColor, const uint8_t *pBC) reinterpret_cast< const D3DX_BC7* >( pBC )->Decode(reinterpret_cast<HDRColorA*>(pColor)); } +_Use_decl_annotations_ void D3DXEncodeBC7(uint8_t *pBC, const XMVECTOR *pColor, DWORD flags) { - UNREFERENCED_PARAMETER(flags); assert( pBC && pColor ); static_assert( sizeof(D3DX_BC7) == 16, "D3DX_BC7 should be 16 bytes" ); - reinterpret_cast< D3DX_BC7* >( pBC )->Encode(reinterpret_cast<const HDRColorA*>(pColor)); + reinterpret_cast< D3DX_BC7* >( pBC )->Encode( !(flags& BC_FLAGS_USE_3SUBSETS), reinterpret_cast<const HDRColorA*>(pColor)); } -} // namespace
\ No newline at end of file +} // namespace diff --git a/thirdparty/directxtex/DirectXTex/BCDirectCompute.cpp b/thirdparty/directxtex/DirectXTex/BCDirectCompute.cpp new file mode 100644 index 00000000..0976dc12 --- /dev/null +++ b/thirdparty/directxtex/DirectXTex/BCDirectCompute.cpp @@ -0,0 +1,616 @@ +//------------------------------------------------------------------------------------- +// BCDirectCompute.cpp +// +// Direct3D 11 Compute Shader BC Compressor +// +// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF +// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A +// PARTICULAR PURPOSE. +// +// Copyright (c) Microsoft Corporation. All rights reserved. +//------------------------------------------------------------------------------------- + +#include "directxtexp.h" + +#include "BCDirectCompute.h" + +#if defined(_DEBUG) || defined(PROFILE) +#pragma comment(lib,"dxguid.lib") +#endif + +using Microsoft::WRL::ComPtr; + +namespace +{ + #include "Shaders\Compiled\BC7Encode_EncodeBlockCS.inc" + #include "Shaders\Compiled\BC7Encode_TryMode02CS.inc" + #include "Shaders\Compiled\BC7Encode_TryMode137CS.inc" + #include "Shaders\Compiled\BC7Encode_TryMode456CS.inc" + #include "Shaders\Compiled\BC6HEncode_EncodeBlockCS.inc" + #include "Shaders\Compiled\BC6HEncode_TryModeG10CS.inc" + #include "Shaders\Compiled\BC6HEncode_TryModeLE10CS.inc" + + struct BufferBC6HBC7 + { + UINT color[4]; + }; + + struct ConstantsBC6HBC7 + { + UINT tex_width; + UINT num_block_x; + UINT format; + UINT mode_id; + UINT start_block_id; + UINT num_total_blocks; + float alpha_weight; + UINT reserved; + }; + + static_assert( sizeof(ConstantsBC6HBC7) == sizeof(UINT)*8, "Constant buffer size mismatch" ); + + inline void RunComputeShader( ID3D11DeviceContext* pContext, + ID3D11ComputeShader* shader, + ID3D11ShaderResourceView** pSRVs, + UINT srvCount, + ID3D11Buffer* pCB, + ID3D11UnorderedAccessView* 
pUAV, + UINT X ) + { + // Force UAV to nullptr before setting SRV since we are swapping buffers + ID3D11UnorderedAccessView* nullUAV = nullptr; + pContext->CSSetUnorderedAccessViews( 0, 1, &nullUAV, nullptr ); + + pContext->CSSetShader( shader, nullptr, 0 ); + pContext->CSSetShaderResources( 0, srvCount, pSRVs ); + pContext->CSSetUnorderedAccessViews( 0, 1, &pUAV, nullptr ); + pContext->CSSetConstantBuffers( 0, 1, &pCB ); + pContext->Dispatch( X, 1, 1 ); + } + + inline void ResetContext( ID3D11DeviceContext* pContext ) + { + ID3D11UnorderedAccessView* nullUAV = nullptr; + pContext->CSSetUnorderedAccessViews( 0, 1, &nullUAV, nullptr ); + + ID3D11ShaderResourceView* nullSRV[3] = { nullptr, nullptr, nullptr }; + pContext->CSSetShaderResources( 0, 3, nullSRV ); + + ID3D11Buffer* nullBuffer[1] = { nullptr }; + pContext->CSSetConstantBuffers( 0, 1, nullBuffer ); + } +}; + +namespace DirectX +{ + +GPUCompressBC::GPUCompressBC() : + m_bcformat(DXGI_FORMAT_UNKNOWN), + m_srcformat(DXGI_FORMAT_UNKNOWN), + m_alphaWeight(1.f), + m_width(0), + m_height(0) +{ +} + + +//------------------------------------------------------------------------------------- +_Use_decl_annotations_ +HRESULT GPUCompressBC::Initialize( ID3D11Device* pDevice ) +{ + if ( !pDevice ) + return E_INVALIDARG; + + // Check for DirectCompute support + D3D_FEATURE_LEVEL fl = pDevice->GetFeatureLevel(); + + if ( fl < D3D_FEATURE_LEVEL_10_0 ) + { + // DirectCompute not supported on Feature Level 9.x hardware + return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); + } + + if ( fl < D3D_FEATURE_LEVEL_11_0 ) + { + // DirectCompute support on Feature Level 10.x hardware is optional, and this function needs it + D3D11_FEATURE_DATA_D3D10_X_HARDWARE_OPTIONS hwopts; + HRESULT hr = pDevice->CheckFeatureSupport( D3D11_FEATURE_D3D10_X_HARDWARE_OPTIONS, &hwopts, sizeof(hwopts) ); + if ( FAILED(hr) ) + { + memset( &hwopts, 0, sizeof(hwopts) ); + } + + if ( !hwopts.ComputeShaders_Plus_RawAndStructuredBuffers_Via_Shader_4_x ) + { + 
return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); + } + } + + // Save a device reference and obtain immediate context + m_device = pDevice; + + pDevice->GetImmediateContext( m_context.ReleaseAndGetAddressOf() ); + assert( m_context ); + + //--- Create compute shader library: BC6H ----------------------------------------- + + // Modes 11-14 + HRESULT hr = pDevice->CreateComputeShader( BC6HEncode_TryModeG10CS, sizeof(BC6HEncode_TryModeG10CS), nullptr, m_BC6H_tryModeG10CS.ReleaseAndGetAddressOf() ); + if ( FAILED(hr) ) + return hr; + + // Modes 1-10 + hr = pDevice->CreateComputeShader( BC6HEncode_TryModeLE10CS, sizeof(BC6HEncode_TryModeLE10CS), nullptr, m_BC6H_tryModeLE10CS.ReleaseAndGetAddressOf() ); + if ( FAILED(hr) ) + return hr; + + // Encode + hr = pDevice->CreateComputeShader( BC6HEncode_EncodeBlockCS, sizeof(BC6HEncode_EncodeBlockCS), nullptr, m_BC6H_encodeBlockCS.ReleaseAndGetAddressOf() ); + if ( FAILED(hr) ) + return hr; + + //--- Create compute shader library: BC7 ------------------------------------------ + + // Modes 4, 5, 6 + hr = pDevice->CreateComputeShader( BC7Encode_TryMode456CS, sizeof(BC7Encode_TryMode456CS), nullptr, m_BC7_tryMode456CS.ReleaseAndGetAddressOf() ); + if ( FAILED(hr) ) + return hr; + + // Modes 1, 3, 7 + hr = pDevice->CreateComputeShader( BC7Encode_TryMode137CS, sizeof(BC7Encode_TryMode137CS), nullptr, m_BC7_tryMode137CS.ReleaseAndGetAddressOf() ); + if ( FAILED(hr) ) + return hr; + + // Modes 0, 2 + hr = pDevice->CreateComputeShader( BC7Encode_TryMode02CS, sizeof(BC7Encode_TryMode02CS), nullptr, m_BC7_tryMode02CS.ReleaseAndGetAddressOf() ); + if ( FAILED(hr) ) + return hr; + + // Encode + hr = pDevice->CreateComputeShader( BC7Encode_EncodeBlockCS, sizeof(BC7Encode_EncodeBlockCS), nullptr, m_BC7_encodeBlockCS.ReleaseAndGetAddressOf() ); + if ( FAILED(hr) ) + return hr; + + return S_OK; +} + + +//------------------------------------------------------------------------------------- +_Use_decl_annotations_ +HRESULT 
GPUCompressBC::Prepare( size_t width, size_t height, DXGI_FORMAT format, float alphaWeight, bool skip3subsets ) +{ + if ( !width || !height || alphaWeight < 0.f ) + return E_INVALIDARG; + +#ifdef _M_X64 + if ( (width > 0xFFFFFFFF) || (height > 0xFFFFFFFF) ) + return E_INVALIDARG; +#endif + + m_width = width; + m_height = height; + + m_alphaWeight = alphaWeight; + + m_skip3Subsets = skip3subsets; + + size_t xblocks = std::max<size_t>( 1, (width + 3) >> 2 ); + size_t yblocks = std::max<size_t>( 1, (height + 3) >> 2 ); + size_t num_blocks = xblocks * yblocks; + + switch( format ) + { + // BC6H GPU compressor takes RGBAF32 as input + case DXGI_FORMAT_BC6H_TYPELESS: + case DXGI_FORMAT_BC6H_UF16: + case DXGI_FORMAT_BC6H_SF16: + m_srcformat = DXGI_FORMAT_R32G32B32A32_FLOAT; + break; + + // BC7 GPU compressor takes RGBA32 as input + case DXGI_FORMAT_BC7_TYPELESS: + case DXGI_FORMAT_BC7_UNORM: + m_srcformat = DXGI_FORMAT_R8G8B8A8_UNORM; + break; + + case DXGI_FORMAT_BC7_UNORM_SRGB: + m_srcformat = DXGI_FORMAT_R8G8B8A8_UNORM_SRGB; + break; + + default: + m_bcformat = m_srcformat = DXGI_FORMAT_UNKNOWN; + return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); + } + + m_bcformat = format; + + auto pDevice = m_device.Get(); + if ( !pDevice ) + return E_POINTER; + + // Create structured buffers + size_t bufferSize = num_blocks * sizeof( BufferBC6HBC7 ); + { + D3D11_BUFFER_DESC desc; + memset( &desc, 0, sizeof(desc) ); + desc.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE; + desc.Usage = D3D11_USAGE_DEFAULT; + desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED; + desc.StructureByteStride = sizeof( BufferBC6HBC7 ); + desc.ByteWidth = static_cast<UINT>( bufferSize ); + + HRESULT hr = pDevice->CreateBuffer( &desc, nullptr, m_output.ReleaseAndGetAddressOf() ); + if ( FAILED(hr) ) + { + return hr; + } + + hr = pDevice->CreateBuffer( &desc, nullptr, m_err1.ReleaseAndGetAddressOf() ); + if ( FAILED(hr) ) + { + return hr; + } + + hr = pDevice->CreateBuffer( &desc, 
nullptr, m_err2.ReleaseAndGetAddressOf() ); + if ( FAILED(hr) ) + { + return hr; + } + } + + // Create staging output buffer + { + D3D11_BUFFER_DESC desc; + memset( &desc, 0, sizeof(desc) ); + desc.Usage = D3D11_USAGE_STAGING; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; + desc.ByteWidth = static_cast<UINT>( bufferSize ); + + HRESULT hr = pDevice->CreateBuffer( &desc, nullptr, m_outputCPU.ReleaseAndGetAddressOf() ); + if ( FAILED(hr) ) + { + return hr; + } + } + + // Create constant buffer + { + D3D11_BUFFER_DESC desc; + memset( &desc, 0, sizeof(desc) ); + desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + desc.Usage = D3D11_USAGE_DYNAMIC; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + desc.ByteWidth = sizeof( ConstantsBC6HBC7 ); + + HRESULT hr = pDevice->CreateBuffer( &desc, nullptr, m_constBuffer.ReleaseAndGetAddressOf() ); + if ( FAILED(hr) ) + { + return hr; + } + } + + // Create shader resource views + { + D3D11_SHADER_RESOURCE_VIEW_DESC desc; + memset( &desc, 0, sizeof(desc) ); + desc.Buffer.NumElements = static_cast<UINT>( num_blocks ); + desc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER; + + HRESULT hr = pDevice->CreateShaderResourceView( m_err1.Get(), &desc, m_err1SRV.ReleaseAndGetAddressOf() ); + if ( FAILED(hr) ) + { + return hr; + } + + hr = pDevice->CreateShaderResourceView( m_err2.Get(), &desc, m_err2SRV.ReleaseAndGetAddressOf() ); + if ( FAILED(hr) ) + { + return hr; + } + } + + // Create unordered access views + { + D3D11_UNORDERED_ACCESS_VIEW_DESC desc; + memset( &desc, 0, sizeof(desc) ); + desc.Buffer.NumElements = static_cast<UINT>( num_blocks ); + desc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER; + + HRESULT hr = pDevice->CreateUnorderedAccessView( m_output.Get(), &desc, m_outputUAV.ReleaseAndGetAddressOf() ); + if ( FAILED(hr) ) + { + return hr; + } + + hr = pDevice->CreateUnorderedAccessView( m_err1.Get(), &desc, m_err1UAV.ReleaseAndGetAddressOf() ); + if ( FAILED(hr) ) + { + return hr; + } + + hr = pDevice->CreateUnorderedAccessView( 
m_err2.Get(), &desc, m_err2UAV.ReleaseAndGetAddressOf() ); + if ( FAILED(hr) ) + { + return hr; + } + } + + return S_OK; +} + + +//------------------------------------------------------------------------------------- +_Use_decl_annotations_ +HRESULT GPUCompressBC::Compress( const Image& srcImage, const Image& destImage ) +{ + if ( !srcImage.pixels || !destImage.pixels ) + return E_INVALIDARG; + + if ( srcImage.width != destImage.width + || srcImage.height != destImage.height + || srcImage.width != m_width + || srcImage.height != m_height + || srcImage.format != m_srcformat + || destImage.format != m_bcformat ) + { + return E_UNEXPECTED; + } + + //--- Create input texture -------------------------------------------------------- + auto pDevice = m_device.Get(); + if ( !pDevice ) + return E_POINTER; + + // We need to avoid the hardware doing additional colorspace conversion + DXGI_FORMAT inputFormat = ( m_srcformat == DXGI_FORMAT_R8G8B8A8_UNORM_SRGB ) ? DXGI_FORMAT_R8G8B8A8_UNORM : m_srcformat; + + ComPtr<ID3D11Texture2D> sourceTex; + { + D3D11_TEXTURE2D_DESC desc; + memset( &desc, 0, sizeof(desc) ); + desc.Width = static_cast<UINT>( srcImage.width ); + desc.Height = static_cast<UINT>( srcImage.height ); + desc.MipLevels = 1; + desc.ArraySize = 1; + desc.Format = inputFormat; + desc.SampleDesc.Count = 1; + desc.Usage = D3D11_USAGE_DEFAULT; + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + + D3D11_SUBRESOURCE_DATA initData; + initData.pSysMem = srcImage.pixels; + initData.SysMemPitch = static_cast<DWORD>( srcImage.rowPitch ); + initData.SysMemSlicePitch = static_cast<DWORD>( srcImage.slicePitch ); + + HRESULT hr = pDevice->CreateTexture2D( &desc, &initData, sourceTex.GetAddressOf() ); + if ( FAILED(hr) ) + { + return hr; + } + } + + ComPtr<ID3D11ShaderResourceView> sourceSRV; + { + D3D11_SHADER_RESOURCE_VIEW_DESC desc; + memset( &desc, 0, sizeof(desc) ); + desc.Texture2D.MipLevels = 1; + desc.Format = inputFormat; + desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + 
+ HRESULT hr = pDevice->CreateShaderResourceView( sourceTex.Get(), &desc, sourceSRV.GetAddressOf() ); + if ( FAILED(hr) ) + { + return hr; + } + } + + //--- Compress using DirectCompute ------------------------------------------------ + bool isbc7 = false; + switch( m_bcformat ) + { + case DXGI_FORMAT_BC6H_TYPELESS: + case DXGI_FORMAT_BC6H_UF16: + case DXGI_FORMAT_BC6H_SF16: + break; + + case DXGI_FORMAT_BC7_TYPELESS: + case DXGI_FORMAT_BC7_UNORM: + case DXGI_FORMAT_BC7_UNORM_SRGB: + isbc7 = true; + break; + + default: + return E_UNEXPECTED; + } + + const UINT MAX_BLOCK_BATCH = 64; + + auto pContext = m_context.Get(); + if ( !pContext ) + return E_UNEXPECTED; + + size_t xblocks = std::max<size_t>( 1, (m_width + 3) >> 2 ); + size_t yblocks = std::max<size_t>( 1, (m_height + 3) >> 2 ); + + UINT num_total_blocks = static_cast<UINT>( xblocks * yblocks ); + UINT num_blocks = num_total_blocks; + int start_block_id = 0; + while (num_blocks > 0) + { + UINT n = std::min<UINT>( num_blocks, MAX_BLOCK_BATCH ); + UINT uThreadGroupCount = n; + + { + D3D11_MAPPED_SUBRESOURCE mapped; + HRESULT hr = pContext->Map( m_constBuffer.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped ); + if ( FAILED(hr) ) + return hr; + + ConstantsBC6HBC7 param; + param.tex_width = static_cast<UINT>( srcImage.width ); + param.num_block_x = static_cast<UINT>( xblocks ); + param.format = m_bcformat; + param.mode_id = 0; + param.start_block_id = start_block_id; + param.num_total_blocks = num_total_blocks; + param.alpha_weight = m_alphaWeight; + memcpy( mapped.pData, &param, sizeof( param ) ); + + pContext->Unmap( m_constBuffer.Get(), 0 ); + } + + if ( isbc7 ) + { + //--- BC7 ----------------------------------------------------------------- + ID3D11ShaderResourceView* pSRVs[] = { sourceSRV.Get(), nullptr }; + RunComputeShader( pContext, m_BC7_tryMode456CS.Get(), pSRVs, 2, m_constBuffer.Get(), + m_err1UAV.Get(), std::max<UINT>( (uThreadGroupCount + 3) / 4, 1) ); + + for ( UINT i = 0; i < 3; ++i ) + { + static const 
UINT modes[] = { 1, 3, 7 }; + + // Mode 1: err1 -> err2 + // Mode 3: err2 -> err1 + // Mode 7: err1 -> err2 + { + D3D11_MAPPED_SUBRESOURCE mapped; + HRESULT hr = pContext->Map( m_constBuffer.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped ); + if ( FAILED(hr) ) + { + ResetContext( pContext ); + return hr; + } + + ConstantsBC6HBC7 param; + param.tex_width = static_cast<UINT>( srcImage.width ); + param.num_block_x = static_cast<UINT>( xblocks ); + param.format = m_bcformat; + param.mode_id = modes[i]; + param.start_block_id = start_block_id; + param.num_total_blocks = num_total_blocks; + param.alpha_weight = m_alphaWeight; + memcpy( mapped.pData, &param, sizeof( param ) ); + pContext->Unmap( m_constBuffer.Get(), 0 ); + } + + pSRVs[1] = (i & 1) ? m_err2SRV.Get() : m_err1SRV.Get(); + RunComputeShader( pContext, m_BC7_tryMode137CS.Get(), pSRVs, 2, m_constBuffer.Get(), + (i & 1) ? m_err1UAV.Get() : m_err2UAV.Get(), uThreadGroupCount ); + } + + if ( !m_skip3Subsets ) + { + // 3 subset modes tend to be used rarely and add significant compression time + for ( UINT i = 0; i < 2; ++i ) + { + static const UINT modes[] = { 0, 2 }; + // Mode 0: err2 -> err1 + // Mode 2: err1 -> err2 + { + D3D11_MAPPED_SUBRESOURCE mapped; + HRESULT hr = pContext->Map( m_constBuffer.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped ); + if ( FAILED(hr) ) + { + ResetContext( pContext ); + return hr; + } + + ConstantsBC6HBC7 param; + param.tex_width = static_cast<UINT>( srcImage.width ); + param.num_block_x = static_cast<UINT>( xblocks ); + param.format = m_bcformat; + param.mode_id = modes[i]; + param.start_block_id = start_block_id; + param.num_total_blocks = num_total_blocks; + param.alpha_weight = m_alphaWeight; + memcpy( mapped.pData, &param, sizeof( param ) ); + pContext->Unmap( m_constBuffer.Get(), 0 ); + } + + pSRVs[1] = (i & 1) ? m_err1SRV.Get() : m_err2SRV.Get(); + RunComputeShader( pContext, m_BC7_tryMode02CS.Get(), pSRVs, 2, m_constBuffer.Get(), + (i & 1) ? 
m_err2UAV.Get() : m_err1UAV.Get(), uThreadGroupCount ); + } + } + + pSRVs[1] = m_err2SRV.Get(); + RunComputeShader( pContext, m_BC7_encodeBlockCS.Get(), pSRVs, 2, m_constBuffer.Get(), + m_outputUAV.Get(), std::max<UINT>( (uThreadGroupCount + 3) / 4, 1) ); + } + else + { + //--- BC6H ---------------------------------------------------------------- + ID3D11ShaderResourceView* pSRVs[] = { sourceSRV.Get(), nullptr }; + RunComputeShader( pContext, m_BC6H_tryModeG10CS.Get(), pSRVs, 2, m_constBuffer.Get(), + m_err1UAV.Get(), std::max<UINT>( (uThreadGroupCount + 3) / 4, 1) ); + + for ( UINT i = 0; i < 10; ++i ) + { + { + D3D11_MAPPED_SUBRESOURCE mapped; + HRESULT hr = pContext->Map( m_constBuffer.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped ); + if ( FAILED(hr) ) + { + ResetContext( pContext ); + return hr; + } + + ConstantsBC6HBC7 param; + param.tex_width = static_cast<UINT>( srcImage.width ); + param.num_block_x = static_cast<UINT>( xblocks ); + param.format = m_bcformat; + param.mode_id = i; + param.start_block_id = start_block_id; + param.num_total_blocks = num_total_blocks; + memcpy( mapped.pData, &param, sizeof( param ) ); + pContext->Unmap( m_constBuffer.Get(), 0 ); + } + + pSRVs[1] = (i & 1) ? m_err2SRV.Get() : m_err1SRV.Get(); + RunComputeShader( pContext, m_BC6H_tryModeLE10CS.Get(), pSRVs, 2, m_constBuffer.Get(), + (i & 1) ? 
m_err1UAV.Get() : m_err2UAV.Get(), std::max<UINT>( (uThreadGroupCount + 1) / 2, 1) ); + } + + pSRVs[1] = m_err1SRV.Get(); + RunComputeShader( pContext, m_BC6H_encodeBlockCS.Get(), pSRVs, 2, m_constBuffer.Get(), + m_outputUAV.Get(), std::max<UINT>( (uThreadGroupCount + 1) / 2, 1) ); + } + + start_block_id += n; + num_blocks -= n; + } + + ResetContext( pContext ); + + //--- Copy output texture back to CPU --------------------------------------------- + + pContext->CopyResource( m_outputCPU.Get(), m_output.Get() ); + + D3D11_MAPPED_SUBRESOURCE mapped; + HRESULT hr = pContext->Map( m_outputCPU.Get(), 0, D3D11_MAP_READ, 0, &mapped ); + if ( SUCCEEDED(hr) ) + { + const uint8_t *pSrc = reinterpret_cast<const uint8_t *>( mapped.pData ); + uint8_t *pDest = destImage.pixels; + + size_t pitch = xblocks * sizeof( BufferBC6HBC7 ); + + size_t rows = std::max<size_t>( 1, ( destImage.height + 3 ) >> 2 ); + + for( size_t h = 0; h < rows; ++h ) + { + memcpy( pDest, pSrc, destImage.rowPitch ); + + pSrc += pitch; + pDest += destImage.rowPitch; + } + + pContext->Unmap( m_outputCPU.Get(), 0 ); + } + + return hr; +} + +}; // namespace
\ No newline at end of file diff --git a/thirdparty/directxtex/DirectXTex/BCDirectCompute.h b/thirdparty/directxtex/DirectXTex/BCDirectCompute.h new file mode 100644 index 00000000..8bbfa75a --- /dev/null +++ b/thirdparty/directxtex/DirectXTex/BCDirectCompute.h @@ -0,0 +1,67 @@ +//------------------------------------------------------------------------------------- +// BCDirectCompute.h +// +// Direct3D 11 Compute Shader BC Compressor +// +// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF +// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A +// PARTICULAR PURPOSE. +// +// Copyright (c) Microsoft Corporation. All rights reserved. +//------------------------------------------------------------------------------------- + +#pragma once + +namespace DirectX +{ + +class GPUCompressBC +{ +public: + GPUCompressBC(); + + HRESULT Initialize( _In_ ID3D11Device* pDevice ); + + HRESULT Prepare( _In_ size_t width, _In_ size_t height, _In_ DXGI_FORMAT format, _In_ float alphaWeight = 1.f, _In_ bool skip3subsets = true ); + + HRESULT Compress( _In_ const Image& srcImage, _In_ const Image& destImage ); + + DXGI_FORMAT GetSourceFormat() const { return m_srcformat; } + +private: + DXGI_FORMAT m_bcformat; + DXGI_FORMAT m_srcformat; + float m_alphaWeight; + bool m_skip3Subsets; + size_t m_width; + size_t m_height; + + Microsoft::WRL::ComPtr<ID3D11Device> m_device; + Microsoft::WRL::ComPtr<ID3D11DeviceContext> m_context; + + Microsoft::WRL::ComPtr<ID3D11Buffer> m_err1; + Microsoft::WRL::ComPtr<ID3D11UnorderedAccessView> m_err1UAV; + Microsoft::WRL::ComPtr<ID3D11ShaderResourceView> m_err1SRV; + + Microsoft::WRL::ComPtr<ID3D11Buffer> m_err2; + Microsoft::WRL::ComPtr<ID3D11UnorderedAccessView> m_err2UAV; + Microsoft::WRL::ComPtr<ID3D11ShaderResourceView> m_err2SRV; + + Microsoft::WRL::ComPtr<ID3D11Buffer> m_output; + Microsoft::WRL::ComPtr<ID3D11Buffer> m_outputCPU; + 
Microsoft::WRL::ComPtr<ID3D11UnorderedAccessView> m_outputUAV; + Microsoft::WRL::ComPtr<ID3D11Buffer> m_constBuffer; + + // Compute shader library + Microsoft::WRL::ComPtr<ID3D11ComputeShader> m_BC6H_tryModeG10CS; + Microsoft::WRL::ComPtr<ID3D11ComputeShader> m_BC6H_tryModeLE10CS; + Microsoft::WRL::ComPtr<ID3D11ComputeShader> m_BC6H_encodeBlockCS; + + Microsoft::WRL::ComPtr<ID3D11ComputeShader> m_BC7_tryMode456CS; + Microsoft::WRL::ComPtr<ID3D11ComputeShader> m_BC7_tryMode137CS; + Microsoft::WRL::ComPtr<ID3D11ComputeShader> m_BC7_tryMode02CS; + Microsoft::WRL::ComPtr<ID3D11ComputeShader> m_BC7_encodeBlockCS; +}; + +}; // namespace
\ No newline at end of file diff --git a/thirdparty/directxtex/DirectXTex/DDS.h b/thirdparty/directxtex/DirectXTex/DDS.h index 6e913957..4d74620c 100644 --- a/thirdparty/directxtex/DirectXTex/DDS.h +++ b/thirdparty/directxtex/DirectXTex/DDS.h @@ -18,12 +18,15 @@ // http://go.microsoft.com/fwlink/?LinkId=248926 //-------------------------------------------------------------------------------------- -#if defined(_MSC_VER) && (_MSC_VER > 1000) #pragma once -#endif +#if defined(_XBOX_ONE) && defined(_TITLE) +#include <d3d11_x.h> +#else #include <dxgiformat.h> +#endif +// VS 2010's stdint.h conflicts with intsafe.h #pragma warning(push) #pragma warning(disable : 4005) #include <stdint.h> @@ -95,6 +98,9 @@ extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_R8G8_B8G8 = extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_G8R8_G8B8 = { sizeof(DDS_PIXELFORMAT), DDS_FOURCC, MAKEFOURCC('G','R','G','B'), 0, 0, 0, 0, 0 }; +extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_YUY2 = + { sizeof(DDS_PIXELFORMAT), DDS_FOURCC, MAKEFOURCC('Y','U','Y','2'), 0, 0, 0, 0, 0 }; + extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_A8R8G8B8 = { sizeof(DDS_PIXELFORMAT), DDS_RGBA, 0, 32, 0x00ff0000, 0x0000ff00, 0x000000ff, 0xff000000 }; @@ -169,20 +175,34 @@ extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_DX10 = #define DDS_FLAGS_VOLUME 0x00200000 // DDSCAPS2_VOLUME // Subset here matches D3D10_RESOURCE_DIMENSION and D3D11_RESOURCE_DIMENSION -typedef enum DDS_RESOURCE_DIMENSION +enum DDS_RESOURCE_DIMENSION { DDS_DIMENSION_TEXTURE1D = 2, DDS_DIMENSION_TEXTURE2D = 3, DDS_DIMENSION_TEXTURE3D = 4, -} DDS_RESOURCE_DIMENSION; +}; // Subset here matches D3D10_RESOURCE_MISC_FLAG and D3D11_RESOURCE_MISC_FLAG -typedef enum DDS_RESOURCE_MISC_FLAG +enum DDS_RESOURCE_MISC_FLAG +{ + DDS_RESOURCE_MISC_TEXTURECUBE = 0x4L, +}; + +enum DDS_MISC_FLAGS2 { - DDS_RESOURCE_MISC_TEXTURECUBE = 0x4L, -} DDS_RESOURCE_MISC_FLAG; + DDS_MISC_FLAGS2_ALPHA_MODE_MASK = 0x7L, +}; + +enum 
DDS_ALPHA_MODE +{ + DDS_ALPHA_MODE_UNKNOWN = 0, + DDS_ALPHA_MODE_STRAIGHT = 1, + DDS_ALPHA_MODE_PREMULTIPLIED = 2, + DDS_ALPHA_MODE_OPAQUE = 3, + DDS_ALPHA_MODE_CUSTOM = 4, +}; -typedef struct +struct DDS_HEADER { uint32_t dwSize; uint32_t dwFlags; @@ -198,17 +218,20 @@ typedef struct uint32_t dwCaps3; uint32_t dwCaps4; uint32_t dwReserved2; -} DDS_HEADER; +}; -typedef struct +struct DDS_HEADER_DXT10 { DXGI_FORMAT dxgiFormat; uint32_t resourceDimension; uint32_t miscFlag; // see DDS_RESOURCE_MISC_FLAG uint32_t arraySize; - uint32_t reserved; -} DDS_HEADER_DXT10; + uint32_t miscFlags2; // see DDS_MISC_FLAGS2 +}; #pragma pack(pop) +static_assert( sizeof(DDS_HEADER) == 124, "DDS Header size mismatch" ); +static_assert( sizeof(DDS_HEADER_DXT10) == 20, "DDS DX10 Extended Header size mismatch"); + }; // namespace diff --git a/thirdparty/directxtex/DirectXTex/DirectXTex.h b/thirdparty/directxtex/DirectXTex/DirectXTex.h index c4d4b73c..0320e747 100644 --- a/thirdparty/directxtex/DirectXTex/DirectXTex.h +++ b/thirdparty/directxtex/DirectXTex/DirectXTex.h @@ -13,62 +13,104 @@ // http://go.microsoft.com/fwlink/?LinkId=248926 //------------------------------------------------------------------------------------- -#if defined(_MSC_VER) && (_MSC_VER > 1000) #pragma once + +#if defined(WINAPI_FAMILY) && (WINAPI_FAMILY == WINAPI_FAMILY_PHONE_APP) && (_WIN32_WINNT <= _WIN32_WINNT_WIN8) +#error WIC is not supported on Windows Phone 8.0 #endif +// VS 2010's stdint.h conflicts with intsafe.h #pragma warning(push) #pragma warning(disable : 4005) #include <stdint.h> #pragma warning(pop) #include <algorithm> +#include <functional> + +#if defined(_XBOX_ONE) && defined(_TITLE) +#include <d3d11_x.h> +#define DCOMMON_H_INCLUDED +#else +#include <d3d11_1.h> +#endif + +#include <ocidl.h> + +// VS 2010 doesn't support explicit calling convention for std::function +#ifndef DIRECTX_STD_CALLCONV +#if defined(_MSC_VER) && (_MSC_VER < 1700) +#define DIRECTX_STD_CALLCONV +#else +#define 
DIRECTX_STD_CALLCONV __cdecl +#endif +#endif -#include <dxgiformat.h> -#include <d3d11.h> +// VS 2010/2012 do not support =default =delete +#ifndef DIRECTX_CTOR_DEFAULT +#if defined(_MSC_VER) && (_MSC_VER < 1800) +#define DIRECTX_CTOR_DEFAULT {} +#define DIRECTX_CTOR_DELETE ; +#else +#define DIRECTX_CTOR_DEFAULT =default; +#define DIRECTX_CTOR_DELETE =delete; +#endif +#endif -#define DIRECTX_TEX_VERSION 100 +#define DIRECTX_TEX_VERSION 132 namespace DirectX { + //--------------------------------------------------------------------------------- // DXGI Format Utilities - bool IsValid( _In_ DXGI_FORMAT fmt ); - bool IsCompressed( _In_ DXGI_FORMAT fmt ); - bool IsPacked( _In_ DXGI_FORMAT fmt ); - bool IsVideo( _In_ DXGI_FORMAT fmt ); - bool IsSRGB( _In_ DXGI_FORMAT fmt ); - bool IsTypeless( _In_ DXGI_FORMAT fmt ); + bool __cdecl IsValid( _In_ DXGI_FORMAT fmt ); + bool __cdecl IsCompressed( _In_ DXGI_FORMAT fmt ); + bool __cdecl IsPacked( _In_ DXGI_FORMAT fmt ); + bool __cdecl IsVideo( _In_ DXGI_FORMAT fmt ); + bool __cdecl IsPlanar( _In_ DXGI_FORMAT fmt ); + bool __cdecl IsPalettized( _In_ DXGI_FORMAT fmt ); + bool __cdecl IsDepthStencil(_In_ DXGI_FORMAT fmt ); + bool __cdecl IsSRGB( _In_ DXGI_FORMAT fmt ); + bool __cdecl IsTypeless( _In_ DXGI_FORMAT fmt, _In_ bool partialTypeless = true ); + + bool __cdecl HasAlpha( _In_ DXGI_FORMAT fmt ); - size_t BitsPerPixel( _In_ DXGI_FORMAT fmt ); + size_t __cdecl BitsPerPixel( _In_ DXGI_FORMAT fmt ); + + size_t __cdecl BitsPerColor( _In_ DXGI_FORMAT fmt ); enum CP_FLAGS { CP_FLAGS_NONE = 0x0, // Normal operation CP_FLAGS_LEGACY_DWORD = 0x1, // Assume pitch is DWORD aligned instead of BYTE aligned + CP_FLAGS_PARAGRAPH = 0x2, // Assume pitch is 16-byte aligned instead of BYTE aligned + CP_FLAGS_YMM = 0x4, // Assume pitch is 32-byte aligned instead of BYTE aligned + CP_FLAGS_ZMM = 0x8, // Assume pitch is 64-byte aligned instead of BYTE aligned + CP_FLAGS_PAGE4K = 0x200, // Assume pitch is 4096-byte aligned instead of BYTE aligned 
CP_FLAGS_24BPP = 0x10000, // Override with a legacy 24 bits-per-pixel format size CP_FLAGS_16BPP = 0x20000, // Override with a legacy 16 bits-per-pixel format size CP_FLAGS_8BPP = 0x40000, // Override with a legacy 8 bits-per-pixel format size }; - void ComputePitch( _In_ DXGI_FORMAT fmt, _In_ size_t width, _In_ size_t height, - _Out_ size_t& rowPitch, _Out_ size_t& slicePitch, _In_ DWORD flags = CP_FLAGS_NONE ); + void __cdecl ComputePitch( _In_ DXGI_FORMAT fmt, _In_ size_t width, _In_ size_t height, + _Out_ size_t& rowPitch, _Out_ size_t& slicePitch, _In_ DWORD flags = CP_FLAGS_NONE ); - size_t ComputeScanlines( _In_ DXGI_FORMAT fmt, _In_ size_t height ); + size_t __cdecl ComputeScanlines( _In_ DXGI_FORMAT fmt, _In_ size_t height ); - DXGI_FORMAT MakeSRGB( _In_ DXGI_FORMAT fmt ); - DXGI_FORMAT MakeTypeless( _In_ DXGI_FORMAT fmt ); - DXGI_FORMAT MakeTypelessUNORM( _In_ DXGI_FORMAT fmt ); - DXGI_FORMAT MakeTypelessFLOAT( _In_ DXGI_FORMAT fmt ); + DXGI_FORMAT __cdecl MakeSRGB( _In_ DXGI_FORMAT fmt ); + DXGI_FORMAT __cdecl MakeTypeless( _In_ DXGI_FORMAT fmt ); + DXGI_FORMAT __cdecl MakeTypelessUNORM( _In_ DXGI_FORMAT fmt ); + DXGI_FORMAT __cdecl MakeTypelessFLOAT( _In_ DXGI_FORMAT fmt ); //--------------------------------------------------------------------------------- // Texture metadata enum TEX_DIMENSION // Subset here matches D3D10_RESOURCE_DIMENSION and D3D11_RESOURCE_DIMENSION { - TEX_DIMENSION_TEXTURE1D = 2, - TEX_DIMENSION_TEXTURE2D = 3, - TEX_DIMENSION_TEXTURE3D = 4, + TEX_DIMENSION_TEXTURE1D = 2, + TEX_DIMENSION_TEXTURE2D = 3, + TEX_DIMENSION_TEXTURE3D = 4, }; enum TEX_MISC_FLAG @@ -77,6 +119,21 @@ namespace DirectX TEX_MISC_TEXTURECUBE = 0x4L, }; + enum TEX_MISC_FLAG2 + { + TEX_MISC2_ALPHA_MODE_MASK = 0x7L, + }; + + enum TEX_ALPHA_MODE + // Matches DDS_ALPHA_MODE, encoded in MISC_FLAGS2 + { + TEX_ALPHA_MODE_UNKNOWN = 0, + TEX_ALPHA_MODE_STRAIGHT = 1, + TEX_ALPHA_MODE_PREMULTIPLIED = 2, + TEX_ALPHA_MODE_OPAQUE = 3, + TEX_ALPHA_MODE_CUSTOM = 4, + }; + 
struct TexMetadata { size_t width; @@ -85,11 +142,22 @@ namespace DirectX size_t arraySize; // For cubemap, this is a multiple of 6 size_t mipLevels; uint32_t miscFlags; + uint32_t miscFlags2; DXGI_FORMAT format; TEX_DIMENSION dimension; - size_t ComputeIndex( _In_ size_t mip, _In_ size_t item, _In_ size_t slice ) const; + size_t __cdecl ComputeIndex( _In_ size_t mip, _In_ size_t item, _In_ size_t slice ) const; // Returns size_t(-1) to indicate an out-of-range error + + bool __cdecl IsCubemap() const { return (miscFlags & TEX_MISC_TEXTURECUBE) != 0; } + // Helper for miscFlags + + bool __cdecl IsPMAlpha() const { return ((miscFlags2 & TEX_MISC2_ALPHA_MODE_MASK) == TEX_ALPHA_MODE_PREMULTIPLIED) != 0; } + void __cdecl SetAlphaMode( TEX_ALPHA_MODE mode ) { miscFlags2 = (miscFlags2 & ~TEX_MISC2_ALPHA_MODE_MASK) | static_cast<uint32_t>(mode); } + // Helpers for miscFlags2 + + bool __cdecl IsVolumemap() const { return (dimension == TEX_DIMENSION_TEXTURE3D); } + // Helper for dimension }; enum DDS_FLAGS @@ -111,8 +179,14 @@ namespace DirectX DDS_FLAGS_NO_16BPP = 0x10, // Conversions avoid use of 565, 5551, and 4444 formats and instead expand to 8888 to avoid use of optional WDDM 1.2 formats + DDS_FLAGS_EXPAND_LUMINANCE = 0x20, + // When loading legacy luminance formats expand replicating the color channels rather than leaving them packed (L8, L16, A8L8) + DDS_FLAGS_FORCE_DX10_EXT = 0x10000, // Always use the 'DX10' header extension for DDS writer (i.e. 
don't try to write DX9 compatible DDS files) + + DDS_FLAGS_FORCE_DX10_EXT_MISC2 = 0x20000, + // DDS_FLAGS_FORCE_DX10_EXT including miscFlags2 information (result may not be compatible with D3DX10 or D3DX11) }; enum WIC_FLAGS @@ -129,11 +203,14 @@ namespace DirectX // Loads 565, 5551, and 4444 formats as 8888 to avoid use of optional WDDM 1.2 formats WIC_FLAGS_ALLOW_MONO = 0x8, - // Loads 1-bit monochrome (black & white) as R1_UNORM rather than 8-bit greyscale + // Loads 1-bit monochrome (black & white) as R1_UNORM rather than 8-bit grayscale WIC_FLAGS_ALL_FRAMES = 0x10, // Loads all images in a multi-frame file, converting/resizing to match the first frame as needed, defaults to 0th frame otherwise + WIC_FLAGS_IGNORE_SRGB = 0x20, + // Ignores sRGB metadata if present in the file + WIC_FLAGS_DITHER = 0x10000, // Use ordered 4x4 dithering for any required conversions @@ -147,20 +224,20 @@ namespace DirectX // Filtering mode to use for any required image resizing (only needed when loading arrays of differently sized images; defaults to Fant) }; - HRESULT GetMetadataFromDDSMemory( _In_bytecount_(size) LPCVOID pSource, _In_ size_t size, _In_ DWORD flags, - _Out_ TexMetadata& metadata ); - HRESULT GetMetadataFromDDSFile( _In_z_ LPCWSTR szFile, DWORD flags, - _Out_ TexMetadata& metadata ); + HRESULT __cdecl GetMetadataFromDDSMemory( _In_reads_bytes_(size) LPCVOID pSource, _In_ size_t size, _In_ DWORD flags, + _Out_ TexMetadata& metadata ); + HRESULT __cdecl GetMetadataFromDDSFile( _In_z_ LPCWSTR szFile, _In_ DWORD flags, + _Out_ TexMetadata& metadata ); - HRESULT GetMetadataFromTGAMemory( _In_bytecount_(size) LPCVOID pSource, _In_ size_t size, - _Out_ TexMetadata& metadata ); - HRESULT GetMetadataFromTGAFile( _In_z_ LPCWSTR szFile, - _Out_ TexMetadata& metadata ); + HRESULT __cdecl GetMetadataFromTGAMemory( _In_reads_bytes_(size) LPCVOID pSource, _In_ size_t size, + _Out_ TexMetadata& metadata ); + HRESULT __cdecl GetMetadataFromTGAFile( _In_z_ LPCWSTR szFile, + _Out_ 
TexMetadata& metadata ); - HRESULT GetMetadataFromWICMemory( _In_bytecount_(size) LPCVOID pSource, _In_ size_t size, _In_ DWORD flags, - _Out_ TexMetadata& metadata ); - HRESULT GetMetadataFromWICFile( _In_z_ LPCWSTR szFile, _In_ DWORD flags, - _Out_ TexMetadata& metadata ); + HRESULT __cdecl GetMetadataFromWICMemory( _In_reads_bytes_(size) LPCVOID pSource, _In_ size_t size, _In_ DWORD flags, + _Out_ TexMetadata& metadata ); + HRESULT __cdecl GetMetadataFromWICFile( _In_z_ LPCWSTR szFile, _In_ DWORD flags, + _Out_ TexMetadata& metadata ); //--------------------------------------------------------------------------------- // Bitmap image container @@ -177,33 +254,40 @@ namespace DirectX class ScratchImage { public: - ScratchImage() : _nimages(0), _size(0), _image(0), _memory(0) {} + ScratchImage() + : _nimages(0), _size(0), _image(nullptr), _memory(nullptr) {} + ScratchImage(ScratchImage&& moveFrom) + : _nimages(0), _size(0), _image(nullptr), _memory(nullptr) { *this = std::move(moveFrom); } ~ScratchImage() { Release(); } - HRESULT Initialize( _In_ const TexMetadata& mdata ); + ScratchImage& __cdecl operator= (ScratchImage&& moveFrom); - HRESULT Initialize1D( _In_ DXGI_FORMAT fmt, _In_ size_t length, _In_ size_t arraySize, _In_ size_t mipLevels ); - HRESULT Initialize2D( _In_ DXGI_FORMAT fmt, _In_ size_t width, _In_ size_t height, _In_ size_t arraySize, _In_ size_t mipLevels ); - HRESULT Initialize3D( _In_ DXGI_FORMAT fmt, _In_ size_t width, _In_ size_t height, _In_ size_t depth, _In_ size_t mipLevels ); - HRESULT InitializeCube( _In_ DXGI_FORMAT fmt, _In_ size_t width, _In_ size_t height, _In_ size_t nCubes, _In_ size_t mipLevels ); + HRESULT __cdecl Initialize( _In_ const TexMetadata& mdata, _In_ DWORD flags = CP_FLAGS_NONE ); - HRESULT InitializeFromImage( _In_ const Image& srcImage, _In_ bool allow1D = false ); - HRESULT InitializeArrayFromImages( _In_count_(nImages) const Image* images, _In_ size_t nImages, _In_ bool allow1D = false ); - HRESULT 
InitializeCubeFromImages( _In_count_(nImages) const Image* images, _In_ size_t nImages ); - HRESULT Initialize3DFromImages( _In_count_(depth) const Image* images, _In_ size_t depth ); + HRESULT __cdecl Initialize1D( _In_ DXGI_FORMAT fmt, _In_ size_t length, _In_ size_t arraySize, _In_ size_t mipLevels, _In_ DWORD flags = CP_FLAGS_NONE ); + HRESULT __cdecl Initialize2D( _In_ DXGI_FORMAT fmt, _In_ size_t width, _In_ size_t height, _In_ size_t arraySize, _In_ size_t mipLevels, _In_ DWORD flags = CP_FLAGS_NONE ); + HRESULT __cdecl Initialize3D( _In_ DXGI_FORMAT fmt, _In_ size_t width, _In_ size_t height, _In_ size_t depth, _In_ size_t mipLevels, _In_ DWORD flags = CP_FLAGS_NONE ); + HRESULT __cdecl InitializeCube( _In_ DXGI_FORMAT fmt, _In_ size_t width, _In_ size_t height, _In_ size_t nCubes, _In_ size_t mipLevels, _In_ DWORD flags = CP_FLAGS_NONE ); - void Release(); + HRESULT __cdecl InitializeFromImage( _In_ const Image& srcImage, _In_ bool allow1D = false, _In_ DWORD flags = CP_FLAGS_NONE ); + HRESULT __cdecl InitializeArrayFromImages( _In_reads_(nImages) const Image* images, _In_ size_t nImages, _In_ bool allow1D = false, _In_ DWORD flags = CP_FLAGS_NONE ); + HRESULT __cdecl InitializeCubeFromImages( _In_reads_(nImages) const Image* images, _In_ size_t nImages, _In_ DWORD flags = CP_FLAGS_NONE ); + HRESULT __cdecl Initialize3DFromImages( _In_reads_(depth) const Image* images, _In_ size_t depth, _In_ DWORD flags = CP_FLAGS_NONE ); - bool OverrideFormat( _In_ DXGI_FORMAT f ); + void __cdecl Release(); - const TexMetadata& GetMetadata() const { return _metadata; } - const Image* GetImage(_In_ size_t mip, _In_ size_t item, _In_ size_t slice) const; + bool __cdecl OverrideFormat( _In_ DXGI_FORMAT f ); - const Image* GetImages() const { return _image; } - size_t GetImageCount() const { return _nimages; } + const TexMetadata& __cdecl GetMetadata() const { return _metadata; } + const Image* __cdecl GetImage(_In_ size_t mip, _In_ size_t item, _In_ size_t slice) const; - 
uint8_t* GetPixels() const { return _memory; } - size_t GetPixelsSize() const { return _size; } + const Image* __cdecl GetImages() const { return _image; } + size_t __cdecl GetImageCount() const { return _nimages; } + + uint8_t* __cdecl GetPixels() const { return _memory; } + size_t __cdecl GetPixelsSize() const { return _size; } + + bool __cdecl IsAlphaAllOpaque() const; private: size_t _nimages; @@ -222,15 +306,18 @@ namespace DirectX class Blob { public: - Blob() : _buffer(0), _size(0) {} + Blob() : _buffer(nullptr), _size(0) {} + Blob(Blob&& moveFrom) : _buffer(nullptr), _size(0) { *this = std::move(moveFrom); } ~Blob() { Release(); } - HRESULT Initialize( _In_ size_t size ); + Blob& __cdecl operator= (Blob&& moveFrom); + + HRESULT __cdecl Initialize( _In_ size_t size ); - void Release(); + void __cdecl Release(); - void *GetBufferPointer() const { return _buffer; } - size_t GetBufferSize() const { return _size; } + void *__cdecl GetBufferPointer() const { return _buffer; } + size_t __cdecl GetBufferSize() const { return _size; } private: void* _buffer; @@ -245,43 +332,47 @@ namespace DirectX // Image I/O // DDS operations - HRESULT LoadFromDDSMemory( _In_bytecount_(size) LPCVOID pSource, _In_ size_t size, _In_ DWORD flags, - _Out_opt_ TexMetadata* metadata, _Out_ ScratchImage& image ); - HRESULT LoadFromDDSFile( _In_z_ LPCWSTR szFile, _In_ DWORD flags, - _Out_opt_ TexMetadata* metadata, _Out_ ScratchImage& image ); + HRESULT __cdecl LoadFromDDSMemory( _In_reads_bytes_(size) LPCVOID pSource, _In_ size_t size, _In_ DWORD flags, + _Out_opt_ TexMetadata* metadata, _Out_ ScratchImage& image ); + HRESULT __cdecl LoadFromDDSFile( _In_z_ LPCWSTR szFile, _In_ DWORD flags, + _Out_opt_ TexMetadata* metadata, _Out_ ScratchImage& image ); - HRESULT SaveToDDSMemory( _In_ const Image& image, _In_ DWORD flags, - _Out_ Blob& blob ); - HRESULT SaveToDDSMemory( _In_count_(nimages) const Image* images, _In_ size_t nimages, _In_ const TexMetadata& metadata, _In_ DWORD flags, - 
_Out_ Blob& blob ); + HRESULT __cdecl SaveToDDSMemory( _In_ const Image& image, _In_ DWORD flags, + _Out_ Blob& blob ); + HRESULT __cdecl SaveToDDSMemory( _In_reads_(nimages) const Image* images, _In_ size_t nimages, _In_ const TexMetadata& metadata, _In_ DWORD flags, + _Out_ Blob& blob ); - HRESULT SaveToDDSFile( _In_ const Image& image, _In_ DWORD flags, _In_z_ LPCWSTR szFile ); - HRESULT SaveToDDSFile( _In_count_(nimages) const Image* images, _In_ size_t nimages, _In_ const TexMetadata& metadata, _In_ DWORD flags, _In_z_ LPCWSTR szFile ); + HRESULT __cdecl SaveToDDSFile( _In_ const Image& image, _In_ DWORD flags, _In_z_ LPCWSTR szFile ); + HRESULT __cdecl SaveToDDSFile( _In_reads_(nimages) const Image* images, _In_ size_t nimages, _In_ const TexMetadata& metadata, _In_ DWORD flags, _In_z_ LPCWSTR szFile ); // TGA operations - HRESULT LoadFromTGAMemory( _In_bytecount_(size) LPCVOID pSource, _In_ size_t size, - _Out_opt_ TexMetadata* metadata, _Out_ ScratchImage& image ); - HRESULT LoadFromTGAFile( _In_z_ LPCWSTR szFile, - _Out_opt_ TexMetadata* metadata, _Out_ ScratchImage& image ); + HRESULT __cdecl LoadFromTGAMemory( _In_reads_bytes_(size) LPCVOID pSource, _In_ size_t size, + _Out_opt_ TexMetadata* metadata, _Out_ ScratchImage& image ); + HRESULT __cdecl LoadFromTGAFile( _In_z_ LPCWSTR szFile, + _Out_opt_ TexMetadata* metadata, _Out_ ScratchImage& image ); - HRESULT SaveToTGAMemory( _In_ const Image& image, _Out_ Blob& blob ); - HRESULT SaveToTGAFile( _In_ const Image& image, _In_z_ LPCWSTR szFile ); + HRESULT __cdecl SaveToTGAMemory( _In_ const Image& image, _Out_ Blob& blob ); + HRESULT __cdecl SaveToTGAFile( _In_ const Image& image, _In_z_ LPCWSTR szFile ); // WIC operations - HRESULT LoadFromWICMemory( _In_bytecount_(size) LPCVOID pSource, _In_ size_t size, _In_ DWORD flags, - _Out_opt_ TexMetadata* metadata, _Out_ ScratchImage& image ); - HRESULT LoadFromWICFile( _In_z_ LPCWSTR szFile, _In_ DWORD flags, - _Out_opt_ TexMetadata* metadata, _Out_ 
ScratchImage& image ); - - HRESULT SaveToWICMemory( _In_ const Image& image, _In_ DWORD flags, _In_ REFGUID guidContainerFormat, - _Out_ Blob& blob, _In_opt_ const GUID* targetFormat = nullptr ); - HRESULT SaveToWICMemory( _In_count_(nimages) const Image* images, _In_ size_t nimages, _In_ DWORD flags, _In_ REFGUID guidContainerFormat, - _Out_ Blob& blob, _In_opt_ const GUID* targetFormat = nullptr ); - - HRESULT SaveToWICFile( _In_ const Image& image, _In_ DWORD flags, _In_ REFGUID guidContainerFormat, - _In_z_ LPCWSTR szFile, _In_opt_ const GUID* targetFormat = nullptr ); - HRESULT SaveToWICFile( _In_count_(nimages) const Image* images, _In_ size_t nimages, _In_ DWORD flags, _In_ REFGUID guidContainerFormat, - _In_z_ LPCWSTR szFile, _In_opt_ const GUID* targetFormat = nullptr ); + HRESULT __cdecl LoadFromWICMemory( _In_reads_bytes_(size) LPCVOID pSource, _In_ size_t size, _In_ DWORD flags, + _Out_opt_ TexMetadata* metadata, _Out_ ScratchImage& image ); + HRESULT __cdecl LoadFromWICFile( _In_z_ LPCWSTR szFile, _In_ DWORD flags, + _Out_opt_ TexMetadata* metadata, _Out_ ScratchImage& image ); + + HRESULT __cdecl SaveToWICMemory( _In_ const Image& image, _In_ DWORD flags, _In_ REFGUID guidContainerFormat, + _Out_ Blob& blob, _In_opt_ const GUID* targetFormat = nullptr, + _In_opt_ std::function<void DIRECTX_STD_CALLCONV(IPropertyBag2*)> setCustomProps = nullptr ); + HRESULT __cdecl SaveToWICMemory( _In_count_(nimages) const Image* images, _In_ size_t nimages, _In_ DWORD flags, _In_ REFGUID guidContainerFormat, + _Out_ Blob& blob, _In_opt_ const GUID* targetFormat = nullptr, + _In_opt_ std::function<void DIRECTX_STD_CALLCONV(IPropertyBag2*)> setCustomProps = nullptr ); + + HRESULT __cdecl SaveToWICFile( _In_ const Image& image, _In_ DWORD flags, _In_ REFGUID guidContainerFormat, + _In_z_ LPCWSTR szFile, _In_opt_ const GUID* targetFormat = nullptr, + _In_opt_ std::function<void DIRECTX_STD_CALLCONV(IPropertyBag2*)> setCustomProps = nullptr ); + HRESULT __cdecl 
SaveToWICFile( _In_count_(nimages) const Image* images, _In_ size_t nimages, _In_ DWORD flags, _In_ REFGUID guidContainerFormat, + _In_z_ LPCWSTR szFile, _In_opt_ const GUID* targetFormat = nullptr, + _In_opt_ std::function<void DIRECTX_STD_CALLCONV(IPropertyBag2*)> setCustomProps = nullptr ); enum WICCodecs { @@ -294,7 +385,7 @@ namespace DirectX WIC_CODEC_ICO, // Windows Icon (.ico) }; - REFGUID GetWICCodec( _In_ WICCodecs codec ); + REFGUID __cdecl GetWICCodec( _In_ WICCodecs codec ); //--------------------------------------------------------------------------------- // Texture conversion, resizing, mipmap generation, and block compression @@ -309,20 +400,34 @@ namespace DirectX TEX_FR_FLIP_VERTICAL = 0x10, }; - HRESULT FlipRotate( _In_ const Image& srcImage, _In_ DWORD flags, _Out_ ScratchImage& image ); - HRESULT FlipRotate( _In_count_(nimages) const Image* srcImages, _In_ size_t nimages, _In_ const TexMetadata& metadata, - _In_ DWORD flags, _Out_ ScratchImage& result ); + HRESULT __cdecl FlipRotate( _In_ const Image& srcImage, _In_ DWORD flags, _Out_ ScratchImage& image ); + HRESULT __cdecl FlipRotate( _In_reads_(nimages) const Image* srcImages, _In_ size_t nimages, _In_ const TexMetadata& metadata, + _In_ DWORD flags, _Out_ ScratchImage& result ); // Flip and/or rotate image enum TEX_FILTER_FLAGS { TEX_FILTER_DEFAULT = 0, - // Clamp filtering only + TEX_FILTER_WRAP_U = 0x1, + TEX_FILTER_WRAP_V = 0x2, + TEX_FILTER_WRAP_W = 0x4, + TEX_FILTER_WRAP = ( TEX_FILTER_WRAP_U | TEX_FILTER_WRAP_V | TEX_FILTER_WRAP_W ), + TEX_FILTER_MIRROR_U = 0x10, + TEX_FILTER_MIRROR_V = 0x20, + TEX_FILTER_MIRROR_W = 0x40, + TEX_FILTER_MIRROR = ( TEX_FILTER_MIRROR_U | TEX_FILTER_MIRROR_V | TEX_FILTER_MIRROR_W ), + // Wrap vs. Mirror vs. 
Clamp filtering options TEX_FILTER_SEPARATE_ALPHA = 0x100, // Resize color and alpha channel independently + TEX_FILTER_RGB_COPY_RED = 0x1000, + TEX_FILTER_RGB_COPY_GREEN = 0x2000, + TEX_FILTER_RGB_COPY_BLUE = 0x4000, + // When converting RGB to R, defaults to using grayscale. These flags indicate copying a specific channel instead + // When converting RGB to RG, defaults to copying RED | GREEN. These flags control which channels are selected instead. + TEX_FILTER_DITHER = 0x10000, // Use ordered 4x4 dithering for any required conversions TEX_FILTER_DITHER_DIFFUSION = 0x20000, @@ -331,44 +436,75 @@ namespace DirectX TEX_FILTER_POINT = 0x100000, TEX_FILTER_LINEAR = 0x200000, TEX_FILTER_CUBIC = 0x300000, + TEX_FILTER_BOX = 0x400000, TEX_FILTER_FANT = 0x400000, // Equiv to Box filtering for mipmap generation + TEX_FILTER_TRIANGLE = 0x500000, // Filtering mode to use for any required image resizing TEX_FILTER_SRGB_IN = 0x1000000, TEX_FILTER_SRGB_OUT = 0x2000000, - TEX_FILTER_SRGB = 0x3000000, + TEX_FILTER_SRGB = ( TEX_FILTER_SRGB_IN | TEX_FILTER_SRGB_OUT ), // sRGB <-> RGB for use in conversion operations // if the input format type is IsSRGB(), then SRGB_IN is on by default // if the output format type is IsSRGB(), then SRGB_OUT is on by default + + TEX_FILTER_FORCE_NON_WIC = 0x10000000, + // Forces use of the non-WIC path when both are an option + + TEX_FILTER_FORCE_WIC = 0x20000000, + // Forces use of the WIC path even when logic would have picked a non-WIC path when both are an option }; - HRESULT Resize( _In_ const Image& srcImage, _In_ size_t width, _In_ size_t height, _In_ DWORD filter, - _Out_ ScratchImage& image ); - HRESULT Resize( _In_count_(nimages) const Image* srcImages, _In_ size_t nimages, _In_ const TexMetadata& metadata, - _In_ size_t width, _In_ size_t height, _In_ DWORD filter, _Out_ ScratchImage& result ); + HRESULT __cdecl Resize( _In_ const Image& srcImage, _In_ size_t width, _In_ size_t height, _In_ DWORD filter, + _Out_ ScratchImage& image ); + 
HRESULT __cdecl Resize( _In_reads_(nimages) const Image* srcImages, _In_ size_t nimages, _In_ const TexMetadata& metadata, + _In_ size_t width, _In_ size_t height, _In_ DWORD filter, _Out_ ScratchImage& result ); // Resize the image to width x height. Defaults to Fant filtering. // Note for a complex resize, the result will always have mipLevels == 1 - HRESULT Convert( _In_ const Image& srcImage, _In_ DXGI_FORMAT format, _In_ DWORD filter, _In_ float threshold, - _Out_ ScratchImage& image ); - HRESULT Convert( _In_count_(nimages) const Image* srcImages, _In_ size_t nimages, _In_ const TexMetadata& metadata, - _In_ DXGI_FORMAT format, _In_ DWORD filter, _In_ float threshold, _Out_ ScratchImage& result ); + HRESULT __cdecl Convert( _In_ const Image& srcImage, _In_ DXGI_FORMAT format, _In_ DWORD filter, _In_ float threshold, + _Out_ ScratchImage& image ); + HRESULT __cdecl Convert( _In_reads_(nimages) const Image* srcImages, _In_ size_t nimages, _In_ const TexMetadata& metadata, + _In_ DXGI_FORMAT format, _In_ DWORD filter, _In_ float threshold, _Out_ ScratchImage& result ); // Convert the image to a new format - HRESULT GenerateMipMaps( _In_ const Image& baseImage, _In_ DWORD filter, _In_ size_t levels, - _Out_ ScratchImage& mipChain, bool allow1D = false ); - HRESULT GenerateMipMaps( _In_count_(nimages) const Image* srcImages, _In_ size_t nimages, _In_ const TexMetadata& metadata, - _In_ DWORD filter, _In_ size_t levels, _Out_ ScratchImage& mipChain ); + HRESULT __cdecl ConvertToSinglePlane( _In_ const Image& srcImage, _Out_ ScratchImage& image ); + HRESULT __cdecl ConvertToSinglePlane( _In_reads_(nimages) const Image* srcImages, _In_ size_t nimages, _In_ const TexMetadata& metadata, + _Out_ ScratchImage& image ); + // Converts the image from a planar format to an equivalent non-planar format + + HRESULT __cdecl GenerateMipMaps( _In_ const Image& baseImage, _In_ DWORD filter, _In_ size_t levels, + _Inout_ ScratchImage& mipChain, _In_ bool allow1D = false ); + 
HRESULT __cdecl GenerateMipMaps( _In_reads_(nimages) const Image* srcImages, _In_ size_t nimages, _In_ const TexMetadata& metadata, + _In_ DWORD filter, _In_ size_t levels, _Inout_ ScratchImage& mipChain ); // levels of '0' indicates a full mipchain, otherwise is generates that number of total levels (including the source base image) // Defaults to Fant filtering which is equivalent to a box filter - HRESULT GenerateMipMaps3D( _In_count_(depth) const Image* baseImages, _In_ size_t depth, _In_ DWORD filter, _In_ size_t levels, - _Out_ ScratchImage& mipChain ); - HRESULT GenerateMipMaps3D( _In_count_(nimages) const Image* srcImages, _In_ size_t nimages, _In_ const TexMetadata& metadata, - _In_ DWORD filter, _In_ size_t levels, _Out_ ScratchImage& mipChain ); + HRESULT __cdecl GenerateMipMaps3D( _In_reads_(depth) const Image* baseImages, _In_ size_t depth, _In_ DWORD filter, _In_ size_t levels, + _Out_ ScratchImage& mipChain ); + HRESULT __cdecl GenerateMipMaps3D( _In_reads_(nimages) const Image* srcImages, _In_ size_t nimages, _In_ const TexMetadata& metadata, + _In_ DWORD filter, _In_ size_t levels, _Out_ ScratchImage& mipChain ); // levels of '0' indicates a full mipchain, otherwise is generates that number of total levels (including the source base image) // Defaults to Fant filtering which is equivalent to a box filter + enum TEX_PMALPHA_FLAGS + { + TEX_PMALPHA_DEFAULT = 0, + + TEX_PMALPHA_IGNORE_SRGB = 0x1, + // ignores sRGB colorspace conversions + + TEX_PMALPHA_SRGB_IN = 0x1000000, + TEX_PMALPHA_SRGB_OUT = 0x2000000, + TEX_PMALPHA_SRGB = ( TEX_PMALPHA_SRGB_IN | TEX_PMALPHA_SRGB_OUT ), + // if the input format type is IsSRGB(), then SRGB_IN is on by default + // if the output format type is IsSRGB(), then SRGB_OUT is on by default + }; + + HRESULT __cdecl PremultiplyAlpha( _In_ const Image& srcImage, _In_ DWORD flags, _Out_ ScratchImage& image ); + HRESULT __cdecl PremultiplyAlpha( _In_reads_(nimages) const Image* srcImages, _In_ size_t nimages, _In_ const 
TexMetadata& metadata, _In_ DWORD flags, _Out_ ScratchImage& result ); + // Converts to a premultiplied alpha version of the texture + enum TEX_COMPRESS_FLAGS { TEX_COMPRESS_DEFAULT = 0, @@ -385,19 +521,34 @@ namespace DirectX TEX_COMPRESS_UNIFORM = 0x40000, // Uniform color weighting for BC1-3 compression; by default uses perceptual weighting + TEX_COMPRESS_BC7_USE_3SUBSETS = 0x80000, + // Enables exhaustive search for BC7 compress for mode 0 and 2; by default skips trying these modes + + TEX_COMPRESS_SRGB_IN = 0x1000000, + TEX_COMPRESS_SRGB_OUT = 0x2000000, + TEX_COMPRESS_SRGB = ( TEX_COMPRESS_SRGB_IN | TEX_COMPRESS_SRGB_OUT ), + // if the input format type is IsSRGB(), then SRGB_IN is on by default + // if the output format type is IsSRGB(), then SRGB_OUT is on by default + TEX_COMPRESS_PARALLEL = 0x10000000, // Compress is free to use multithreading to improve performance (by default it does not use multithreading) }; - HRESULT Compress( _In_ const Image& srcImage, _In_ DXGI_FORMAT format, _In_ DWORD compress, _In_ float alphaRef, - _Out_ ScratchImage& cImage ); - HRESULT Compress( _In_count_(nimages) const Image* srcImages, _In_ size_t nimages, _In_ const TexMetadata& metadata, - _In_ DXGI_FORMAT format, _In_ DWORD compress, _In_ float alphaRef, _Out_ ScratchImage& cImages ); + HRESULT __cdecl Compress( _In_ const Image& srcImage, _In_ DXGI_FORMAT format, _In_ DWORD compress, _In_ float alphaRef, + _Out_ ScratchImage& cImage ); + HRESULT __cdecl Compress( _In_reads_(nimages) const Image* srcImages, _In_ size_t nimages, _In_ const TexMetadata& metadata, + _In_ DXGI_FORMAT format, _In_ DWORD compress, _In_ float alphaRef, _Out_ ScratchImage& cImages ); // Note that alphaRef is only used by BC1. 
0.5f is a typical value to use - HRESULT Decompress( _In_ const Image& cImage, _In_ DXGI_FORMAT format, _Out_ ScratchImage& image ); - HRESULT Decompress( _In_count_(nimages) const Image* cImages, _In_ size_t nimages, _In_ const TexMetadata& metadata, - _In_ DXGI_FORMAT format, _Out_ ScratchImage& images ); + HRESULT __cdecl Compress( _In_ ID3D11Device* pDevice, _In_ const Image& srcImage, _In_ DXGI_FORMAT format, _In_ DWORD compress, + _In_ float alphaWeight, _Out_ ScratchImage& image ); + HRESULT __cdecl Compress( _In_ ID3D11Device* pDevice, _In_ const Image* srcImages, _In_ size_t nimages, _In_ const TexMetadata& metadata, + _In_ DXGI_FORMAT format, _In_ DWORD compress, _In_ float alphaWeight, _Out_ ScratchImage& cImages ); + // DirectCompute-based compression (alphaWeight is only used by BC7. 1.0 is the typical value to use) + + HRESULT __cdecl Decompress( _In_ const Image& cImage, _In_ DXGI_FORMAT format, _Out_ ScratchImage& image ); + HRESULT __cdecl Decompress( _In_reads_(nimages) const Image* cImages, _In_ size_t nimages, _In_ const TexMetadata& metadata, + _In_ DXGI_FORMAT format, _Out_ ScratchImage& images ); //--------------------------------------------------------------------------------- // Normal map operations @@ -426,10 +577,10 @@ namespace DirectX // Computes a crude occlusion term stored in the alpha channel }; - HRESULT ComputeNormalMap( _In_ const Image& srcImage, _In_ DWORD flags, _In_ float amplitude, - _In_ DXGI_FORMAT format, _Out_ ScratchImage& normalMap ); - HRESULT ComputeNormalMap( _In_count_(nimages) const Image* srcImages, _In_ size_t nimages, _In_ const TexMetadata& metadata, - _In_ DWORD flags, _In_ float amplitude, _In_ DXGI_FORMAT format, _Out_ ScratchImage& normalMaps ); + HRESULT __cdecl ComputeNormalMap( _In_ const Image& srcImage, _In_ DWORD flags, _In_ float amplitude, + _In_ DXGI_FORMAT format, _Out_ ScratchImage& normalMap ); + HRESULT __cdecl ComputeNormalMap( _In_reads_(nimages) const Image* srcImages, _In_ size_t 
nimages, _In_ const TexMetadata& metadata, + _In_ DWORD flags, _In_ float amplitude, _In_ DXGI_FORMAT format, _Out_ ScratchImage& normalMaps ); //--------------------------------------------------------------------------------- // Misc image operations @@ -440,26 +591,53 @@ namespace DirectX size_t w; size_t h; - Rect() {} + Rect() DIRECTX_CTOR_DEFAULT Rect( size_t _x, size_t _y, size_t _w, size_t _h ) : x(_x), y(_y), w(_w), h(_h) {} }; - HRESULT CopyRectangle( _In_ const Image& srcImage, _In_ const Rect& srcRect, _In_ const Image& dstImage, - _In_ DWORD filter, _In_ size_t xOffset, _In_ size_t yOffset ); + HRESULT __cdecl CopyRectangle( _In_ const Image& srcImage, _In_ const Rect& srcRect, _In_ const Image& dstImage, + _In_ DWORD filter, _In_ size_t xOffset, _In_ size_t yOffset ); + + enum CMSE_FLAGS + { + CMSE_DEFAULT = 0, - HRESULT ComputeMSE( _In_ const Image& image1, _In_ const Image& image2, _Out_ float& mse, _Out_opt_cap_c_(4) float* mseV ); + CMSE_IMAGE1_SRGB = 0x1, + CMSE_IMAGE2_SRGB = 0x2, + // Indicates that image needs gamma correction before comparision + + CMSE_IGNORE_RED = 0x10, + CMSE_IGNORE_GREEN = 0x20, + CMSE_IGNORE_BLUE = 0x40, + CMSE_IGNORE_ALPHA = 0x80, + // Ignore the channel when computing MSE + + CMSE_IMAGE1_X2_BIAS = 0x100, + CMSE_IMAGE2_X2_BIAS = 0x200, + // Indicates that image should be scaled and biased before comparison (i.e. 
UNORM -> SNORM) + }; + + HRESULT __cdecl ComputeMSE( _In_ const Image& image1, _In_ const Image& image2, _Out_ float& mse, _Out_writes_opt_(4) float* mseV, _In_ DWORD flags = 0 ); //--------------------------------------------------------------------------------- // Direct3D 11 functions - bool IsSupportedTexture( _In_ ID3D11Device* pDevice, _In_ const TexMetadata& metadata ); + bool __cdecl IsSupportedTexture( _In_ ID3D11Device* pDevice, _In_ const TexMetadata& metadata ); + + HRESULT __cdecl CreateTexture( _In_ ID3D11Device* pDevice, _In_reads_(nimages) const Image* srcImages, _In_ size_t nimages, _In_ const TexMetadata& metadata, + _Outptr_ ID3D11Resource** ppResource ); + + HRESULT __cdecl CreateShaderResourceView( _In_ ID3D11Device* pDevice, _In_reads_(nimages) const Image* srcImages, _In_ size_t nimages, _In_ const TexMetadata& metadata, + _Outptr_ ID3D11ShaderResourceView** ppSRV ); - HRESULT CreateTexture( _In_ ID3D11Device* pDevice, _In_count_(nimages) const Image* srcImages, _In_ size_t nimages, _In_ const TexMetadata& metadata, - _Deref_out_ ID3D11Resource** ppResource ); + HRESULT __cdecl CreateTextureEx( _In_ ID3D11Device* pDevice, _In_reads_(nimages) const Image* srcImages, _In_ size_t nimages, _In_ const TexMetadata& metadata, + _In_ D3D11_USAGE usage, _In_ unsigned int bindFlags, _In_ unsigned int cpuAccessFlags, _In_ unsigned int miscFlags, _In_ bool forceSRGB, + _Outptr_ ID3D11Resource** ppResource ); - HRESULT CreateShaderResourceView( _In_ ID3D11Device* pDevice, _In_count_(nimages) const Image* srcImages, _In_ size_t nimages, _In_ const TexMetadata& metadata, - _Deref_out_ ID3D11ShaderResourceView** ppSRV ); + HRESULT __cdecl CreateShaderResourceViewEx( _In_ ID3D11Device* pDevice, _In_reads_(nimages) const Image* srcImages, _In_ size_t nimages, _In_ const TexMetadata& metadata, + _In_ D3D11_USAGE usage, _In_ unsigned int bindFlags, _In_ unsigned int cpuAccessFlags, _In_ unsigned int miscFlags, _In_ bool forceSRGB, + _Outptr_ 
ID3D11ShaderResourceView** ppSRV ); - HRESULT CaptureTexture( _In_ ID3D11Device* pDevice, _In_ ID3D11DeviceContext* pContext, _In_ ID3D11Resource* pSource, _Out_ ScratchImage& result ); + HRESULT __cdecl CaptureTexture( _In_ ID3D11Device* pDevice, _In_ ID3D11DeviceContext* pContext, _In_ ID3D11Resource* pSource, _Out_ ScratchImage& result ); #include "DirectXTex.inl" diff --git a/thirdparty/directxtex/DirectXTex/DirectXTex.inl b/thirdparty/directxtex/DirectXTex/DirectXTex.inl index 909cd402..a6e806ba 100644 --- a/thirdparty/directxtex/DirectXTex/DirectXTex.inl +++ b/thirdparty/directxtex/DirectXTex/DirectXTex.inl @@ -13,24 +13,20 @@ // http://go.microsoft.com/fwlink/?LinkId=248926 //------------------------------------------------------------------------------------- -#if defined(_MSC_VER) && (_MSC_VER > 1000) #pragma once -#endif //===================================================================================== // DXGI Format Utilities //===================================================================================== -inline bool IsValid( DXGI_FORMAT fmt ) +_Use_decl_annotations_ +inline bool __cdecl IsValid( DXGI_FORMAT fmt ) { -#ifdef DXGI_1_2_FORMATS - return ( static_cast<size_t>(fmt) >= 1 && static_cast<size_t>(fmt) <= 115 ); -#else - return ( static_cast<size_t>(fmt) >= 1 && static_cast<size_t>(fmt) <= 99 ); -#endif + return ( static_cast<size_t>(fmt) >= 1 && static_cast<size_t>(fmt) <= 120 ); } -inline bool IsCompressed( DXGI_FORMAT fmt ) +_Use_decl_annotations_ +inline bool __cdecl IsCompressed(DXGI_FORMAT fmt) { switch ( fmt ) { @@ -62,14 +58,65 @@ inline bool IsCompressed( DXGI_FORMAT fmt ) } } -inline bool IsPacked( DXGI_FORMAT fmt ) +_Use_decl_annotations_ +inline bool __cdecl IsPacked(DXGI_FORMAT fmt) { - return ( (fmt == DXGI_FORMAT_R8G8_B8G8_UNORM) || (fmt == DXGI_FORMAT_G8R8_G8B8_UNORM) ); + switch( fmt ) + { + case DXGI_FORMAT_R8G8_B8G8_UNORM: + case DXGI_FORMAT_G8R8_G8B8_UNORM: + case DXGI_FORMAT_YUY2: // 4:2:2 8-bit + case 
DXGI_FORMAT_Y210: // 4:2:2 10-bit + case DXGI_FORMAT_Y216: // 4:2:2 16-bit + return true; + + default: + return false; + } } -inline bool IsVideo( DXGI_FORMAT fmt ) +_Use_decl_annotations_ +inline bool __cdecl IsPlanar(DXGI_FORMAT fmt) +{ + switch ( static_cast<int>(fmt) ) + { + case DXGI_FORMAT_NV12: // 4:2:0 8-bit + case DXGI_FORMAT_P010: // 4:2:0 10-bit + case DXGI_FORMAT_P016: // 4:2:0 16-bit + case DXGI_FORMAT_420_OPAQUE:// 4:2:0 8-bit + case DXGI_FORMAT_NV11: // 4:1:1 8-bit + return true; + + case 118 /* DXGI_FORMAT_D16_UNORM_S8_UINT */: + case 119 /* DXGI_FORMAT_R16_UNORM_X8_TYPELESS */: + case 120 /* DXGI_FORMAT_X16_TYPELESS_G8_UINT */: + // These are Xbox One platform specific types + return true; + + default: + return false; + } +} + +_Use_decl_annotations_ +inline bool __cdecl IsPalettized(DXGI_FORMAT fmt) +{ + switch( fmt ) + { + case DXGI_FORMAT_AI44: + case DXGI_FORMAT_IA44: + case DXGI_FORMAT_P8: + case DXGI_FORMAT_A8P8: + return true; + + default: + return false; + } +} + +_Use_decl_annotations_ +inline bool __cdecl IsVideo(DXGI_FORMAT fmt) { -#ifdef DXGI_1_2_FORMATS switch ( fmt ) { case DXGI_FORMAT_AYUV: @@ -83,7 +130,6 @@ inline bool IsVideo( DXGI_FORMAT fmt ) case DXGI_FORMAT_Y216: case DXGI_FORMAT_NV11: // These video formats can be used with the 3D pipeline through special view mappings - return true; case DXGI_FORMAT_420_OPAQUE: case DXGI_FORMAT_AI44: @@ -96,13 +142,33 @@ inline bool IsVideo( DXGI_FORMAT fmt ) default: return false; } -#else // !DXGI_1_2_FORMATS - UNREFERENCED_PARAMETER(fmt); - return false; -#endif } -inline bool IsSRGB( DXGI_FORMAT fmt ) +_Use_decl_annotations_ +inline bool __cdecl IsDepthStencil(DXGI_FORMAT fmt) +{ + switch( static_cast<int>(fmt) ) + { + case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: + case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS: + case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT: + case DXGI_FORMAT_D32_FLOAT: + case DXGI_FORMAT_D24_UNORM_S8_UINT: + case DXGI_FORMAT_R24_UNORM_X8_TYPELESS: + case 
DXGI_FORMAT_X24_TYPELESS_G8_UINT: + case DXGI_FORMAT_D16_UNORM: + case 118 /* DXGI_FORMAT_D16_UNORM_S8_UINT */: + case 119 /* DXGI_FORMAT_R16_UNORM_X8_TYPELESS */: + case 120 /* DXGI_FORMAT_X16_TYPELESS_G8_UINT */: + return true; + + default: + return false; + } +} + +_Use_decl_annotations_ +inline bool __cdecl IsSRGB(DXGI_FORMAT fmt) { switch( fmt ) { @@ -120,24 +186,21 @@ inline bool IsSRGB( DXGI_FORMAT fmt ) } } -inline bool IsTypeless( DXGI_FORMAT fmt ) +_Use_decl_annotations_ +inline bool __cdecl IsTypeless(DXGI_FORMAT fmt, bool partialTypeless) { - switch( fmt ) + switch( static_cast<int>(fmt) ) { case DXGI_FORMAT_R32G32B32A32_TYPELESS: case DXGI_FORMAT_R32G32B32_TYPELESS: case DXGI_FORMAT_R16G16B16A16_TYPELESS: case DXGI_FORMAT_R32G32_TYPELESS: case DXGI_FORMAT_R32G8X24_TYPELESS: - case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS: - case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT: case DXGI_FORMAT_R10G10B10A2_TYPELESS: case DXGI_FORMAT_R8G8B8A8_TYPELESS: case DXGI_FORMAT_R16G16_TYPELESS: case DXGI_FORMAT_R32_TYPELESS: case DXGI_FORMAT_R24G8_TYPELESS: - case DXGI_FORMAT_R24_UNORM_X8_TYPELESS: - case DXGI_FORMAT_X24_TYPELESS_G8_UINT: case DXGI_FORMAT_R8G8_TYPELESS: case DXGI_FORMAT_R16_TYPELESS: case DXGI_FORMAT_R8_TYPELESS: @@ -152,15 +215,47 @@ inline bool IsTypeless( DXGI_FORMAT fmt ) case DXGI_FORMAT_BC7_TYPELESS: return true; + case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS: + case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT: + case DXGI_FORMAT_R24_UNORM_X8_TYPELESS: + case DXGI_FORMAT_X24_TYPELESS_G8_UINT: + return partialTypeless; + + case 119 /* DXGI_FORMAT_R16_UNORM_X8_TYPELESS */: + case 120 /* DXGI_FORMAT_X16_TYPELESS_G8_UINT */: + // These are Xbox One platform specific types + return partialTypeless; + default: return false; } } -inline size_t ComputeScanlines( _In_ DXGI_FORMAT fmt, _In_ size_t height ) +_Use_decl_annotations_ +inline bool __cdecl HasAlpha(DXGI_FORMAT fmt) { - switch ( fmt ) + switch( static_cast<int>(fmt) ) { + case DXGI_FORMAT_R32G32B32A32_TYPELESS: + 
case DXGI_FORMAT_R32G32B32A32_FLOAT: + case DXGI_FORMAT_R32G32B32A32_UINT: + case DXGI_FORMAT_R32G32B32A32_SINT: + case DXGI_FORMAT_R16G16B16A16_TYPELESS: + case DXGI_FORMAT_R16G16B16A16_FLOAT: + case DXGI_FORMAT_R16G16B16A16_UNORM: + case DXGI_FORMAT_R16G16B16A16_UINT: + case DXGI_FORMAT_R16G16B16A16_SNORM: + case DXGI_FORMAT_R16G16B16A16_SINT: + case DXGI_FORMAT_R10G10B10A2_TYPELESS: + case DXGI_FORMAT_R10G10B10A2_UNORM: + case DXGI_FORMAT_R10G10B10A2_UINT: + case DXGI_FORMAT_R8G8B8A8_TYPELESS: + case DXGI_FORMAT_R8G8B8A8_UNORM: + case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: + case DXGI_FORMAT_R8G8B8A8_UINT: + case DXGI_FORMAT_R8G8B8A8_SNORM: + case DXGI_FORMAT_R8G8B8A8_SINT: + case DXGI_FORMAT_A8_UNORM: case DXGI_FORMAT_BC1_TYPELESS: case DXGI_FORMAT_BC1_UNORM: case DXGI_FORMAT_BC1_UNORM_SRGB: @@ -170,21 +265,51 @@ inline size_t ComputeScanlines( _In_ DXGI_FORMAT fmt, _In_ size_t height ) case DXGI_FORMAT_BC3_TYPELESS: case DXGI_FORMAT_BC3_UNORM: case DXGI_FORMAT_BC3_UNORM_SRGB: - case DXGI_FORMAT_BC4_TYPELESS: - case DXGI_FORMAT_BC4_UNORM: - case DXGI_FORMAT_BC4_SNORM: - case DXGI_FORMAT_BC5_TYPELESS: - case DXGI_FORMAT_BC5_UNORM: - case DXGI_FORMAT_BC5_SNORM: - case DXGI_FORMAT_BC6H_TYPELESS: - case DXGI_FORMAT_BC6H_UF16: - case DXGI_FORMAT_BC6H_SF16: + case DXGI_FORMAT_B5G5R5A1_UNORM: + case DXGI_FORMAT_B8G8R8A8_UNORM: + case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM: + case DXGI_FORMAT_B8G8R8A8_TYPELESS: + case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB: case DXGI_FORMAT_BC7_TYPELESS: case DXGI_FORMAT_BC7_UNORM: case DXGI_FORMAT_BC7_UNORM_SRGB: - return std::max<size_t>( 1, (height + 3) / 4 ); + case DXGI_FORMAT_AYUV: + case DXGI_FORMAT_Y410: + case DXGI_FORMAT_Y416: + case DXGI_FORMAT_AI44: + case DXGI_FORMAT_IA44: + case DXGI_FORMAT_A8P8: + case DXGI_FORMAT_B4G4R4A4_UNORM: + return true; + + case 116 /* DXGI_FORMAT_R10G10B10_7E3_A2_FLOAT */: + case 117 /* DXGI_FORMAT_R10G10B10_6E4_A2_FLOAT */: + // These are Xbox One platform specific types + return true; default: + return 
false; + } +} + +_Use_decl_annotations_ +inline size_t __cdecl ComputeScanlines(DXGI_FORMAT fmt, size_t height) +{ + if ( IsCompressed(fmt) ) + { + return std::max<size_t>( 1, (height + 3) / 4 ); + } + else if ( fmt == DXGI_FORMAT_NV11 ) + { + // Direct3D makes this simplifying assumption, although it is larger than the 4:1:1 data + return height * 2; + } + else if ( IsPlanar(fmt) ) + { + return height + ( ( height + 1 ) >> 1 ); + } + else + { return height; } } @@ -192,7 +317,8 @@ inline size_t ComputeScanlines( _In_ DXGI_FORMAT fmt, _In_ size_t height ) //===================================================================================== // Image I/O //===================================================================================== -inline HRESULT SaveToDDSMemory( const Image& image, DWORD flags, Blob& blob ) +_Use_decl_annotations_ +inline HRESULT __cdecl SaveToDDSMemory(const Image& image, DWORD flags, Blob& blob) { TexMetadata mdata; memset( &mdata, 0, sizeof(mdata) ); @@ -207,7 +333,8 @@ inline HRESULT SaveToDDSMemory( const Image& image, DWORD flags, Blob& blob ) return SaveToDDSMemory( &image, 1, mdata, flags, blob ); } -inline HRESULT SaveToDDSFile( const Image& image, DWORD flags, LPCWSTR szFile ) +_Use_decl_annotations_ +inline HRESULT __cdecl SaveToDDSFile(const Image& image, DWORD flags, LPCWSTR szFile) { TexMetadata mdata; memset( &mdata, 0, sizeof(mdata) ); diff --git a/thirdparty/directxtex/DirectXTex/DirectXTexCompress.cpp b/thirdparty/directxtex/DirectXTex/DirectXTexCompress.cpp index 9c1bfbd8..52fb3d45 100644 --- a/thirdparty/directxtex/DirectXTex/DirectXTexCompress.cpp +++ b/thirdparty/directxtex/DirectXTex/DirectXTexCompress.cpp @@ -17,11 +17,12 @@ #ifdef _OPENMP #include <omp.h> -#pragma warning(disable : 4616 6001 6993) +#pragma warning(disable : 4616 6993) #endif #include "BC.h" + namespace DirectX { @@ -31,13 +32,46 @@ inline static DWORD _GetBCFlags( _In_ DWORD compress ) static_assert( TEX_COMPRESS_A_DITHER == BC_FLAGS_DITHER_A, 
"TEX_COMPRESS_* flags should match BC_FLAGS_*" ); static_assert( TEX_COMPRESS_DITHER == (BC_FLAGS_DITHER_RGB | BC_FLAGS_DITHER_A), "TEX_COMPRESS_* flags should match BC_FLAGS_*" ); static_assert( TEX_COMPRESS_UNIFORM == BC_FLAGS_UNIFORM, "TEX_COMPRESS_* flags should match BC_FLAGS_*" ); - return ( compress & (BC_FLAGS_DITHER_RGB|BC_FLAGS_DITHER_A|BC_FLAGS_UNIFORM) ); + static_assert( TEX_COMPRESS_BC7_USE_3SUBSETS == BC_FLAGS_USE_3SUBSETS, "TEX_COMPRESS_* flags should match BC_FLAGS_*" ); + return ( compress & (BC_FLAGS_DITHER_RGB|BC_FLAGS_DITHER_A|BC_FLAGS_UNIFORM|BC_FLAGS_USE_3SUBSETS) ); +} + +inline static DWORD _GetSRGBFlags( _In_ DWORD compress ) +{ + static_assert( TEX_COMPRESS_SRGB_IN == TEX_FILTER_SRGB_IN, "TEX_COMPRESS_SRGB* should match TEX_FILTER_SRGB*" ); + static_assert( TEX_COMPRESS_SRGB_OUT == TEX_FILTER_SRGB_OUT, "TEX_COMPRESS_SRGB* should match TEX_FILTER_SRGB*" ); + static_assert( TEX_COMPRESS_SRGB == TEX_FILTER_SRGB, "TEX_COMPRESS_SRGB* should match TEX_FILTER_SRGB*" ); + return ( compress & TEX_COMPRESS_SRGB ); +} + +inline static bool _DetermineEncoderSettings( _In_ DXGI_FORMAT format, _Out_ BC_ENCODE& pfEncode, _Out_ size_t& blocksize, _Out_ DWORD& cflags ) +{ + switch(format) + { + case DXGI_FORMAT_BC1_UNORM: + case DXGI_FORMAT_BC1_UNORM_SRGB: pfEncode = nullptr; blocksize = 8; cflags = 0; break; + case DXGI_FORMAT_BC2_UNORM: + case DXGI_FORMAT_BC2_UNORM_SRGB: pfEncode = D3DXEncodeBC2; blocksize = 16; cflags = 0; break; + case DXGI_FORMAT_BC3_UNORM: + case DXGI_FORMAT_BC3_UNORM_SRGB: pfEncode = D3DXEncodeBC3; blocksize = 16; cflags = 0; break; + case DXGI_FORMAT_BC4_UNORM: pfEncode = D3DXEncodeBC4U; blocksize = 8; cflags = TEX_FILTER_RGB_COPY_RED; break; + case DXGI_FORMAT_BC4_SNORM: pfEncode = D3DXEncodeBC4S; blocksize = 8; cflags = TEX_FILTER_RGB_COPY_RED; break; + case DXGI_FORMAT_BC5_UNORM: pfEncode = D3DXEncodeBC5U; blocksize = 16; cflags = TEX_FILTER_RGB_COPY_RED | TEX_FILTER_RGB_COPY_GREEN; break; + case DXGI_FORMAT_BC5_SNORM: pfEncode 
= D3DXEncodeBC5S; blocksize = 16; cflags = TEX_FILTER_RGB_COPY_RED | TEX_FILTER_RGB_COPY_GREEN; break; + case DXGI_FORMAT_BC6H_UF16: pfEncode = D3DXEncodeBC6HU; blocksize = 16; cflags = 0; break; + case DXGI_FORMAT_BC6H_SF16: pfEncode = D3DXEncodeBC6HS; blocksize = 16; cflags = 0; break; + case DXGI_FORMAT_BC7_UNORM: + case DXGI_FORMAT_BC7_UNORM_SRGB: pfEncode = D3DXEncodeBC7; blocksize = 16; cflags = 0; break; + default: pfEncode = nullptr; blocksize = 0; cflags = 0; return false; + } + + return true; } //------------------------------------------------------------------------------------- static HRESULT _CompressBC( _In_ const Image& image, _In_ const Image& result, _In_ DWORD bcflags, - _In_ float alphaRef, _In_ bool degenerate ) + _In_ DWORD srgb, _In_ float alphaRef ) { if ( !image.pixels || !result.pixels ) return E_POINTER; @@ -64,25 +98,9 @@ static HRESULT _CompressBC( _In_ const Image& image, _In_ const Image& result, _ // Determine BC format encoder BC_ENCODE pfEncode; size_t blocksize; - switch(result.format) - { - case DXGI_FORMAT_BC1_UNORM: - case DXGI_FORMAT_BC1_UNORM_SRGB: pfEncode = nullptr; blocksize = 8; break; - case DXGI_FORMAT_BC2_UNORM: - case DXGI_FORMAT_BC2_UNORM_SRGB: pfEncode = D3DXEncodeBC2; blocksize = 16; break; - case DXGI_FORMAT_BC3_UNORM: - case DXGI_FORMAT_BC3_UNORM_SRGB: pfEncode = D3DXEncodeBC3; blocksize = 16; break; - case DXGI_FORMAT_BC4_UNORM: pfEncode = D3DXEncodeBC4U; blocksize = 8; break; - case DXGI_FORMAT_BC4_SNORM: pfEncode = D3DXEncodeBC4S; blocksize = 8; break; - case DXGI_FORMAT_BC5_UNORM: pfEncode = D3DXEncodeBC5U; blocksize = 16; break; - case DXGI_FORMAT_BC5_SNORM: pfEncode = D3DXEncodeBC5S; blocksize = 16; break; - case DXGI_FORMAT_BC6H_UF16: pfEncode = D3DXEncodeBC6HU; blocksize = 16; break; - case DXGI_FORMAT_BC6H_SF16: pfEncode = D3DXEncodeBC6HS; blocksize = 16; break; - case DXGI_FORMAT_BC7_UNORM: - case DXGI_FORMAT_BC7_UNORM_SRGB: pfEncode = D3DXEncodeBC7; blocksize = 16; break; - default: + DWORD cflags; + 
if ( !_DetermineEncoderSettings( result.format, pfEncode, blocksize, cflags ) ) return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); - } XMVECTOR temp[16]; const uint8_t *pSrc = image.pixels; @@ -91,55 +109,65 @@ static HRESULT _CompressBC( _In_ const Image& image, _In_ const Image& result, _ { const uint8_t *sptr = pSrc; uint8_t* dptr = pDest; - for( size_t count = 0; count < rowPitch; count += sbpp*4 ) + size_t ph = std::min<size_t>( 4, image.height - h ); + size_t w = 0; + for( size_t count = 0; (count < result.rowPitch) && (w < image.width); count += blocksize, w += 4 ) { - if ( !_LoadScanline( &temp[0], 4, sptr, rowPitch, format ) ) + size_t pw = std::min<size_t>( 4, image.width - w ); + assert( pw > 0 && ph > 0 ); + + if ( !_LoadScanline( &temp[0], pw, sptr, rowPitch, format ) ) return E_FAIL; - if ( image.height > 1 ) + if ( ph > 1 ) { - if ( !_LoadScanline( &temp[4], 4, sptr + rowPitch, rowPitch, format ) ) + if ( !_LoadScanline( &temp[4], pw, sptr + rowPitch, rowPitch, format ) ) return E_FAIL; - if ( image.height > 2 ) + if ( ph > 2 ) { - if ( !_LoadScanline( &temp[8], 4, sptr + rowPitch*2, rowPitch, format ) ) + if ( !_LoadScanline( &temp[8], pw, sptr + rowPitch*2, rowPitch, format ) ) return E_FAIL; - if ( !_LoadScanline( &temp[12], 4, sptr + rowPitch*3, rowPitch, format ) ) - return E_FAIL; + if ( ph > 3 ) + { + if ( !_LoadScanline( &temp[12], pw, sptr + rowPitch*3, rowPitch, format ) ) + return E_FAIL; + } } } - if ( degenerate ) + if ( pw != 4 || ph != 4 ) { - assert( image.width < 4 || image.height < 4 ); - const size_t uSrc[] = { 0, 0, 0, 1 }; + // Replicate pixels for partial block + static const size_t uSrc[] = { 0, 0, 0, 1 }; - if ( image.width < 4 ) + if ( pw < 4 ) { - for( size_t t=0; t < image.height && t < 4; ++t ) + for( size_t t = 0; t < ph && t < 4; ++t ) { - for( size_t s = image.width; s < 4; ++s ) + for( size_t s = pw; s < 4; ++s ) { - temp[ t*4 + s ] = temp[ t*4 + uSrc[s] ]; +#pragma prefast(suppress: 26000, "PREFAST false positive") + 
temp[ (t << 2) | s ] = temp[ (t << 2) | uSrc[s] ]; } } } - if ( image.height < 4 ) + if ( ph < 4 ) { - for( size_t t=image.height; t < 4; ++t ) + for( size_t t = ph; t < 4; ++t ) { - for( size_t s =0; s < 4; ++s ) + for( size_t s = 0; s < 4; ++s ) { - temp[ t*4 + s ] = temp[ uSrc[t]*4 + s ]; +#pragma prefast(suppress: 26000, "PREFAST false positive") + temp[ (t << 2) | s ] = temp[ (uSrc[t] << 2) | s ]; } } } } - _ConvertScanline( temp, 16, result.format, format, 0 ); + _ConvertScanline( temp, 16, result.format, format, cflags | srgb ); if ( pfEncode ) pfEncode( dptr, temp, bcflags ); @@ -161,14 +189,11 @@ static HRESULT _CompressBC( _In_ const Image& image, _In_ const Image& result, _ //------------------------------------------------------------------------------------- #ifdef _OPENMP static HRESULT _CompressBC_Parallel( _In_ const Image& image, _In_ const Image& result, _In_ DWORD bcflags, - _In_ float alphaRef ) + _In_ DWORD srgb, _In_ float alphaRef ) { if ( !image.pixels || !result.pixels ) return E_POINTER; - // Parallel version doesn't support degenerate case - assert( ((image.width % 4) == 0) && ((image.height % 4) == 0 ) ); - assert( image.width == result.width ); assert( image.height == result.height ); @@ -189,35 +214,19 @@ static HRESULT _CompressBC_Parallel( _In_ const Image& image, _In_ const Image& // Determine BC format encoder BC_ENCODE pfEncode; size_t blocksize; - switch(result.format) - { - case DXGI_FORMAT_BC1_UNORM: - case DXGI_FORMAT_BC1_UNORM_SRGB: pfEncode = nullptr; blocksize = 8; break; - case DXGI_FORMAT_BC2_UNORM: - case DXGI_FORMAT_BC2_UNORM_SRGB: pfEncode = D3DXEncodeBC2; blocksize = 16; break; - case DXGI_FORMAT_BC3_UNORM: - case DXGI_FORMAT_BC3_UNORM_SRGB: pfEncode = D3DXEncodeBC3; blocksize = 16; break; - case DXGI_FORMAT_BC4_UNORM: pfEncode = D3DXEncodeBC4U; blocksize = 8; break; - case DXGI_FORMAT_BC4_SNORM: pfEncode = D3DXEncodeBC4S; blocksize = 8; break; - case DXGI_FORMAT_BC5_UNORM: pfEncode = D3DXEncodeBC5U; blocksize = 16; 
break; - case DXGI_FORMAT_BC5_SNORM: pfEncode = D3DXEncodeBC5S; blocksize = 16; break; - case DXGI_FORMAT_BC6H_UF16: pfEncode = D3DXEncodeBC6HU; blocksize = 16; break; - case DXGI_FORMAT_BC6H_SF16: pfEncode = D3DXEncodeBC6HS; blocksize = 16; break; - case DXGI_FORMAT_BC7_UNORM: - case DXGI_FORMAT_BC7_UNORM_SRGB: pfEncode = D3DXEncodeBC7; blocksize = 16; break; - default: + DWORD cflags; + if ( !_DetermineEncoderSettings( result.format, pfEncode, blocksize, cflags ) ) return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); - } // Refactored version of loop to support parallel independance - const size_t nBlocks = std::max<size_t>(1, image.width / 4) * std::max<size_t>(1, image.height / 4); + const size_t nBlocks = std::max<size_t>(1, (image.width + 3) / 4 ) * std::max<size_t>(1, (image.height + 3) / 4 ); bool fail = false; #pragma omp parallel for for( int nb=0; nb < static_cast<int>( nBlocks ); ++nb ) { - const size_t nbWidth = std::max<size_t>(1, image.width / 4); + const size_t nbWidth = std::max<size_t>(1, (image.width + 3) / 4 ); const size_t y = nb / nbWidth; const size_t x = nb - (y*nbWidth); @@ -229,20 +238,61 @@ static HRESULT _CompressBC_Parallel( _In_ const Image& image, _In_ const Image& uint8_t *pDest = result.pixels + (nb*blocksize); + size_t ph = std::min<size_t>( 4, image.height - y ); + size_t pw = std::min<size_t>( 4, image.width - x ); + assert( pw > 0 && ph > 0 ); + XMVECTOR temp[16]; - if ( !_LoadScanline( &temp[0], 4, pSrc, rowPitch, format ) ) + if ( !_LoadScanline( &temp[0], pw, pSrc, rowPitch, format ) ) fail = true; - if ( !_LoadScanline( &temp[4], 4, pSrc + rowPitch, rowPitch, format ) ) - fail = true; + if ( ph > 1 ) + { + if ( !_LoadScanline( &temp[4], pw, pSrc + rowPitch, rowPitch, format ) ) + fail = true; - if ( !_LoadScanline( &temp[8], 4, pSrc + rowPitch*2, rowPitch, format ) ) - fail = true; + if ( ph > 2 ) + { + if ( !_LoadScanline( &temp[8], pw, pSrc + rowPitch*2, rowPitch, format ) ) + fail = true; - if ( !_LoadScanline( &temp[12], 4, 
pSrc + rowPitch*3, rowPitch, format ) ) - fail = true; + if ( ph > 3 ) + { + if ( !_LoadScanline( &temp[12], pw, pSrc + rowPitch*3, rowPitch, format ) ) + fail = true; + } + } + } + + if ( pw != 4 || ph != 4 ) + { + // Replicate pixels for partial block + static const size_t uSrc[] = { 0, 0, 0, 1 }; + + if ( pw < 4 ) + { + for( size_t t = 0; t < ph && t < 4; ++t ) + { + for( size_t s = pw; s < 4; ++s ) + { + temp[ (t << 2) | s ] = temp[ (t << 2) | uSrc[s] ]; + } + } + } + + if ( ph < 4 ) + { + for( size_t t = ph; t < 4; ++t ) + { + for( size_t s = 0; s < 4; ++s ) + { + temp[ (t << 2) | s ] = temp[ (uSrc[t] << 2) | s ]; + } + } + } + } - _ConvertScanline( temp, 16, result.format, format, 0 ); + _ConvertScanline( temp, 16, result.format, format, cflags | srgb ); if ( pfEncode ) pfEncode( pDest, temp, bcflags ); @@ -313,21 +363,6 @@ static HRESULT _DecompressBC( _In_ const Image& cImage, _In_ const Image& result assert( cImage.width == result.width ); assert( cImage.height == result.height ); - // Image must be a multiple of 4 (degenerate cases of 1x1, 1x2, 2x1, and 2x2 are allowed) - size_t width = cImage.width; - if ( (width % 4) != 0 ) - { - if ( width != 1 && width != 2 ) - return E_INVALIDARG; - } - - size_t height = cImage.height; - if ( (height % 4) != 0 ) - { - if ( height != 1 && height != 2 ) - return E_INVALIDARG; - } - const DXGI_FORMAT format = result.format; size_t dbpp = BitsPerPixel( format ); if ( !dbpp ) @@ -390,26 +425,34 @@ static HRESULT _DecompressBC( _In_ const Image& cImage, _In_ const Image& result { const uint8_t *sptr = pSrc; uint8_t* dptr = pDest; - for( size_t count = 0; count < cImage.rowPitch; count += sbpp ) + size_t ph = std::min<size_t>( 4, cImage.height - h ); + size_t w = 0; + for( size_t count = 0; (count < cImage.rowPitch) && (w < cImage.width); count += sbpp, w += 4 ) { pfDecode( temp, sptr ); _ConvertScanline( temp, 16, format, cformat, 0 ); - if ( !_StoreScanline( dptr, rowPitch, format, &temp[0], 4 ) ) + size_t pw = 
std::min<size_t>( 4, cImage.width - w ); + assert( pw > 0 && ph > 0 ); + + if ( !_StoreScanline( dptr, rowPitch, format, &temp[0], pw ) ) return E_FAIL; - if ( result.height > 1 ) + if ( ph > 1 ) { - if ( !_StoreScanline( dptr + rowPitch, rowPitch, format, &temp[4], 4 ) ) + if ( !_StoreScanline( dptr + rowPitch, rowPitch, format, &temp[4], pw ) ) return E_FAIL; - if ( result.height > 2 ) + if ( ph > 2 ) { - if ( !_StoreScanline( dptr + rowPitch*2, rowPitch, format, &temp[8], 4 ) ) + if ( !_StoreScanline( dptr + rowPitch*2, rowPitch, format, &temp[8], pw ) ) return E_FAIL; - if ( !_StoreScanline( dptr + rowPitch*3, rowPitch, format, &temp[12], 4 ) ) - return E_FAIL; + if ( ph > 3 ) + { + if ( !_StoreScanline( dptr + rowPitch*3, rowPitch, format, &temp[12], pw ) ) + return E_FAIL; + } } } @@ -425,6 +468,92 @@ static HRESULT _DecompressBC( _In_ const Image& cImage, _In_ const Image& result } +//------------------------------------------------------------------------------------- +bool _IsAlphaAllOpaqueBC( _In_ const Image& cImage ) +{ + if ( !cImage.pixels ) + return false; + + // Promote "typeless" BC formats + DXGI_FORMAT cformat; + switch( cImage.format ) + { + case DXGI_FORMAT_BC1_TYPELESS: cformat = DXGI_FORMAT_BC1_UNORM; break; + case DXGI_FORMAT_BC2_TYPELESS: cformat = DXGI_FORMAT_BC2_UNORM; break; + case DXGI_FORMAT_BC3_TYPELESS: cformat = DXGI_FORMAT_BC3_UNORM; break; + case DXGI_FORMAT_BC7_TYPELESS: cformat = DXGI_FORMAT_BC7_UNORM; break; + default: cformat = cImage.format; break; + } + + // Determine BC format decoder + BC_DECODE pfDecode; + size_t sbpp; + switch(cformat) + { + case DXGI_FORMAT_BC1_UNORM: + case DXGI_FORMAT_BC1_UNORM_SRGB: pfDecode = D3DXDecodeBC1; sbpp = 8; break; + case DXGI_FORMAT_BC2_UNORM: + case DXGI_FORMAT_BC2_UNORM_SRGB: pfDecode = D3DXDecodeBC2; sbpp = 16; break; + case DXGI_FORMAT_BC3_UNORM: + case DXGI_FORMAT_BC3_UNORM_SRGB: pfDecode = D3DXDecodeBC3; sbpp = 16; break; + case DXGI_FORMAT_BC7_UNORM: + case 
DXGI_FORMAT_BC7_UNORM_SRGB: pfDecode = D3DXDecodeBC7; sbpp = 16; break; + default: + // BC4, BC5, and BC6 don't have alpha channels + return false; + } + + // Scan blocks for non-opaque alpha + static const XMVECTORF32 threshold = { 0.99f, 0.99f, 0.99f, 0.99f }; + + XMVECTOR temp[16]; + const uint8_t *pPixels = cImage.pixels; + for( size_t h = 0; h < cImage.height; h += 4 ) + { + const uint8_t *ptr = pPixels; + size_t ph = std::min<size_t>( 4, cImage.height - h ); + size_t w = 0; + for( size_t count = 0; (count < cImage.rowPitch) && (w < cImage.width); count += sbpp, w += 4 ) + { + pfDecode( temp, ptr ); + + size_t pw = std::min<size_t>( 4, cImage.width - w ); + assert( pw > 0 && ph > 0 ); + + if ( pw == 4 && ph == 4 ) + { + // Full blocks + for( size_t j = 0; j < 16; ++j ) + { + XMVECTOR alpha = XMVectorSplatW( temp[j] ); + if ( XMVector4Less( alpha, threshold ) ) + return false; + } + } + else + { + // Handle partial blocks + for( size_t y = 0; y < ph; ++y ) + { + for( size_t x = 0; x < pw; ++x ) + { + XMVECTOR alpha = XMVectorSplatW( temp[ y * 4 + x ] ); + if ( XMVector4Less( alpha, threshold ) ) + return false; + } + } + } + + ptr += sbpp; + } + + pPixels += cImage.rowPitch; + } + + return true; +} + + //===================================================================================== // Entry-points //===================================================================================== @@ -432,34 +561,18 @@ static HRESULT _DecompressBC( _In_ const Image& cImage, _In_ const Image& result //------------------------------------------------------------------------------------- // Compression //------------------------------------------------------------------------------------- +_Use_decl_annotations_ HRESULT Compress( const Image& srcImage, DXGI_FORMAT format, DWORD compress, float alphaRef, ScratchImage& image ) { - if ( IsCompressed(srcImage.format) || !IsCompressed(format) || IsTypeless(format) ) + if ( IsCompressed(srcImage.format) || 
!IsCompressed(format) ) return E_INVALIDARG; - // Image size must be a multiple of 4 (degenerate cases for mipmaps are allowed) - bool degenerate = false; - - size_t width = srcImage.width; - if ( (width % 4) != 0 ) - { - if ( width != 1 && width != 2 ) - return E_INVALIDARG; - - degenerate = true; - } - - size_t height = srcImage.height; - if ( (height % 4) != 0 ) - { - if ( height != 1 && height != 2 ) - return E_INVALIDARG; - - degenerate = true; - } + if ( IsTypeless(format) + || IsTypeless(srcImage.format) || IsPlanar(srcImage.format) || IsPalettized(srcImage.format) ) + return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); // Create compressed image - HRESULT hr = image.Initialize2D( format, width, height, 1, 1 ); + HRESULT hr = image.Initialize2D( format, srcImage.width, srcImage.height, 1, 1 ); if ( FAILED(hr) ) return hr; @@ -471,17 +584,17 @@ HRESULT Compress( const Image& srcImage, DXGI_FORMAT format, DWORD compress, flo } // Compress single image - if ( (compress & TEX_COMPRESS_PARALLEL) && !degenerate ) + if (compress & TEX_COMPRESS_PARALLEL) { #ifndef _OPENMP return E_NOTIMPL; #else - hr = _CompressBC_Parallel( srcImage, *img, _GetBCFlags( compress ), alphaRef ); + hr = _CompressBC_Parallel( srcImage, *img, _GetBCFlags( compress ), _GetSRGBFlags( compress ), alphaRef ); #endif // _OPENMP } else { - hr = _CompressBC( srcImage, *img, _GetBCFlags( compress ), alphaRef, degenerate ); + hr = _CompressBC( srcImage, *img, _GetBCFlags( compress ), _GetSRGBFlags( compress ), alphaRef ); } if ( FAILED(hr) ) @@ -490,29 +603,19 @@ HRESULT Compress( const Image& srcImage, DXGI_FORMAT format, DWORD compress, flo return hr; } +_Use_decl_annotations_ HRESULT Compress( const Image* srcImages, size_t nimages, const TexMetadata& metadata, DXGI_FORMAT format, DWORD compress, float alphaRef, ScratchImage& cImages ) { if ( !srcImages || !nimages ) return E_INVALIDARG; - if ( !IsCompressed(format) || IsTypeless(format) ) + if ( IsCompressed(metadata.format) || 
!IsCompressed(format) ) return E_INVALIDARG; - // Image size must be a multiple of 4 (degenerate cases for mipmaps are allowed) - size_t width = srcImages[0].width; - if ( (width % 4) != 0 ) - { - if ( width != 1 && width != 2 ) - return E_INVALIDARG; - } - - size_t height = srcImages[0].height; - if ( (height % 4) != 0 ) - { - if ( height != 1 && height != 2 ) - return E_INVALIDARG; - } + if ( IsTypeless(format) + || IsTypeless(metadata.format) || IsPlanar(metadata.format) || IsPalettized(metadata.format) ) + return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); cImages.Release(); @@ -541,24 +644,20 @@ HRESULT Compress( const Image* srcImages, size_t nimages, const TexMetadata& met const Image& src = srcImages[ index ]; - height = src.height; - width = src.width; - if ( width != dest[ index ].width || height != dest[ index ].height ) + if ( src.width != dest[ index ].width || src.height != dest[ index ].height ) { cImages.Release(); return E_FAIL; } - bool degenerate = ((height < 4) || (width < 4)) != 0; - - if ( (compress & TEX_COMPRESS_PARALLEL) && !degenerate) + if ( (compress & TEX_COMPRESS_PARALLEL) ) { #ifndef _OPENMP return E_NOTIMPL; #else if ( compress & TEX_COMPRESS_PARALLEL ) { - hr = _CompressBC_Parallel( src, dest[ index ], _GetBCFlags( compress ), alphaRef ); + hr = _CompressBC_Parallel( src, dest[ index ], _GetBCFlags( compress ), _GetSRGBFlags( compress ), alphaRef ); if ( FAILED(hr) ) { cImages.Release(); @@ -569,7 +668,7 @@ HRESULT Compress( const Image* srcImages, size_t nimages, const TexMetadata& met } else { - hr = _CompressBC( src, dest[ index ], _GetBCFlags( compress ), alphaRef, degenerate ); + hr = _CompressBC( src, dest[ index ], _GetBCFlags( compress ), _GetSRGBFlags( compress ), alphaRef ); if ( FAILED(hr) ) { cImages.Release(); @@ -585,9 +684,10 @@ HRESULT Compress( const Image* srcImages, size_t nimages, const TexMetadata& met //------------------------------------------------------------------------------------- // Decompression 
//------------------------------------------------------------------------------------- +_Use_decl_annotations_ HRESULT Decompress( const Image& cImage, DXGI_FORMAT format, ScratchImage& image ) { - if ( IsCompressed(format) || IsTypeless(format) ) + if ( !IsCompressed(cImage.format) || IsCompressed(format) ) return E_INVALIDARG; if ( format == DXGI_FORMAT_UNKNOWN ) @@ -600,8 +700,14 @@ HRESULT Decompress( const Image& cImage, DXGI_FORMAT format, ScratchImage& image return E_INVALIDARG; } } - else if ( !IsCompressed(cImage.format) || !IsValid(format) ) - return E_INVALIDARG; + else + { + if ( !IsValid(format) ) + return E_INVALIDARG; + + if ( IsTypeless(format) || IsPlanar(format) || IsPalettized(format) ) + return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); + } // Create decompressed image HRESULT hr = image.Initialize2D( format, cImage.width, cImage.height, 1, 1 ); @@ -623,13 +729,14 @@ HRESULT Decompress( const Image& cImage, DXGI_FORMAT format, ScratchImage& image return hr; } +_Use_decl_annotations_ HRESULT Decompress( const Image* cImages, size_t nimages, const TexMetadata& metadata, DXGI_FORMAT format, ScratchImage& images ) { if ( !cImages || !nimages ) return E_INVALIDARG; - if ( IsCompressed(format) || IsTypeless(format) ) + if ( !IsCompressed(metadata.format) || IsCompressed(format) ) return E_INVALIDARG; if ( format == DXGI_FORMAT_UNKNOWN ) @@ -642,8 +749,14 @@ HRESULT Decompress( const Image* cImages, size_t nimages, const TexMetadata& met return E_FAIL; } } - else if ( !IsValid(format) ) - return E_INVALIDARG; + else + { + if ( !IsValid(format) ) + return E_INVALIDARG; + + if ( IsTypeless(format) || IsPlanar(format) || IsPalettized(format) ) + return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); + } images.Release(); diff --git a/thirdparty/directxtex/DirectXTex/DirectXTexCompressGPU.cpp b/thirdparty/directxtex/DirectXTex/DirectXTexCompressGPU.cpp new file mode 100644 index 00000000..46b1a867 --- /dev/null +++
b/thirdparty/directxtex/DirectXTex/DirectXTexCompressGPU.cpp @@ -0,0 +1,402 @@ +//------------------------------------------------------------------------------------- +// DirectXTexCompressGPU.cpp +// +// DirectX Texture Library - DirectCompute-based texture compression +// +// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF +// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A +// PARTICULAR PURPOSE. +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// +// http://go.microsoft.com/fwlink/?LinkId=248926 +//------------------------------------------------------------------------------------- + +#include "directxtexp.h" + +#include "bcdirectcompute.h" + +namespace DirectX +{ + +inline static DWORD _GetSRGBFlags( _In_ DWORD compress ) +{ + static_assert( TEX_COMPRESS_SRGB_IN == TEX_FILTER_SRGB_IN, "TEX_COMPRESS_SRGB* should match TEX_FILTER_SRGB*" ); + static_assert( TEX_COMPRESS_SRGB_OUT == TEX_FILTER_SRGB_OUT, "TEX_COMPRESS_SRGB* should match TEX_FILTER_SRGB*" ); + static_assert( TEX_COMPRESS_SRGB == TEX_FILTER_SRGB, "TEX_COMPRESS_SRGB* should match TEX_FILTER_SRGB*" ); + return ( compress & TEX_COMPRESS_SRGB ); +} + + +//------------------------------------------------------------------------------------- +// Converts to R8G8B8A8_UNORM or R8G8B8A8_UNORM_SRGB doing any conversion logic needed +//------------------------------------------------------------------------------------- +static HRESULT _ConvertToRGBA32( _In_ const Image& srcImage, _In_ ScratchImage& image, bool srgb, _In_ DWORD filter ) +{ + if ( !srcImage.pixels ) + return E_POINTER; + + DXGI_FORMAT format = srgb ? 
DXGI_FORMAT_R8G8B8A8_UNORM_SRGB : DXGI_FORMAT_R8G8B8A8_UNORM; + + HRESULT hr = image.Initialize2D( format, srcImage.width, srcImage.height, 1, 1 ); + if ( FAILED(hr) ) + return hr; + + const Image *img = image.GetImage( 0, 0, 0 ); + if ( !img ) + { + image.Release(); + return E_POINTER; + } + + uint8_t* pDest = img->pixels; + if ( !pDest ) + { + image.Release(); + return E_POINTER; + } + + ScopedAlignedArrayXMVECTOR scanline( reinterpret_cast<XMVECTOR*>( _aligned_malloc( ( sizeof(XMVECTOR) * srcImage.width ), 16 ) ) ); + if ( !scanline ) + { + image.Release(); + return E_OUTOFMEMORY; + } + + const uint8_t *pSrc = srcImage.pixels; + for( size_t h = 0; h < srcImage.height; ++h ) + { + if ( !_LoadScanline( scanline.get(), srcImage.width, pSrc, srcImage.rowPitch, srcImage.format ) ) + { + image.Release(); + return E_FAIL; + } + + _ConvertScanline( scanline.get(), srcImage.width, format, srcImage.format, filter ); + + if ( !_StoreScanline( pDest, img->rowPitch, format, scanline.get(), srcImage.width ) ) + { + image.Release(); + return E_FAIL; + } + + pSrc += srcImage.rowPitch; + pDest += img->rowPitch; + } + + return S_OK; +} + + +//------------------------------------------------------------------------------------- +// Converts to DXGI_FORMAT_R32G32B32A32_FLOAT doing any conversion logic needed +//------------------------------------------------------------------------------------- +static HRESULT _ConvertToRGBAF32( const Image& srcImage, ScratchImage& image, _In_ DWORD filter ) +{ + if ( !srcImage.pixels ) + return E_POINTER; + + HRESULT hr = image.Initialize2D( DXGI_FORMAT_R32G32B32A32_FLOAT, srcImage.width, srcImage.height, 1, 1 ); + if ( FAILED(hr) ) + return hr; + + const Image *img = image.GetImage( 0, 0, 0 ); + if ( !img ) + { + image.Release(); + return E_POINTER; + } + + uint8_t* pDest = img->pixels; + if ( !pDest ) + { + image.Release(); + return E_POINTER; + } + + const uint8_t *pSrc = srcImage.pixels; + for( size_t h = 0; h < srcImage.height; ++h ) + { + 
if ( !_LoadScanline( reinterpret_cast<XMVECTOR*>(pDest), srcImage.width, pSrc, srcImage.rowPitch, srcImage.format ) ) + { + image.Release(); + return E_FAIL; + } + + _ConvertScanline( reinterpret_cast<XMVECTOR*>(pDest), srcImage.width, DXGI_FORMAT_R32G32B32A32_FLOAT, srcImage.format, filter ); + + pSrc += srcImage.rowPitch; + pDest += img->rowPitch; + } + + return S_OK; +} + + +//------------------------------------------------------------------------------------- +// Compress using GPU, converting to the proper input format for the shader if needed +//------------------------------------------------------------------------------------- +inline static HRESULT _GPUCompress( _In_ GPUCompressBC* gpubc, _In_ const Image& srcImage, _In_ const Image& destImage, _In_ DWORD compress ) +{ + if ( !gpubc ) + return E_POINTER; + + assert( srcImage.pixels && destImage.pixels ); + + DXGI_FORMAT format = gpubc->GetSourceFormat(); + + if ( srcImage.format == format ) + { + // Input is already in our required source format + return gpubc->Compress( srcImage, destImage ); + } + else + { + // Convert format and then use as the source image + ScratchImage image; + HRESULT hr; + + DWORD srgb = _GetSRGBFlags( compress ); + + switch( format ) + { + case DXGI_FORMAT_R8G8B8A8_UNORM: + hr = _ConvertToRGBA32( srcImage, image, false, srgb ); + break; + + case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: + hr = _ConvertToRGBA32( srcImage, image, true, srgb ); + break; + + case DXGI_FORMAT_R32G32B32A32_FLOAT: + hr = _ConvertToRGBAF32( srcImage, image, srgb ); + break; + + default: + hr = E_UNEXPECTED; + break; + } + + if ( FAILED(hr) ) + return hr; + + const Image *img = image.GetImage( 0, 0, 0 ); + if ( !img ) + return E_POINTER; + + return gpubc->Compress( *img, destImage ); + } +} + + +//===================================================================================== +// Entry-points +//===================================================================================== + 
+//------------------------------------------------------------------------------------- +// Compression +//------------------------------------------------------------------------------------- +_Use_decl_annotations_ +HRESULT Compress( ID3D11Device* pDevice, const Image& srcImage, DXGI_FORMAT format, DWORD compress, float alphaWeight, ScratchImage& image ) +{ + if ( !pDevice || IsCompressed(srcImage.format) || !IsCompressed(format) ) + return E_INVALIDARG; + + if ( IsTypeless(format) + || IsTypeless(srcImage.format) || IsPlanar(srcImage.format) || IsPalettized(srcImage.format) ) + return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); + + // Setup GPU compressor + std::unique_ptr<GPUCompressBC> gpubc( new (std::nothrow) GPUCompressBC ); + if ( !gpubc ) + return E_OUTOFMEMORY; + + HRESULT hr = gpubc->Initialize( pDevice ); + if ( FAILED(hr) ) + return hr; + + hr = gpubc->Prepare( srcImage.width, srcImage.height, format, alphaWeight, !(compress & TEX_COMPRESS_BC7_USE_3SUBSETS) ); + if ( FAILED(hr) ) + return hr; + + // Create workspace for result + hr = image.Initialize2D( format, srcImage.width, srcImage.height, 1, 1 ); + if ( FAILED(hr) ) + return hr; + + const Image *img = image.GetImage( 0, 0, 0 ); + if ( !img ) + { + image.Release(); + return E_POINTER; + } + + hr = _GPUCompress( gpubc.get(), srcImage, *img, compress ); + if ( FAILED(hr) ) + image.Release(); + + return hr; +} + +_Use_decl_annotations_ +HRESULT Compress( ID3D11Device* pDevice, const Image* srcImages, size_t nimages, const TexMetadata& metadata, + DXGI_FORMAT format, DWORD compress, float alphaWeight, ScratchImage& cImages ) +{ + if ( !pDevice || !srcImages || !nimages ) + return E_INVALIDARG; + + if ( IsCompressed(metadata.format) || !IsCompressed(format) ) + return E_INVALIDARG; + + if ( IsTypeless(format) + || IsTypeless(metadata.format) || IsPlanar(metadata.format) || IsPalettized(metadata.format) ) + return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); + + cImages.Release(); + + // Setup GPU 
compressor + std::unique_ptr<GPUCompressBC> gpubc( new (std::nothrow) GPUCompressBC ); + if ( !gpubc ) + return E_OUTOFMEMORY; + + HRESULT hr = gpubc->Initialize( pDevice ); + if ( FAILED(hr) ) + return hr; + + // Create workspace for result + TexMetadata mdata2 = metadata; + mdata2.format = format; + hr = cImages.Initialize( mdata2 ); + if ( FAILED(hr) ) + return hr; + + if ( nimages != cImages.GetImageCount() ) + { + cImages.Release(); + return E_FAIL; + } + + const Image* dest = cImages.GetImages(); + if ( !dest ) + { + cImages.Release(); + return E_POINTER; + } + + // Process images (ordered by size) + switch( metadata.dimension ) + { + case TEX_DIMENSION_TEXTURE1D: + case TEX_DIMENSION_TEXTURE2D: + { + size_t w = metadata.width; + size_t h = metadata.height; + + for( size_t level=0; level < metadata.mipLevels; ++level ) + { + hr = gpubc->Prepare( w, h, format, alphaWeight, !(compress & TEX_COMPRESS_BC7_USE_3SUBSETS) ); + if ( FAILED(hr) ) + { + cImages.Release(); + return hr; + } + + for( size_t item = 0; item < metadata.arraySize; ++item ) + { + size_t index = metadata.ComputeIndex( level, item, 0 ); + if ( index >= nimages ) + { + cImages.Release(); + return E_FAIL; + } + + assert( dest[ index ].format == format ); + + const Image& src = srcImages[ index ]; + + if ( src.width != dest[ index ].width || src.height != dest[ index ].height ) + { + cImages.Release(); + return E_FAIL; + } + + hr = _GPUCompress( gpubc.get(), src, dest[ index ], compress ); + if ( FAILED(hr) ) + { + cImages.Release(); + return hr; + } + } + + if ( h > 1 ) + h >>= 1; + + if ( w > 1 ) + w >>= 1; + } + } + break; + + case TEX_DIMENSION_TEXTURE3D: + { + size_t w = metadata.width; + size_t h = metadata.height; + size_t d = metadata.depth; + + for( size_t level=0; level < metadata.mipLevels; ++level ) + { + hr = gpubc->Prepare( w, h, format, alphaWeight, !(compress & TEX_COMPRESS_BC7_USE_3SUBSETS) ); + if ( FAILED(hr) ) + { + cImages.Release(); + return hr; + } + + for( size_t slice=0; 
slice < d; ++slice ) + { + size_t index = metadata.ComputeIndex( level, 0, slice ); + if ( index >= nimages ) + { + cImages.Release(); + return E_FAIL; + } + + assert( dest[ index ].format == format ); + + const Image& src = srcImages[ index ]; + + if ( src.width != dest[ index ].width || src.height != dest[ index ].height ) + { + cImages.Release(); + return E_FAIL; + } + + hr = _GPUCompress( gpubc.get(), src, dest[ index ], compress ); + if ( FAILED(hr) ) + { + cImages.Release(); + return hr; + } + } + + if ( h > 1 ) + h >>= 1; + + if ( w > 1 ) + w >>= 1; + + if ( d > 1 ) + d >>= 1; + } + } + break; + + default: + return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); + } + + return S_OK; +} + +}; // namespace diff --git a/thirdparty/directxtex/DirectXTex/DirectXTexConvert.cpp b/thirdparty/directxtex/DirectXTex/DirectXTexConvert.cpp index 3496ce5b..c063213a 100644 --- a/thirdparty/directxtex/DirectXTex/DirectXTexConvert.cpp +++ b/thirdparty/directxtex/DirectXTex/DirectXTexConvert.cpp @@ -23,28 +23,196 @@ using namespace DirectX::PackedVector; #endif +using Microsoft::WRL::ComPtr; + +namespace +{ +#if DIRECTX_MATH_VERSION < 306 + inline float round_to_nearest( float x ) + { + // Round to nearest (even) + float i = floorf(x); + x -= i; + if(x < 0.5f) + return i; + if(x > 0.5f) + return i + 1.f; + + float int_part; + modff( i / 2.f, &int_part ); + if ( (2.f*int_part) == i ) + { + return i; + } + + return i + 1.f; + } +#endif + + inline uint32_t FloatTo7e3(float Value) + { + uint32_t IValue = reinterpret_cast<uint32_t *>(&Value)[0]; + + if ( IValue & 0x80000000U ) + { + // Positive only + return 0; + } + else if (IValue > 0x41FF73FFU) + { + // The number is too large to be represented as a 7e3. Saturate. + return 0x3FFU; + } + else + { + if (IValue < 0x3E800000U) + { + // The number is too small to be represented as a normalized 7e3. + // Convert it to a denormalized value. 
+ uint32_t Shift = 125U - (IValue >> 23U); + IValue = (0x800000U | (IValue & 0x7FFFFFU)) >> Shift; + } + else + { + // Rebias the exponent to represent the value as a normalized 7e3. + IValue += 0xC2000000U; + } + + return ((IValue + 0x7FFFU + ((IValue >> 16U) & 1U)) >> 16U)&0x3FFU; + } + } + + inline float FloatFrom7e3( uint32_t Value ) + { + uint32_t Mantissa = (uint32_t)(Value & 0x7F); + + uint32_t Exponent = (Value & 0x380); + if (Exponent != 0) // The value is normalized + { + Exponent = (uint32_t)((Value >> 7) & 0x7); + } + else if (Mantissa != 0) // The value is denormalized + { + // Normalize the value in the resulting float + Exponent = 1; + + do + { + Exponent--; + Mantissa <<= 1; + } while ((Mantissa & 0x80) == 0); + + Mantissa &= 0x7F; + } + else // The value is zero + { + Exponent = (uint32_t)-124; + } + + uint32_t Result = ((Exponent + 124) << 23) | // Exponent + (Mantissa << 16); // Mantissa + + return reinterpret_cast<float*>(&Result)[0]; + } + + inline uint32_t FloatTo6e4(float Value) + { + uint32_t IValue = reinterpret_cast<uint32_t *>(&Value)[0]; + + if ( IValue & 0x80000000U ) + { + // Positive only + return 0; + } + else if (IValue > 0x43FEFFFFU) + { + // The number is too large to be represented as a 6e4. Saturate. + return 0x3FFU; + } + else + { + if (IValue < 0x3C800000U) + { + // The number is too small to be represented as a normalized 6e4. + // Convert it to a denormalized value. + uint32_t Shift = 121U - (IValue >> 23U); + IValue = (0x800000U | (IValue & 0x7FFFFFU)) >> Shift; + } + else + { + // Rebias the exponent to represent the value as a normalized 6e4. 
+ IValue += 0xC4000000U; + } + + return ((IValue + 0xFFFFU + ((IValue >> 17U) & 1U)) >> 17U)&0x3FFU; + } + } + + inline float FloatFrom6e4( uint32_t Value ) + { + uint32_t Mantissa = (uint32_t)(Value & 0x3F); + + uint32_t Exponent = (Value & 0x3C0); + if (Exponent != 0) // The value is normalized + { + Exponent = (uint32_t)((Value >> 6) & 0xF); + } + else if (Mantissa != 0) // The value is denormalized + { + // Normalize the value in the resulting float + Exponent = 1; + + do + { + Exponent--; + Mantissa <<= 1; + } while ((Mantissa & 0x40) == 0); + + Mantissa &= 0x3F; + } + else // The value is zero + { + Exponent = (uint32_t)-120; + } + + uint32_t Result = ((Exponent + 120) << 23) | // Exponent + (Mantissa << 17); // Mantissa + + return reinterpret_cast<float*>(&Result)[0]; + } +}; + namespace DirectX { +static const XMVECTORF32 g_Grayscale = { 0.2125f, 0.7154f, 0.0721f, 0.0f }; +static const XMVECTORF32 g_HalfMin = { -65504.f, -65504.f, -65504.f, -65504.f }; +static const XMVECTORF32 g_HalfMax = { 65504.f, 65504.f, 65504.f, 65504.f }; +static const XMVECTORF32 g_8BitBias = { 0.5f/255.f, 0.5f/255.f, 0.5f/255.f, 0.5f/255.f }; //------------------------------------------------------------------------------------- // Copies an image row with optional clearing of alpha value to 1.0 // (can be used in place as well) otherwise copies the image row unmodified. 
//------------------------------------------------------------------------------------- -void _CopyScanline( LPVOID pDestination, size_t outSize, LPCVOID pSource, size_t inSize, DXGI_FORMAT format, DWORD flags ) +void _CopyScanline(_When_(pDestination == pSource, _Inout_updates_bytes_(outSize)) + _When_(pDestination != pSource, _Out_writes_bytes_(outSize)) + LPVOID pDestination, _In_ size_t outSize, + _In_reads_bytes_(inSize) LPCVOID pSource, _In_ size_t inSize, + _In_ DXGI_FORMAT format, _In_ DWORD flags) { assert( pDestination && outSize > 0 ); assert( pSource && inSize > 0 ); - assert( IsValid(format) && !IsVideo(format) ); + assert( IsValid(format) && !IsPalettized(format) ); if ( flags & TEXP_SCANLINE_SETALPHA ) { - switch( format ) + switch( static_cast<int>(format) ) { //----------------------------------------------------------------------------- case DXGI_FORMAT_R32G32B32A32_TYPELESS: case DXGI_FORMAT_R32G32B32A32_FLOAT: case DXGI_FORMAT_R32G32B32A32_UINT: case DXGI_FORMAT_R32G32B32A32_SINT: + if ( inSize >= 16 && outSize >= 16 ) { uint32_t alpha; if ( format == DXGI_FORMAT_R32G32B32A32_FLOAT ) @@ -56,8 +224,8 @@ void _CopyScanline( LPVOID pDestination, size_t outSize, LPCVOID pSource, size_t if ( pDestination == pSource ) { - uint32_t *dPtr = reinterpret_cast<uint32_t*>(pDestination); - for( size_t count = 0; count < outSize; count += 16 ) + uint32_t *dPtr = reinterpret_cast<uint32_t*> (pDestination); + for( size_t count = 0; count < ( outSize - 15 ); count += 16 ) { dPtr += 3; *(dPtr++) = alpha; @@ -68,7 +236,7 @@ void _CopyScanline( LPVOID pDestination, size_t outSize, LPCVOID pSource, size_t const uint32_t * __restrict sPtr = reinterpret_cast<const uint32_t*>(pSource); uint32_t * __restrict dPtr = reinterpret_cast<uint32_t*>(pDestination); size_t size = std::min<size_t>( outSize, inSize ); - for( size_t count = 0; count < size; count += 16 ) + for( size_t count = 0; count < ( size - 15 ); count += 16 ) { *(dPtr++) = *(sPtr++); *(dPtr++) = *(sPtr++); @@ 
-87,6 +255,8 @@ void _CopyScanline( LPVOID pDestination, size_t outSize, LPCVOID pSource, size_t case DXGI_FORMAT_R16G16B16A16_UINT: case DXGI_FORMAT_R16G16B16A16_SNORM: case DXGI_FORMAT_R16G16B16A16_SINT: + case DXGI_FORMAT_Y416: + if ( inSize >= 8 && outSize >= 8 ) { uint16_t alpha; if ( format == DXGI_FORMAT_R16G16B16A16_FLOAT ) @@ -99,7 +269,7 @@ void _CopyScanline( LPVOID pDestination, size_t outSize, LPCVOID pSource, size_t if ( pDestination == pSource ) { uint16_t *dPtr = reinterpret_cast<uint16_t*>(pDestination); - for( size_t count = 0; count < outSize; count += 8 ) + for( size_t count = 0; count < ( outSize - 7 ); count += 8 ) { dPtr += 3; *(dPtr++) = alpha; @@ -110,7 +280,7 @@ void _CopyScanline( LPVOID pDestination, size_t outSize, LPCVOID pSource, size_t const uint16_t * __restrict sPtr = reinterpret_cast<const uint16_t*>(pSource); uint16_t * __restrict dPtr = reinterpret_cast<uint16_t*>(pDestination); size_t size = std::min<size_t>( outSize, inSize ); - for( size_t count = 0; count < size; count += 8 ) + for( size_t count = 0; count < ( size - 7 ); count += 8 ) { *(dPtr++) = *(sPtr++); *(dPtr++) = *(sPtr++); @@ -127,24 +297,29 @@ void _CopyScanline( LPVOID pDestination, size_t outSize, LPCVOID pSource, size_t case DXGI_FORMAT_R10G10B10A2_UNORM: case DXGI_FORMAT_R10G10B10A2_UINT: case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM: - if ( pDestination == pSource ) + case DXGI_FORMAT_Y410: + case 116 /* DXGI_FORMAT_R10G10B10_7E3_A2_FLOAT */: + case 117 /* DXGI_FORMAT_R10G10B10_6E4_A2_FLOAT */: + if ( inSize >= 4 && outSize >= 4 ) { - uint32_t *dPtr = reinterpret_cast<uint32_t*>(pDestination); - for( size_t count = 0; count < outSize; count += 4 ) + if ( pDestination == pSource ) { -#pragma warning(suppress: 6001 6101) // PREFast doesn't properly understand the aliasing here. 
- *dPtr |= 0xC0000000; - ++dPtr; + uint32_t *dPtr = reinterpret_cast<uint32_t*>(pDestination); + for( size_t count = 0; count < ( outSize - 3 ); count += 4 ) + { + *dPtr |= 0xC0000000; + ++dPtr; + } } - } - else - { - const uint32_t * __restrict sPtr = reinterpret_cast<const uint32_t*>(pSource); - uint32_t * __restrict dPtr = reinterpret_cast<uint32_t*>(pDestination); - size_t size = std::min<size_t>( outSize, inSize ); - for( size_t count = 0; count < size; count += 4 ) + else { - *(dPtr++) = *(sPtr++) | 0xC0000000; + const uint32_t * __restrict sPtr = reinterpret_cast<const uint32_t*>(pSource); + uint32_t * __restrict dPtr = reinterpret_cast<uint32_t*>(pDestination); + size_t size = std::min<size_t>( outSize, inSize ); + for( size_t count = 0; count < ( size - 3 ); count += 4 ) + { + *(dPtr++) = *(sPtr++) | 0xC0000000; + } } } return; @@ -159,13 +334,15 @@ void _CopyScanline( LPVOID pDestination, size_t outSize, LPCVOID pSource, size_t case DXGI_FORMAT_B8G8R8A8_UNORM: case DXGI_FORMAT_B8G8R8A8_TYPELESS: case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB: + case DXGI_FORMAT_AYUV: + if ( inSize >= 4 && outSize >= 4 ) { const uint32_t alpha = ( format == DXGI_FORMAT_R8G8B8A8_SNORM || format == DXGI_FORMAT_R8G8B8A8_SINT ) ? 
0x7f000000 : 0xff000000; if ( pDestination == pSource ) { uint32_t *dPtr = reinterpret_cast<uint32_t*>(pDestination); - for( size_t count = 0; count < outSize; count += 4 ) + for( size_t count = 0; count < ( outSize - 3 ); count += 4 ) { uint32_t t = *dPtr & 0xFFFFFF; t |= alpha; @@ -177,7 +354,7 @@ void _CopyScanline( LPVOID pDestination, size_t outSize, LPCVOID pSource, size_t const uint32_t * __restrict sPtr = reinterpret_cast<const uint32_t*>(pSource); uint32_t * __restrict dPtr = reinterpret_cast<uint32_t*>(pDestination); size_t size = std::min<size_t>( outSize, inSize ); - for( size_t count = 0; count < size; count += 4 ) + for( size_t count = 0; count < ( size - 3 ); count += 4 ) { uint32_t t = *(sPtr++) & 0xFFFFFF; t |= alpha; @@ -189,22 +366,25 @@ void _CopyScanline( LPVOID pDestination, size_t outSize, LPCVOID pSource, size_t //----------------------------------------------------------------------------- case DXGI_FORMAT_B5G5R5A1_UNORM: - if ( pDestination == pSource ) + if ( inSize >= 2 && outSize >= 2 ) { - uint16_t *dPtr = reinterpret_cast<uint16_t*>(pDestination); - for( size_t count = 0; count < outSize; count += 2 ) + if ( pDestination == pSource ) { - *(dPtr++) |= 0x8000; + uint16_t *dPtr = reinterpret_cast<uint16_t*>(pDestination); + for( size_t count = 0; count < ( outSize - 1 ); count += 2 ) + { + *(dPtr++) |= 0x8000; + } } - } - else - { - const uint16_t * __restrict sPtr = reinterpret_cast<const uint16_t*>(pSource); - uint16_t * __restrict dPtr = reinterpret_cast<uint16_t*>(pDestination); - size_t size = std::min<size_t>( outSize, inSize ); - for( size_t count = 0; count < size; count += 2 ) + else { - *(dPtr++) = *(sPtr++) | 0x8000; + const uint16_t * __restrict sPtr = reinterpret_cast<const uint16_t*>(pSource); + uint16_t * __restrict dPtr = reinterpret_cast<uint16_t*>(pDestination); + size_t size = std::min<size_t>( outSize, inSize ); + for( size_t count = 0; count < ( size - 1 ); count += 2 ) + { + *(dPtr++) = *(sPtr++) | 0x8000; + } } } 
return; @@ -214,29 +394,30 @@ void _CopyScanline( LPVOID pDestination, size_t outSize, LPCVOID pSource, size_t memset( pDestination, 0xff, outSize ); return; -#ifdef DXGI_1_2_FORMATS //----------------------------------------------------------------------------- case DXGI_FORMAT_B4G4R4A4_UNORM: - if ( pDestination == pSource ) + if ( inSize >= 2 && outSize >= 2 ) { - uint16_t *dPtr = reinterpret_cast<uint16_t*>(pDestination); - for( size_t count = 0; count < outSize; count += 2 ) + if ( pDestination == pSource ) { - *(dPtr++) |= 0xF000; + uint16_t *dPtr = reinterpret_cast<uint16_t*>(pDestination); + for( size_t count = 0; count < ( outSize - 1 ); count += 2 ) + { + *(dPtr++) |= 0xF000; + } } - } - else - { - const uint16_t * __restrict sPtr = reinterpret_cast<const uint16_t*>(pSource); - uint16_t * __restrict dPtr = reinterpret_cast<uint16_t*>(pDestination); - size_t size = std::min<size_t>( outSize, inSize ); - for( size_t count = 0; count < size; count += 2 ) + else { - *(dPtr++) = *(sPtr++) | 0xF000; + const uint16_t * __restrict sPtr = reinterpret_cast<const uint16_t*>(pSource); + uint16_t * __restrict dPtr = reinterpret_cast<uint16_t*>(pDestination); + size_t size = std::min<size_t>( outSize, inSize ); + for( size_t count = 0; count < ( size - 1 ); count += 2 ) + { + *(dPtr++) = *(sPtr++) | 0xF000; + } } } return; -#endif // DXGI_1_2_FORMATS } } @@ -253,11 +434,12 @@ void _CopyScanline( LPVOID pDestination, size_t outSize, LPCVOID pSource, size_t // Swizzles (RGB <-> BGR) an image row with optional clearing of alpha value to 1.0 // (can be used in place as well) otherwise copies the image row unmodified. 
//------------------------------------------------------------------------------------- +_Use_decl_annotations_ void _SwizzleScanline( LPVOID pDestination, size_t outSize, LPCVOID pSource, size_t inSize, DXGI_FORMAT format, DWORD flags ) { assert( pDestination && outSize > 0 ); assert( pSource && inSize > 0 ); - assert( IsValid(format) && !IsVideo(format) ); + assert( IsValid(format) && !IsPlanar(format) && !IsPalettized(format) ); switch( format ) { @@ -266,43 +448,45 @@ void _SwizzleScanline( LPVOID pDestination, size_t outSize, LPCVOID pSource, siz case DXGI_FORMAT_R10G10B10A2_UNORM: case DXGI_FORMAT_R10G10B10A2_UINT: case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM: - if ( flags & TEXP_SCANLINE_LEGACY ) + if ( inSize >= 4 && outSize >= 4 ) { - // Swap Red (R) and Blue (B) channel (used for D3DFMT_A2R10G10B10 legacy sources) - if ( pDestination == pSource ) + if ( flags & TEXP_SCANLINE_LEGACY ) { - uint32_t *dPtr = reinterpret_cast<uint32_t*>(pDestination); - for( size_t count = 0; count < outSize; count += 4 ) + // Swap Red (R) and Blue (B) channel (used for D3DFMT_A2R10G10B10 legacy sources) + if ( pDestination == pSource ) { -#pragma warning(suppress: 6001 6101) // PREFast doesn't properly understand the aliasing here. - uint32_t t = *dPtr; + uint32_t *dPtr = reinterpret_cast<uint32_t*>(pDestination); + for( size_t count = 0; count < ( outSize - 3 ); count += 4 ) + { + uint32_t t = *dPtr; - uint32_t t1 = (t & 0x3ff00000) >> 20; - uint32_t t2 = (t & 0x000003ff) << 20; - uint32_t t3 = (t & 0x000ffc00); - uint32_t ta = ( flags & TEXP_SCANLINE_SETALPHA ) ? 0xC0000000 : (t & 0xC0000000); + uint32_t t1 = (t & 0x3ff00000) >> 20; + uint32_t t2 = (t & 0x000003ff) << 20; + uint32_t t3 = (t & 0x000ffc00); + uint32_t ta = ( flags & TEXP_SCANLINE_SETALPHA ) ? 
0xC0000000 : (t & 0xC0000000); - *(dPtr++) = t1 | t2 | t3 | ta; + *(dPtr++) = t1 | t2 | t3 | ta; + } } - } - else - { - const uint32_t * __restrict sPtr = reinterpret_cast<const uint32_t*>(pSource); - uint32_t * __restrict dPtr = reinterpret_cast<uint32_t*>(pDestination); - size_t size = std::min<size_t>( outSize, inSize ); - for( size_t count = 0; count < size; count += 4 ) + else { - uint32_t t = *(sPtr++); + const uint32_t * __restrict sPtr = reinterpret_cast<const uint32_t*>(pSource); + uint32_t * __restrict dPtr = reinterpret_cast<uint32_t*>(pDestination); + size_t size = std::min<size_t>( outSize, inSize ); + for( size_t count = 0; count < ( size - 3 ); count += 4 ) + { + uint32_t t = *(sPtr++); - uint32_t t1 = (t & 0x3ff00000) >> 20; - uint32_t t2 = (t & 0x000003ff) << 20; - uint32_t t3 = (t & 0x000ffc00); - uint32_t ta = ( flags & TEXP_SCANLINE_SETALPHA ) ? 0xC0000000 : (t & 0xC0000000); + uint32_t t1 = (t & 0x3ff00000) >> 20; + uint32_t t2 = (t & 0x000003ff) << 20; + uint32_t t3 = (t & 0x000ffc00); + uint32_t ta = ( flags & TEXP_SCANLINE_SETALPHA ) ? 
0xC0000000 : (t & 0xC0000000); - *(dPtr++) = t1 | t2 | t3 | ta; + *(dPtr++) = t1 | t2 | t3 | ta; + } } + return; } - return; } break; @@ -316,40 +500,88 @@ void _SwizzleScanline( LPVOID pDestination, size_t outSize, LPCVOID pSource, siz case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB: case DXGI_FORMAT_B8G8R8X8_TYPELESS: case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB: - // Swap Red (R) and Blue (B) channels (used to convert from DXGI 1.1 BGR formats to DXGI 1.0 RGB) - if ( pDestination == pSource ) + if ( inSize >= 4 && outSize >= 4 ) { - uint32_t *dPtr = reinterpret_cast<uint32_t*>(pDestination); - for( size_t count = 0; count < outSize; count += 4 ) + // Swap Red (R) and Blue (B) channels (used to convert from DXGI 1.1 BGR formats to DXGI 1.0 RGB) + if ( pDestination == pSource ) { - uint32_t t = *dPtr; + uint32_t *dPtr = reinterpret_cast<uint32_t*>(pDestination); + for( size_t count = 0; count < ( outSize - 3 ); count += 4 ) + { + uint32_t t = *dPtr; - uint32_t t1 = (t & 0x00ff0000) >> 16; - uint32_t t2 = (t & 0x000000ff) << 16; - uint32_t t3 = (t & 0x0000ff00); - uint32_t ta = ( flags & TEXP_SCANLINE_SETALPHA ) ? 0xff000000 : (t & 0xFF000000); + uint32_t t1 = (t & 0x00ff0000) >> 16; + uint32_t t2 = (t & 0x000000ff) << 16; + uint32_t t3 = (t & 0x0000ff00); + uint32_t ta = ( flags & TEXP_SCANLINE_SETALPHA ) ? 0xff000000 : (t & 0xFF000000); - *(dPtr++) = t1 | t2 | t3 | ta; + *(dPtr++) = t1 | t2 | t3 | ta; + } } + else + { + const uint32_t * __restrict sPtr = reinterpret_cast<const uint32_t*>(pSource); + uint32_t * __restrict dPtr = reinterpret_cast<uint32_t*>(pDestination); + size_t size = std::min<size_t>( outSize, inSize ); + for( size_t count = 0; count < ( size - 3 ); count += 4 ) + { + uint32_t t = *(sPtr++); + + uint32_t t1 = (t & 0x00ff0000) >> 16; + uint32_t t2 = (t & 0x000000ff) << 16; + uint32_t t3 = (t & 0x0000ff00); + uint32_t ta = ( flags & TEXP_SCANLINE_SETALPHA ) ? 
0xff000000 : (t & 0xFF000000); + + *(dPtr++) = t1 | t2 | t3 | ta; + } + } + return; } - else + break; + + //--------------------------------------------------------------------------------- + case DXGI_FORMAT_YUY2: + if ( inSize >= 4 && outSize >= 4 ) { - const uint32_t * __restrict sPtr = reinterpret_cast<const uint32_t*>(pSource); - uint32_t * __restrict dPtr = reinterpret_cast<uint32_t*>(pDestination); - size_t size = std::min<size_t>( outSize, inSize ); - for( size_t count = 0; count < size; count += 4 ) + if ( flags & TEXP_SCANLINE_LEGACY ) { - uint32_t t = *(sPtr++); + // Reorder YUV components (used to convert legacy UYVY -> YUY2) + if ( pDestination == pSource ) + { + uint32_t *dPtr = reinterpret_cast<uint32_t*>(pDestination); + for( size_t count = 0; count < ( outSize - 3 ); count += 4 ) + { + uint32_t t = *dPtr; - uint32_t t1 = (t & 0x00ff0000) >> 16; - uint32_t t2 = (t & 0x000000ff) << 16; - uint32_t t3 = (t & 0x0000ff00); - uint32_t ta = ( flags & TEXP_SCANLINE_SETALPHA ) ? 0xff000000 : (t & 0xFF000000); + uint32_t t1 = (t & 0x000000ff) << 8; + uint32_t t2 = (t & 0x0000ff00) >> 8; + uint32_t t3 = (t & 0x00ff0000) << 8; + uint32_t t4 = (t & 0xff000000) >> 8; - *(dPtr++) = t1 | t2 | t3 | ta; + *(dPtr++) = t1 | t2 | t3 | t4; + } + } + else + { + const uint32_t * __restrict sPtr = reinterpret_cast<const uint32_t*>(pSource); + uint32_t * __restrict dPtr = reinterpret_cast<uint32_t*>(pDestination); + size_t size = std::min<size_t>( outSize, inSize ); + for( size_t count = 0; count < ( size - 3 ); count += 4 ) + { + uint32_t t = *(sPtr++); + + uint32_t t1 = (t & 0x000000ff) << 8; + uint32_t t2 = (t & 0x0000ff00) >> 8; + uint32_t t3 = (t & 0x00ff0000) << 8; + uint32_t t4 = (t & 0xff000000) >> 8; + + *(dPtr++) = t1 | t2 | t3 | t4; + } + } + return; } } - return; + break; } // Fall-through case is to just use memcpy (assuming this is not an in-place operation) @@ -365,13 +597,14 @@ void _SwizzleScanline( LPVOID pDestination, size_t outSize, LPCVOID pSource, siz 
// Converts an image row with optional clearing of alpha value to 1.0 // Returns true if supported, false if expansion case not supported //------------------------------------------------------------------------------------- +_Use_decl_annotations_ bool _ExpandScanline( LPVOID pDestination, size_t outSize, DXGI_FORMAT outFormat, LPCVOID pSource, size_t inSize, DXGI_FORMAT inFormat, DWORD flags ) { assert( pDestination && outSize > 0 ); assert( pSource && inSize > 0 ); - assert( IsValid(outFormat) && !IsVideo(outFormat) ); - assert( IsValid(inFormat) && !IsVideo(inFormat) ); + assert( IsValid(outFormat) && !IsPlanar(outFormat) && !IsPalettized(outFormat) ); + assert( IsValid(inFormat) && !IsPlanar(inFormat) && !IsPalettized(inFormat) ); switch( inFormat ) { @@ -380,11 +613,12 @@ bool _ExpandScanline( LPVOID pDestination, size_t outSize, DXGI_FORMAT outFormat return false; // DXGI_FORMAT_B5G6R5_UNORM -> DXGI_FORMAT_R8G8B8A8_UNORM + if ( inSize >= 2 && outSize >= 4 ) { const uint16_t * __restrict sPtr = reinterpret_cast<const uint16_t*>(pSource); uint32_t * __restrict dPtr = reinterpret_cast<uint32_t*>(pDestination); - for( size_t ocount = 0, icount = 0; ((icount < inSize) && (ocount < outSize)); icount += 2, ocount += 4 ) + for( size_t ocount = 0, icount = 0; ( ( icount < ( inSize - 1 ) ) && ( ocount < ( outSize - 3 ) ) ); icount += 2, ocount += 4 ) { uint16_t t = *(sPtr++); @@ -394,19 +628,21 @@ bool _ExpandScanline( LPVOID pDestination, size_t outSize, DXGI_FORMAT outFormat *(dPtr++) = t1 | t2 | t3 | 0xff000000; } + return true; } - return true; + return false; case DXGI_FORMAT_B5G5R5A1_UNORM: if ( outFormat != DXGI_FORMAT_R8G8B8A8_UNORM ) return false; // DXGI_FORMAT_B5G5R5A1_UNORM -> DXGI_FORMAT_R8G8B8A8_UNORM + if ( inSize >= 2 && outSize >= 4 ) { const uint16_t * __restrict sPtr = reinterpret_cast<const uint16_t*>(pSource); uint32_t * __restrict dPtr = reinterpret_cast<uint32_t*>(pDestination); - for( size_t ocount = 0, icount = 0; ((icount < inSize) && 
(ocount < outSize)); icount += 2, ocount += 4 ) + for( size_t ocount = 0, icount = 0; ( ( icount < ( inSize - 1 ) ) && ( ocount < ( outSize - 3 ) ) ); icount += 2, ocount += 4 ) { uint16_t t = *(sPtr++); @@ -417,20 +653,21 @@ bool _ExpandScanline( LPVOID pDestination, size_t outSize, DXGI_FORMAT outFormat *(dPtr++) = t1 | t2 | t3 | ta; } + return true; } - return true; + return false; -#ifdef DXGI_1_2_FORMATS case DXGI_FORMAT_B4G4R4A4_UNORM: if ( outFormat != DXGI_FORMAT_R8G8B8A8_UNORM ) return false; // DXGI_FORMAT_B4G4R4A4_UNORM -> DXGI_FORMAT_R8G8B8A8_UNORM + if ( inSize >= 2 && outSize >= 4 ) { const uint16_t * __restrict sPtr = reinterpret_cast<const uint16_t*>(pSource); uint32_t * __restrict dPtr = reinterpret_cast<uint32_t*>(pDestination); - for( size_t ocount = 0, icount = 0; ((icount < inSize) && (ocount < outSize)); icount += 2, ocount += 4 ) + for( size_t ocount = 0, icount = 0; ( ( icount < ( inSize - 1 ) ) && ( ocount < ( outSize - 3 ) ) ); icount += 2, ocount += 4 ) { uint16_t t = *(sPtr++); @@ -441,9 +678,9 @@ bool _ExpandScanline( LPVOID pDestination, size_t outSize, DXGI_FORMAT outFormat *(dPtr++) = t1 | t2 | t3 | ta; } + return true; } - return true; -#endif // DXGI_1_2_FORMATS + return false; } return false; @@ -457,7 +694,7 @@ bool _ExpandScanline( LPVOID pDestination, size_t outSize, DXGI_FORMAT outFormat if ( size >= sizeof(type) )\ {\ const type * __restrict sPtr = reinterpret_cast<const type*>(pSource);\ - for( size_t icount = 0; icount < size; icount += sizeof(type) )\ + for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); icount += sizeof(type) )\ {\ if ( dPtr >= ePtr ) break;\ *(dPtr++) = func( sPtr++ );\ @@ -470,7 +707,7 @@ bool _ExpandScanline( LPVOID pDestination, size_t outSize, DXGI_FORMAT outFormat if ( size >= sizeof(type) )\ {\ const type * __restrict sPtr = reinterpret_cast<const type*>(pSource);\ - for( size_t icount = 0; icount < size; icount += sizeof(type) )\ + for( size_t icount = 0; icount < ( size - sizeof(type) + 
1 ); icount += sizeof(type) )\ {\ XMVECTOR v = func( sPtr++ );\ if ( dPtr >= ePtr ) break;\ @@ -484,7 +721,7 @@ bool _ExpandScanline( LPVOID pDestination, size_t outSize, DXGI_FORMAT outFormat if ( size >= sizeof(type) )\ {\ const type * __restrict sPtr = reinterpret_cast<const type*>(pSource);\ - for( size_t icount = 0; icount < size; icount += sizeof(type) )\ + for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); icount += sizeof(type) )\ {\ XMVECTOR v = func( sPtr++ );\ if ( dPtr >= ePtr ) break;\ @@ -494,14 +731,15 @@ bool _ExpandScanline( LPVOID pDestination, size_t outSize, DXGI_FORMAT outFormat }\ return false; -bool _LoadScanline( XMVECTOR* pDestination, size_t count, +#pragma warning(suppress: 6101) +_Use_decl_annotations_ bool _LoadScanline( XMVECTOR* pDestination, size_t count, LPCVOID pSource, size_t size, DXGI_FORMAT format ) { #if !defined(_XM_NO_INTRINSICS_) assert( pDestination && count > 0 && (((uintptr_t)pDestination & 0xF) == 0) ); #endif assert( pSource && size > 0 ); - assert( IsValid(format) && !IsVideo(format) && !IsTypeless(format) && !IsCompressed(format) ); + assert( IsValid(format) && !IsTypeless(format, false) && !IsCompressed(format) && !IsPlanar(format) && !IsPalettized(format) ); XMVECTOR* __restrict dPtr = pDestination; if ( !dPtr ) @@ -509,7 +747,7 @@ bool _LoadScanline( XMVECTOR* pDestination, size_t count, const XMVECTOR* ePtr = pDestination + count; - switch( format ) + switch( static_cast<int>(format) ) { case DXGI_FORMAT_R32G32B32A32_FLOAT: { @@ -558,24 +796,92 @@ bool _LoadScanline( XMVECTOR* pDestination, size_t count, LOAD_SCANLINE2( XMINT2, XMLoadSInt2, g_XMIdentityR3 ) case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: - if ( size >= (sizeof(float)+sizeof(uint32_t)) ) { - const float * sPtr = reinterpret_cast<const float*>(pSource); - for( size_t icount = 0; icount < size; icount += (sizeof(float)+sizeof(uint32_t)) ) + const size_t psize = sizeof(float)+sizeof(uint32_t); + if ( size >= psize ) { - const uint8_t* ps8 = 
reinterpret_cast<const uint8_t*>( &sPtr[1] ); - if ( dPtr >= ePtr ) break; - *(dPtr++) = XMVectorSet( sPtr[0], static_cast<float>( *ps8 ), 0.f, 1.f ); - sPtr += 2; + const float * sPtr = reinterpret_cast<const float*>(pSource); + for( size_t icount = 0; icount < ( size - psize + 1 ); icount += psize ) + { + const uint8_t* ps8 = reinterpret_cast<const uint8_t*>( &sPtr[1] ); + if ( dPtr >= ePtr ) break; + *(dPtr++) = XMVectorSet( sPtr[0], static_cast<float>( *ps8 ), 0.f, 1.f ); + sPtr += 2; + } + return true; + } + } + return false; + + case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS: + { + const size_t psize = sizeof(float)+sizeof(uint32_t); + if ( size >= psize ) + { + const float * sPtr = reinterpret_cast<const float*>(pSource); + for( size_t icount = 0; icount < ( size - psize + 1 ); icount += psize ) + { + if ( dPtr >= ePtr ) break; + *(dPtr++) = XMVectorSet( sPtr[0], 0.f /* typeless component assumed zero */, 0.f, 1.f ); + sPtr += 2; + } + return true; + } + } + return false; + + case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT: + { + const size_t psize = sizeof(float)+sizeof(uint32_t); + if ( size >= psize ) + { + const float * sPtr = reinterpret_cast<const float*>(pSource); + for( size_t icount = 0; icount < ( size - psize + 1 ); icount += psize ) + { + const uint8_t* pg8 = reinterpret_cast<const uint8_t*>( &sPtr[1] ); + if ( dPtr >= ePtr ) break; + *(dPtr++) = XMVectorSet( 0.f /* typeless component assumed zero */, static_cast<float>( *pg8 ), 0.f, 1.f ); + sPtr += 2; + } + return true; } - return true; } return false; case DXGI_FORMAT_R10G10B10A2_UNORM: - case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM: LOAD_SCANLINE( XMUDECN4, XMLoadUDecN4 ); + case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM: +#if DIRECTX_MATH_VERSION >= 306 + LOAD_SCANLINE( XMUDECN4, XMLoadUDecN4_XR ); +#else + if ( size >= sizeof(XMUDECN4) ) + { + const XMUDECN4 * __restrict sPtr = reinterpret_cast<const XMUDECN4*>(pSource); + for( size_t icount = 0; icount < ( size - sizeof(XMUDECN4) + 1 ); icount += 
sizeof(XMUDECN4) ) + { + if ( dPtr >= ePtr ) break; + + int32_t ElementX = sPtr->v & 0x3FF; + int32_t ElementY = (sPtr->v >> 10) & 0x3FF; + int32_t ElementZ = (sPtr->v >> 20) & 0x3FF; + + XMVECTORF32 vResult = { + (float)(ElementX - 0x180) / 510.0f, + (float)(ElementY - 0x180) / 510.0f, + (float)(ElementZ - 0x180) / 510.0f, + (float)(sPtr->v >> 30) / 3.0f + }; + + ++sPtr; + + *(dPtr++) = vResult.v; + } + return true; + } + return false; +#endif + case DXGI_FORMAT_R10G10B10A2_UINT: LOAD_SCANLINE( XMUDEC4, XMLoadUDec4 ); @@ -615,7 +921,7 @@ bool _LoadScanline( XMVECTOR* pDestination, size_t count, if ( size >= sizeof(float) ) { const float* __restrict sPtr = reinterpret_cast<const float*>(pSource); - for( size_t icount = 0; icount < size; icount += sizeof(float) ) + for( size_t icount = 0; icount < ( size - sizeof(float) + 1 ); icount += sizeof(float) ) { XMVECTOR v = XMLoadFloat( sPtr++ ); if ( dPtr >= ePtr ) break; @@ -629,7 +935,7 @@ bool _LoadScanline( XMVECTOR* pDestination, size_t count, if ( size >= sizeof(uint32_t) ) { const uint32_t* __restrict sPtr = reinterpret_cast<const uint32_t*>(pSource); - for( size_t icount = 0; icount < size; icount += sizeof(uint32_t) ) + for( size_t icount = 0; icount < ( size - sizeof(uint32_t) + 1 ); icount += sizeof(uint32_t) ) { XMVECTOR v = XMLoadInt( sPtr++ ); v = XMConvertVectorUIntToFloat( v, 0 ); @@ -644,7 +950,7 @@ bool _LoadScanline( XMVECTOR* pDestination, size_t count, if ( size >= sizeof(int32_t) ) { const int32_t * __restrict sPtr = reinterpret_cast<const int32_t*>(pSource); - for( size_t icount = 0; icount < size; icount += sizeof(int32_t) ) + for( size_t icount = 0; icount < ( size - sizeof(int32_t) + 1 ); icount += sizeof(int32_t) ) { XMVECTOR v = XMLoadInt( reinterpret_cast<const uint32_t*> (sPtr++) ); v = XMConvertVectorIntToFloat( v, 0 ); @@ -659,7 +965,7 @@ bool _LoadScanline( XMVECTOR* pDestination, size_t count, if ( size >= sizeof(uint32_t) ) { const uint32_t * sPtr = reinterpret_cast<const 
uint32_t*>(pSource); - for( size_t icount = 0; icount < size; icount += sizeof(uint32_t) ) + for( size_t icount = 0; icount < ( size - sizeof(uint32_t) + 1 ); icount += sizeof(uint32_t) ) { float d = static_cast<float>( *sPtr & 0xFFFFFF ) / 16777215.f; float s = static_cast<float>( ( *sPtr & 0xFF000000 ) >> 24 ); @@ -671,6 +977,36 @@ bool _LoadScanline( XMVECTOR* pDestination, size_t count, } return false; + case DXGI_FORMAT_R24_UNORM_X8_TYPELESS: + if ( size >= sizeof(uint32_t) ) + { + const uint32_t * sPtr = reinterpret_cast<const uint32_t*>(pSource); + for( size_t icount = 0; icount < ( size - sizeof(uint32_t) + 1 ); icount += sizeof(uint32_t) ) + { + float r = static_cast<float>( *sPtr & 0xFFFFFF ) / 16777215.f; + ++sPtr; + if ( dPtr >= ePtr ) break; + *(dPtr++) = XMVectorSet( r, 0.f /* typeless component assumed zero */, 0.f, 1.f ); + } + return true; + } + return false; + + case DXGI_FORMAT_X24_TYPELESS_G8_UINT: + if ( size >= sizeof(uint32_t) ) + { + const uint32_t * sPtr = reinterpret_cast<const uint32_t*>(pSource); + for( size_t icount = 0; icount < ( size - sizeof(uint32_t) + 1 ); icount += sizeof(uint32_t) ) + { + float g = static_cast<float>( ( *sPtr & 0xFF000000 ) >> 24 ); + ++sPtr; + if ( dPtr >= ePtr ) break; + *(dPtr++) = XMVectorSet( 0.f /* typeless component assumed zero */, g, 0.f, 1.f ); + } + return true; + } + return false; + case DXGI_FORMAT_R8G8_UNORM: LOAD_SCANLINE2( XMUBYTEN2, XMLoadUByteN2, g_XMIdentityR3 ) @@ -687,7 +1023,7 @@ bool _LoadScanline( XMVECTOR* pDestination, size_t count, if ( size >= sizeof(HALF) ) { const HALF * __restrict sPtr = reinterpret_cast<const HALF*>(pSource); - for( size_t icount = 0; icount < size; icount += sizeof(HALF) ) + for( size_t icount = 0; icount < ( size - sizeof(HALF) + 1 ); icount += sizeof(HALF) ) { if ( dPtr >= ePtr ) break; *(dPtr++) = XMVectorSet( XMConvertHalfToFloat(*sPtr++), 0.f, 0.f, 1.f ); @@ -701,7 +1037,7 @@ bool _LoadScanline( XMVECTOR* pDestination, size_t count, if ( size >= 
sizeof(uint16_t) ) { const uint16_t* __restrict sPtr = reinterpret_cast<const uint16_t*>(pSource); - for( size_t icount = 0; icount < size; icount += sizeof(uint16_t) ) + for( size_t icount = 0; icount < ( size - sizeof(uint16_t) + 1 ); icount += sizeof(uint16_t) ) { if ( dPtr >= ePtr ) break; *(dPtr++) = XMVectorSet( static_cast<float>(*sPtr++) / 65535.f, 0.f, 0.f, 1.f ); @@ -714,7 +1050,7 @@ bool _LoadScanline( XMVECTOR* pDestination, size_t count, if ( size >= sizeof(uint16_t) ) { const uint16_t * __restrict sPtr = reinterpret_cast<const uint16_t*>(pSource); - for( size_t icount = 0; icount < size; icount += sizeof(uint16_t) ) + for( size_t icount = 0; icount < ( size - sizeof(uint16_t) + 1 ); icount += sizeof(uint16_t) ) { if ( dPtr >= ePtr ) break; *(dPtr++) = XMVectorSet( static_cast<float>(*sPtr++), 0.f, 0.f, 1.f ); @@ -727,7 +1063,7 @@ bool _LoadScanline( XMVECTOR* pDestination, size_t count, if ( size >= sizeof(int16_t) ) { const int16_t * __restrict sPtr = reinterpret_cast<const int16_t*>(pSource); - for( size_t icount = 0; icount < size; icount += sizeof(int16_t) ) + for( size_t icount = 0; icount < ( size - sizeof(int16_t) + 1 ); icount += sizeof(int16_t) ) { if ( dPtr >= ePtr ) break; *(dPtr++) = XMVectorSet( static_cast<float>(*sPtr++) / 32767.f, 0.f, 0.f, 1.f ); @@ -740,7 +1076,7 @@ bool _LoadScanline( XMVECTOR* pDestination, size_t count, if ( size >= sizeof(int16_t) ) { const int16_t * __restrict sPtr = reinterpret_cast<const int16_t*>(pSource); - for( size_t icount = 0; icount < size; icount += sizeof(int16_t) ) + for( size_t icount = 0; icount < ( size - sizeof(int16_t) + 1 ); icount += sizeof(int16_t) ) { if ( dPtr >= ePtr ) break; *(dPtr++) = XMVectorSet( static_cast<float>(*sPtr++), 0.f, 0.f, 1.f ); @@ -776,10 +1112,10 @@ bool _LoadScanline( XMVECTOR* pDestination, size_t count, return false; case DXGI_FORMAT_R8_SNORM: - if ( size >= sizeof(char) ) + if ( size >= sizeof(int8_t) ) { - const char * __restrict sPtr = reinterpret_cast<const 
char*>(pSource); - for( size_t icount = 0; icount < size; icount += sizeof(char) ) + const int8_t * __restrict sPtr = reinterpret_cast<const int8_t*>(pSource); + for( size_t icount = 0; icount < size; icount += sizeof(int8_t) ) { if ( dPtr >= ePtr ) break; *(dPtr++) = XMVectorSet( static_cast<float>(*sPtr++) / 127.f, 0.f, 0.f, 1.f ); @@ -789,10 +1125,10 @@ bool _LoadScanline( XMVECTOR* pDestination, size_t count, return false; case DXGI_FORMAT_R8_SINT: - if ( size >= sizeof(char) ) + if ( size >= sizeof(int8_t) ) { - const char * __restrict sPtr = reinterpret_cast<const char*>(pSource); - for( size_t icount = 0; icount < size; icount += sizeof(char) ) + const int8_t * __restrict sPtr = reinterpret_cast<const int8_t*>(pSource); + for( size_t icount = 0; icount < size; icount += sizeof(int8_t) ) { if ( dPtr >= ePtr ) break; *(dPtr++) = XMVectorSet( static_cast<float>(*sPtr++), 0.f, 0.f, 1.f ); @@ -820,10 +1156,10 @@ bool _LoadScanline( XMVECTOR* pDestination, size_t count, const uint8_t * __restrict sPtr = reinterpret_cast<const uint8_t*>(pSource); for( size_t icount = 0; icount < size; icount += sizeof(uint8_t) ) { - for( size_t bcount = 0; bcount < 8; ++bcount ) + for( size_t bcount = 8; bcount > 0; --bcount ) { if ( dPtr >= ePtr ) break; - *(dPtr++) = XMVectorSet( (((*sPtr >> bcount) & 0x1) ? 1.f : 0.f), 0.f, 0.f, 1.f ); + *(dPtr++) = XMVectorSet( (((*sPtr >> (bcount-1)) & 0x1) ? 
1.f : 0.f), 0.f, 0.f, 1.f ); } ++sPtr; @@ -833,13 +1169,37 @@ bool _LoadScanline( XMVECTOR* pDestination, size_t count, return false; case DXGI_FORMAT_R9G9B9E5_SHAREDEXP: +#if DIRECTX_MATH_VERSION >= 306 LOAD_SCANLINE3( XMFLOAT3SE, XMLoadFloat3SE, g_XMIdentityR3 ) +#else + if ( size >= sizeof(XMFLOAT3SE) ) + { + const XMFLOAT3SE * __restrict sPtr = reinterpret_cast<const XMFLOAT3SE*>(pSource); + for( size_t icount = 0; icount < ( size - sizeof(XMFLOAT3SE) + 1 ); icount += sizeof(XMFLOAT3SE) ) + { + union { float f; int32_t i; } fi; + fi.i = 0x33800000 + (sPtr->e << 23); + float Scale = fi.f; + + XMVECTORF32 v = { + Scale * float( sPtr->xm ), + Scale * float( sPtr->ym ), + Scale * float( sPtr->zm ), + 1.0f }; + + if ( dPtr >= ePtr ) break; + *(dPtr++) = v; + } + return true; + } + return false; +#endif case DXGI_FORMAT_R8G8_B8G8_UNORM: if ( size >= sizeof(XMUBYTEN4) ) { const XMUBYTEN4 * __restrict sPtr = reinterpret_cast<const XMUBYTEN4*>(pSource); - for( size_t icount = 0; icount < size; icount += sizeof(XMUBYTEN4) ) + for( size_t icount = 0; icount < ( size - sizeof(XMUBYTEN4) + 1 ); icount += sizeof(XMUBYTEN4) ) { XMVECTOR v = XMLoadUByteN4( sPtr++ ); XMVECTOR v1 = XMVectorSwizzle<0, 3, 2, 1>( v ); @@ -856,7 +1216,7 @@ bool _LoadScanline( XMVECTOR* pDestination, size_t count, if ( size >= sizeof(XMUBYTEN4) ) { const XMUBYTEN4 * __restrict sPtr = reinterpret_cast<const XMUBYTEN4*>(pSource); - for( size_t icount = 0; icount < size; icount += sizeof(XMUBYTEN4) ) + for( size_t icount = 0; icount < ( size - sizeof(XMUBYTEN4) + 1 ); icount += sizeof(XMUBYTEN4) ) { XMVECTOR v = XMLoadUByteN4( sPtr++ ); XMVECTOR v0 = XMVectorSwizzle<1, 0, 3, 2>( v ); @@ -873,9 +1233,9 @@ bool _LoadScanline( XMVECTOR* pDestination, size_t count, case DXGI_FORMAT_B5G6R5_UNORM: if ( size >= sizeof(XMU565) ) { - static XMVECTORF32 s_Scale = { 1.f/31.f, 1.f/63.f, 1.f/31.f, 1.f }; + static const XMVECTORF32 s_Scale = { 1.f/31.f, 1.f/63.f, 1.f/31.f, 1.f }; const XMU565 * __restrict sPtr = 
reinterpret_cast<const XMU565*>(pSource); - for( size_t icount = 0; icount < size; icount += sizeof(XMU565) ) + for( size_t icount = 0; icount < ( size - sizeof(XMU565) + 1 ); icount += sizeof(XMU565) ) { XMVECTOR v = XMLoadU565( sPtr++ ); v = XMVectorMultiply( v, s_Scale ); @@ -890,9 +1250,9 @@ bool _LoadScanline( XMVECTOR* pDestination, size_t count, case DXGI_FORMAT_B5G5R5A1_UNORM: if ( size >= sizeof(XMU555) ) { - static XMVECTORF32 s_Scale = { 1.f/31.f, 1.f/31.f, 1.f/31.f, 1.f }; + static const XMVECTORF32 s_Scale = { 1.f/31.f, 1.f/31.f, 1.f/31.f, 1.f }; const XMU555 * __restrict sPtr = reinterpret_cast<const XMU555*>(pSource); - for( size_t icount = 0; icount < size; icount += sizeof(XMU555) ) + for( size_t icount = 0; icount < ( size - sizeof(XMU555) + 1 ); icount += sizeof(XMU555) ) { XMVECTOR v = XMLoadU555( sPtr++ ); v = XMVectorMultiply( v, s_Scale ); @@ -908,7 +1268,7 @@ bool _LoadScanline( XMVECTOR* pDestination, size_t count, if ( size >= sizeof(XMUBYTEN4) ) { const XMUBYTEN4 * __restrict sPtr = reinterpret_cast<const XMUBYTEN4*>(pSource); - for( size_t icount = 0; icount < size; icount += sizeof(XMUBYTEN4) ) + for( size_t icount = 0; icount < ( size - sizeof(XMUBYTEN4) + 1 ); icount += sizeof(XMUBYTEN4) ) { XMVECTOR v = XMLoadUByteN4( sPtr++ ); if ( dPtr >= ePtr ) break; @@ -923,7 +1283,7 @@ bool _LoadScanline( XMVECTOR* pDestination, size_t count, if ( size >= sizeof(XMUBYTEN4) ) { const XMUBYTEN4 * __restrict sPtr = reinterpret_cast<const XMUBYTEN4*>(pSource); - for( size_t icount = 0; icount < size; icount += sizeof(XMUBYTEN4) ) + for( size_t icount = 0; icount < ( size - sizeof(XMUBYTEN4) + 1 ); icount += sizeof(XMUBYTEN4) ) { XMVECTOR v = XMLoadUByteN4( sPtr++ ); v = XMVectorSwizzle<2, 1, 0, 3>( v ); @@ -934,13 +1294,232 @@ bool _LoadScanline( XMVECTOR* pDestination, size_t count, } return false; -#ifdef DXGI_1_2_FORMATS + case DXGI_FORMAT_AYUV: + if ( size >= sizeof(XMUBYTEN4) ) + { + const XMUBYTEN4 * __restrict sPtr = reinterpret_cast<const 
XMUBYTEN4*>(pSource); + for( size_t icount = 0; icount < ( size - sizeof(XMUBYTEN4) + 1 ); icount += sizeof(XMUBYTEN4) ) + { + int v = int(sPtr->x) - 128; + int u = int(sPtr->y) - 128; + int y = int(sPtr->z) - 16; + unsigned int a = sPtr->w; + ++sPtr; + + // http://msdn.microsoft.com/en-us/library/windows/desktop/dd206750.aspx + + // Y’ = Y - 16 + // Cb’ = Cb - 128 + // Cr’ = Cr - 128 + + // R = 1.1644Y’ + 1.5960Cr’ + // G = 1.1644Y’ - 0.3917Cb’ - 0.8128Cr’ + // B = 1.1644Y’ + 2.0172Cb’ + + int r = (298 * y + 409 * v + 128) >> 8; + int g = (298 * y - 100 * u - 208 * v + 128) >> 8; + int b = (298 * y + 516 * u + 128) >> 8; + + if ( dPtr >= ePtr ) break; + *(dPtr++) = XMVectorSet( float( std::min<int>( std::max<int>( r, 0 ), 255 ) ) / 255.f, + float( std::min<int>( std::max<int>( g, 0 ), 255 ) ) / 255.f, + float( std::min<int>( std::max<int>( b, 0 ), 255 ) ) / 255.f, + float( a / 255.f ) ); + } + return true; + } + return false; + + case DXGI_FORMAT_Y410: + if ( size >= sizeof(XMUDECN4) ) + { + const XMUDECN4 * __restrict sPtr = reinterpret_cast<const XMUDECN4*>(pSource); + for( size_t icount = 0; icount < ( size - sizeof(XMUDECN4) + 1 ); icount += sizeof(XMUDECN4) ) + { + int64_t u = int(sPtr->x) - 512; + int64_t y = int(sPtr->y) - 64; + int64_t v = int(sPtr->z) - 512; + unsigned int a = sPtr->w; + ++sPtr; + + // http://msdn.microsoft.com/en-us/library/windows/desktop/bb970578.aspx + + // Y’ = Y - 64 + // Cb’ = Cb - 512 + // Cr’ = Cr - 512 + + // R = 1.1678Y’ + 1.6007Cr’ + // G = 1.1678Y’ - 0.3929Cb’ - 0.8152Cr’ + // B = 1.1678Y’ + 2.0232Cb’ + + int r = static_cast<int>( (76533 * y + 104905 * v + 32768) >> 16 ); + int g = static_cast<int>( (76533 * y - 25747 * u - 53425 * v + 32768) >> 16 ); + int b = static_cast<int>( (76533 * y + 132590 * u + 32768) >> 16 ); + + if ( dPtr >= ePtr ) break; + *(dPtr++) = XMVectorSet( float( std::min<int>( std::max<int>( r, 0 ), 1023 ) ) / 1023.f, + float( std::min<int>( std::max<int>( g, 0 ), 1023 ) ) / 1023.f, + float( 
std::min<int>( std::max<int>( b, 0 ), 1023 ) ) / 1023.f, + float( a / 3.f ) ); + } + return true; + } + return false; + + case DXGI_FORMAT_Y416: + if ( size >= sizeof(XMUSHORTN4) ) + { + const XMUSHORTN4 * __restrict sPtr = reinterpret_cast<const XMUSHORTN4*>(pSource); + for( size_t icount = 0; icount < ( size - sizeof(XMUSHORTN4) + 1 ); icount += sizeof(XMUSHORTN4) ) + { + int64_t u = int64_t(sPtr->x) - 32768; + int64_t y = int64_t(sPtr->y) - 4096; + int64_t v = int64_t(sPtr->z) - 32768; + unsigned int a = sPtr->w; + ++sPtr; + + // http://msdn.microsoft.com/en-us/library/windows/desktop/bb970578.aspx + + // Y’ = Y - 4096 + // Cb’ = Cb - 32768 + // Cr’ = Cr - 32768 + + // R = 1.1689Y’ + 1.6023Cr’ + // G = 1.1689Y’ - 0.3933Cb’ - 0.8160Cr’ + // B = 1.1689Y’+ 2.0251Cb’ + + int r = static_cast<int>( (76607 * y + 105006 * v + 32768) >> 16 ); + int g = static_cast<int>( (76607 * y - 25772 * u - 53477 * v + 32768) >> 16 ); + int b = static_cast<int>( (76607 * y + 132718 * u + 32768) >> 16 ); + + if ( dPtr >= ePtr ) break; + *(dPtr++) = XMVectorSet( float( std::min<int>( std::max<int>( r, 0 ), 65535 ) ) / 65535.f, + float( std::min<int>( std::max<int>( g, 0 ), 65535 ) ) / 65535.f, + float( std::min<int>( std::max<int>( b, 0 ), 65535 ) ) / 65535.f, + float( std::min<int>( std::max<int>( a, 0 ), 65535 ) ) / 65535.f ); + } + return true; + } + return false; + + case DXGI_FORMAT_YUY2: + if ( size >= sizeof(XMUBYTEN4) ) + { + const XMUBYTEN4 * __restrict sPtr = reinterpret_cast<const XMUBYTEN4*>(pSource); + for( size_t icount = 0; icount < ( size - sizeof(XMUBYTEN4) + 1 ); icount += sizeof(XMUBYTEN4) ) + { + int y0 = int(sPtr->x) - 16; + int u = int(sPtr->y) - 128; + int y1 = int(sPtr->z) - 16; + int v = int(sPtr->w) - 128; + ++sPtr; + + // See AYUV + int r = (298 * y0 + 409 * v + 128) >> 8; + int g = (298 * y0 - 100 * u - 208 * v + 128) >> 8; + int b = (298 * y0 + 516 * u + 128) >> 8; + + if ( dPtr >= ePtr ) break; + *(dPtr++) = XMVectorSet( float( std::min<int>( 
std::max<int>( r, 0 ), 255 ) ) / 255.f, + float( std::min<int>( std::max<int>( g, 0 ), 255 ) ) / 255.f, + float( std::min<int>( std::max<int>( b, 0 ), 255 ) ) / 255.f, + 1.f ); + + r = (298 * y1 + 409 * v + 128) >> 8; + g = (298 * y1 - 100 * u - 208 * v + 128) >> 8; + b = (298 * y1 + 516 * u + 128) >> 8; + + if ( dPtr >= ePtr ) break; + *(dPtr++) = XMVectorSet( float( std::min<int>( std::max<int>( r, 0 ), 255 ) ) / 255.f, + float( std::min<int>( std::max<int>( g, 0 ), 255 ) ) / 255.f, + float( std::min<int>( std::max<int>( b, 0 ), 255 ) ) / 255.f, + 1.f ); + } + return true; + } + return false; + + case DXGI_FORMAT_Y210: + // Same as Y216 with least significant 6 bits set to zero + if ( size >= sizeof(XMUSHORTN4) ) + { + const XMUSHORTN4 * __restrict sPtr = reinterpret_cast<const XMUSHORTN4*>(pSource); + for( size_t icount = 0; icount < ( size - sizeof(XMUSHORTN4) + 1 ); icount += sizeof(XMUSHORTN4) ) + { + int64_t y0 = int64_t(sPtr->x >> 6) - 64; + int64_t u = int64_t(sPtr->y >> 6) - 512; + int64_t y1 = int64_t(sPtr->z >> 6) - 64; + int64_t v = int64_t(sPtr->w >> 6) - 512; + ++sPtr; + + // See Y410 + int r = static_cast<int>( (76533 * y0 + 104905 * v + 32768) >> 16 ); + int g = static_cast<int>( (76533 * y0 - 25747 * u - 53425 * v + 32768) >> 16 ); + int b = static_cast<int>( (76533 * y0 + 132590 * u + 32768) >> 16 ); + + if ( dPtr >= ePtr ) break; + *(dPtr++) = XMVectorSet( float( std::min<int>( std::max<int>( r, 0 ), 1023 ) ) / 1023.f, + float( std::min<int>( std::max<int>( g, 0 ), 1023 ) ) / 1023.f, + float( std::min<int>( std::max<int>( b, 0 ), 1023 ) ) / 1023.f, + 1.f ); + + r = static_cast<int>( (76533 * y1 + 104905 * v + 32768) >> 16 ); + g = static_cast<int>( (76533 * y1 - 25747 * u - 53425 * v + 32768) >> 16 ); + b = static_cast<int>( (76533 * y1 + 132590 * u + 32768) >> 16 ); + + if ( dPtr >= ePtr ) break; + *(dPtr++) = XMVectorSet( float( std::min<int>( std::max<int>( r, 0 ), 1023 ) ) / 1023.f, + float( std::min<int>( std::max<int>( g, 0 ), 1023 ) ) / 
1023.f, + float( std::min<int>( std::max<int>( b, 0 ), 1023 ) ) / 1023.f, + 1.f ); + } + return true; + } + return false; + + case DXGI_FORMAT_Y216: + if ( size >= sizeof(XMUSHORTN4) ) + { + const XMUSHORTN4 * __restrict sPtr = reinterpret_cast<const XMUSHORTN4*>(pSource); + for( size_t icount = 0; icount < ( size - sizeof(XMUSHORTN4) + 1 ); icount += sizeof(XMUSHORTN4) ) + { + int64_t y0 = int64_t(sPtr->x) - 4096; + int64_t u = int64_t(sPtr->y) - 32768; + int64_t y1 = int64_t(sPtr->z) - 4096; + int64_t v = int64_t(sPtr->w) - 32768; + ++sPtr; + + // See Y416 + int r = static_cast<int>( (76607 * y0 + 105006 * v + 32768) >> 16 ); + int g = static_cast<int>( (76607 * y0 - 25772 * u - 53477 * v + 32768) >> 16 ); + int b = static_cast<int>( (76607 * y0 + 132718 * u + 32768) >> 16 ); + + if ( dPtr >= ePtr ) break; + *(dPtr++) = XMVectorSet( float( std::min<int>( std::max<int>( r, 0 ), 65535 ) ) / 65535.f, + float( std::min<int>( std::max<int>( g, 0 ), 65535 ) ) / 65535.f, + float( std::min<int>( std::max<int>( b, 0 ), 65535 ) ) / 65535.f, + 1.f ); + + r = static_cast<int>( (76607 * y1 + 105006 * v + 32768) >> 16 ); + g = static_cast<int>( (76607 * y1 - 25772 * u - 53477 * v + 32768) >> 16 ); + b = static_cast<int>( (76607 * y1 + 132718 * u + 32768) >> 16 ); + + if ( dPtr >= ePtr ) break; + *(dPtr++) = XMVectorSet( float( std::min<int>( std::max<int>( r, 0 ), 65535 ) ) / 65535.f, + float( std::min<int>( std::max<int>( g, 0 ), 65535 ) ) / 65535.f, + float( std::min<int>( std::max<int>( b, 0 ), 65535 ) ) / 65535.f, + 1.f ); + } + return true; + } + return false; + case DXGI_FORMAT_B4G4R4A4_UNORM: if ( size >= sizeof(XMUNIBBLE4) ) { - static XMVECTORF32 s_Scale = { 1.f/15.f, 1.f/15.f, 1.f/15.f, 1.f/15.f }; + static const XMVECTORF32 s_Scale = { 1.f/15.f, 1.f/15.f, 1.f/15.f, 1.f/15.f }; const XMUNIBBLE4 * __restrict sPtr = reinterpret_cast<const XMUNIBBLE4*>(pSource); - for( size_t icount = 0; icount < size; icount += sizeof(XMUNIBBLE4) ) + for( size_t icount = 0; icount < ( 
size - sizeof(XMUNIBBLE4) + 1 ); icount += sizeof(XMUNIBBLE4) ) { XMVECTOR v = XMLoadUNibble4( sPtr++ ); v = XMVectorMultiply( v, s_Scale ); @@ -951,14 +1530,65 @@ bool _LoadScanline( XMVECTOR* pDestination, size_t count, } return false; - // we don't support the video formats ( see IsVideo function ) -#endif // DXGI_1_2_FORMATS + case 116 /* DXGI_FORMAT_R10G10B10_7E3_A2_FLOAT */: + // Xbox One specific 7e3 format + if ( size >= sizeof(XMUDECN4) ) + { + const XMUDECN4 * __restrict sPtr = reinterpret_cast<const XMUDECN4*>(pSource); + for( size_t icount = 0; icount < ( size - sizeof(XMUDECN4) + 1 ); icount += sizeof(XMUDECN4) ) + { + if ( dPtr >= ePtr ) break; + + XMVECTORF32 vResult = { + FloatFrom7e3(sPtr->x), + FloatFrom7e3(sPtr->y), + FloatFrom7e3(sPtr->z), + (float)(sPtr->v >> 30) / 3.0f + }; + + ++sPtr; + + *(dPtr++) = vResult.v; + } + return true; + } + return false; + + case 117 /* DXGI_FORMAT_R10G10B10_6E4_A2_FLOAT */: + // Xbox One specific 6e4 format + if ( size >= sizeof(XMUDECN4) ) + { + const XMUDECN4 * __restrict sPtr = reinterpret_cast<const XMUDECN4*>(pSource); + for( size_t icount = 0; icount < ( size - sizeof(XMUDECN4) + 1 ); icount += sizeof(XMUDECN4) ) + { + if ( dPtr >= ePtr ) break; + + XMVECTORF32 vResult = { + FloatFrom6e4(sPtr->x), + FloatFrom6e4(sPtr->y), + FloatFrom6e4(sPtr->z), + (float)(sPtr->v >> 30) / 3.0f + }; + + ++sPtr; + + *(dPtr++) = vResult.v; + } + return true; + } + return false; + + // We don't support the planar or palettized formats default: return false; } } +#undef LOAD_SCANLINE +#undef LOAD_SCANLINE3 +#undef LOAD_SCANLINE2 + //------------------------------------------------------------------------------------- // Stores an image row from standard RGBA XMVECTOR (aligned) array @@ -967,22 +1597,24 @@ bool _LoadScanline( XMVECTOR* pDestination, size_t count, if ( size >= sizeof(type) )\ {\ type * __restrict dPtr = reinterpret_cast<type*>(pDestination);\ - for( size_t icount = 0; icount < size; icount += sizeof(type) )\ + 
for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); icount += sizeof(type) )\ {\ if ( sPtr >= ePtr ) break;\ func( dPtr++, *sPtr++ );\ }\ + return true; \ }\ - return true; + return false; +_Use_decl_annotations_ bool _StoreScanline( LPVOID pDestination, size_t size, DXGI_FORMAT format, - const XMVECTOR* pSource, size_t count ) + const XMVECTOR* pSource, size_t count, float threshold ) { assert( pDestination && size > 0 ); #if !defined(_XM_NO_INTRINSICS_) assert( pSource && count > 0 && (((uintptr_t)pSource & 0xF) == 0) ); #endif - assert( IsValid(format) && !IsVideo(format) && !IsTypeless(format) && !IsCompressed(format) ); + assert( IsValid(format) && !IsTypeless(format) && !IsCompressed(format) && !IsPlanar(format) && !IsPalettized(format) ); const XMVECTOR* __restrict sPtr = pSource; if ( !sPtr ) @@ -990,7 +1622,7 @@ bool _StoreScanline( LPVOID pDestination, size_t size, DXGI_FORMAT format, const XMVECTOR* ePtr = pSource + count; - switch( format ) + switch( static_cast<int>(format) ) { case DXGI_FORMAT_R32G32B32A32_FLOAT: STORE_SCANLINE( XMFLOAT4, XMStoreFloat4 ) @@ -1011,7 +1643,19 @@ bool _StoreScanline( LPVOID pDestination, size_t size, DXGI_FORMAT format, STORE_SCANLINE( XMINT3, XMStoreSInt3 ) case DXGI_FORMAT_R16G16B16A16_FLOAT: - STORE_SCANLINE( XMHALF4, XMStoreHalf4 ) + if ( size >= sizeof(XMHALF4) ) + { + XMHALF4* __restrict dPtr = reinterpret_cast<XMHALF4*>(pDestination); + for( size_t icount = 0; icount < ( size - sizeof(XMHALF4) + 1 ); icount += sizeof(XMHALF4) ) + { + if ( sPtr >= ePtr ) break; + XMVECTOR v = *sPtr++; + v = XMVectorClamp( v, g_HalfMin, g_HalfMax ); + XMStoreHalf4( dPtr++, v ); + } + return true; + } + return false; case DXGI_FORMAT_R16G16B16A16_UNORM: STORE_SCANLINE( XMUSHORTN4, XMStoreUShortN4 ) @@ -1035,27 +1679,62 @@ bool _StoreScanline( LPVOID pDestination, size_t size, DXGI_FORMAT format, STORE_SCANLINE( XMINT2, XMStoreSInt2 ) case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: - if ( size >= (sizeof(float)+sizeof(uint32_t)) ) { 
- float *dPtr = reinterpret_cast<float*>(pDestination); - for( size_t icount = 0; icount < size; icount += (sizeof(float)+sizeof(uint32_t)) ) + const size_t psize = sizeof(float)+sizeof(uint32_t); + if ( size >= psize ) { - if ( sPtr >= ePtr ) break; - XMFLOAT4 f; - XMStoreFloat4( &f, *sPtr++ ); - dPtr[0] = f.x; - uint8_t* ps8 = reinterpret_cast<uint8_t*>( &dPtr[1] ); - ps8[0] = static_cast<uint8_t>( std::min<float>( 255.f, std::max<float>( 0.f, f.y ) ) ); - ps8[1] = ps8[2] = ps8[3] = 0; - dPtr += 2; + float *dPtr = reinterpret_cast<float*>(pDestination); + for( size_t icount = 0; icount < ( size - psize + 1 ); icount += psize ) + { + if ( sPtr >= ePtr ) break; + XMFLOAT4 f; + XMStoreFloat4( &f, *sPtr++ ); + dPtr[0] = f.x; + uint8_t* ps8 = reinterpret_cast<uint8_t*>( &dPtr[1] ); + ps8[0] = static_cast<uint8_t>( std::min<float>( 255.f, std::max<float>( 0.f, f.y ) ) ); + ps8[1] = ps8[2] = ps8[3] = 0; + dPtr += 2; + } + return true; } } - return true; + return false; case DXGI_FORMAT_R10G10B10A2_UNORM: - case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM: STORE_SCANLINE( XMUDECN4, XMStoreUDecN4 ); + case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM: +#if DIRECTX_MATH_VERSION >= 306 + STORE_SCANLINE( XMUDECN4, XMStoreUDecN4_XR ); +#else + if ( size >= sizeof(XMUDECN4) ) + { + static const XMVECTORF32 Scale = { 510.0f, 510.0f, 510.0f, 3.0f }; + static const XMVECTORF32 Bias = { 384.0f, 384.0f, 384.0f, 0.0f }; + static const XMVECTORF32 C = { 1023.f, 1023.f, 1023.f, 3.f }; + + XMUDECN4 * __restrict dPtr = reinterpret_cast<XMUDECN4*>(pDestination); + for( size_t icount = 0; icount < ( size - sizeof(XMUDECN4) + 1 ); icount += sizeof(XMUDECN4) ) + { + if ( sPtr >= ePtr ) break; + + XMVECTOR N = XMVectorMultiplyAdd( *sPtr++, Scale, Bias ); + N = XMVectorClamp( N, g_XMZero, C ); + + XMFLOAT4A tmp; + XMStoreFloat4A(&tmp, N ); + + dPtr->v = ((uint32_t)tmp.w << 30) + | (((uint32_t)tmp.z & 0x3FF) << 20) + | (((uint32_t)tmp.y & 0x3FF) << 10) + | (((uint32_t)tmp.x & 0x3FF)); + ++dPtr; + } + 
return true; + } + return false; +#endif + case DXGI_FORMAT_R10G10B10A2_UINT: STORE_SCANLINE( XMUDEC4, XMStoreUDec4 ); @@ -1064,7 +1743,18 @@ bool _StoreScanline( LPVOID pDestination, size_t size, DXGI_FORMAT format, case DXGI_FORMAT_R8G8B8A8_UNORM: case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: - STORE_SCANLINE( XMUBYTEN4, XMStoreUByteN4 ) + if ( size >= sizeof(XMUBYTEN4) ) + { + XMUBYTEN4 * __restrict dPtr = reinterpret_cast<XMUBYTEN4*>(pDestination); + for( size_t icount = 0; icount < ( size - sizeof(XMUBYTEN4) + 1 ); icount += sizeof(XMUBYTEN4) ) + { + if ( sPtr >= ePtr ) break; + XMVECTOR v = XMVectorAdd( *sPtr++, g_8BitBias ); + XMStoreUByteN4( dPtr++, v ); + } + return true; + } + return false; case DXGI_FORMAT_R8G8B8A8_UINT: STORE_SCANLINE( XMUBYTE4, XMStoreUByte4 ) @@ -1076,7 +1766,19 @@ bool _StoreScanline( LPVOID pDestination, size_t size, DXGI_FORMAT format, STORE_SCANLINE( XMBYTE4, XMStoreByte4 ) case DXGI_FORMAT_R16G16_FLOAT: - STORE_SCANLINE( XMHALF2, XMStoreHalf2 ) + if ( size >= sizeof(XMHALF2) ) + { + XMHALF2* __restrict dPtr = reinterpret_cast<XMHALF2*>(pDestination); + for( size_t icount = 0; icount < ( size - sizeof(XMHALF2) + 1 ); icount += sizeof(XMHALF2) ) + { + if ( sPtr >= ePtr ) break; + XMVECTOR v = *sPtr++; + v = XMVectorClamp( v, g_HalfMin, g_HalfMax ); + XMStoreHalf2( dPtr++, v ); + } + return true; + } + return false; case DXGI_FORMAT_R16G16_UNORM: STORE_SCANLINE( XMUSHORTN2, XMStoreUShortN2 ) @@ -1095,39 +1797,42 @@ bool _StoreScanline( LPVOID pDestination, size_t size, DXGI_FORMAT format, if ( size >= sizeof(float) ) { float * __restrict dPtr = reinterpret_cast<float*>(pDestination); - for( size_t icount = 0; icount < size; icount += sizeof(float) ) + for( size_t icount = 0; icount < ( size - sizeof(float) + 1 ); icount += sizeof(float) ) { if ( sPtr >= ePtr ) break; XMStoreFloat( dPtr++, *(sPtr++) ); } + return true; } - return true; + return false; case DXGI_FORMAT_R32_UINT: if ( size >= sizeof(uint32_t) ) { uint32_t * __restrict dPtr = 
reinterpret_cast<uint32_t*>(pDestination); - for( size_t icount = 0; icount < size; icount += sizeof(uint32_t) ) + for( size_t icount = 0; icount < ( size - sizeof(uint32_t) + 1 ); icount += sizeof(uint32_t) ) { if ( sPtr >= ePtr ) break; XMVECTOR v = XMConvertVectorFloatToUInt( *(sPtr++), 0 ); XMStoreInt( dPtr++, v ); } + return true; } - return true; + return false; case DXGI_FORMAT_R32_SINT: - if ( size >= sizeof(uint32_t) ) + if ( size >= sizeof(int32_t) ) { uint32_t * __restrict dPtr = reinterpret_cast<uint32_t*>(pDestination); - for( size_t icount = 0; icount < size; icount += sizeof(uint32_t) ) + for( size_t icount = 0; icount < ( size - sizeof(int32_t) + 1 ); icount += sizeof(int32_t) ) { if ( sPtr >= ePtr ) break; XMVECTOR v = XMConvertVectorFloatToInt( *(sPtr++), 0 ); XMStoreInt( dPtr++, v ); } + return true; } - return true; + return false; case DXGI_FORMAT_D24_UNORM_S8_UINT: if ( size >= sizeof(uint32_t) ) @@ -1135,7 +1840,7 @@ bool _StoreScanline( LPVOID pDestination, size_t size, DXGI_FORMAT format, static const XMVECTORF32 clamp = { 1.f, 255.f, 0.f, 0.f }; XMVECTOR zero = XMVectorZero(); uint32_t *dPtr = reinterpret_cast<uint32_t*>(pDestination); - for( size_t icount = 0; icount < size; icount += sizeof(uint32_t) ) + for( size_t icount = 0; icount < ( size - sizeof(uint32_t) + 1 ); icount += sizeof(uint32_t) ) { if ( sPtr >= ePtr ) break; XMFLOAT4 f; @@ -1143,8 +1848,9 @@ bool _StoreScanline( LPVOID pDestination, size_t size, DXGI_FORMAT format, *dPtr++ = (static_cast<uint32_t>( f.x * 16777215.f ) & 0xFFFFFF) | ((static_cast<uint32_t>( f.y ) & 0xFF) << 24); } + return true; } - return true; + return false; case DXGI_FORMAT_R8G8_UNORM: STORE_SCANLINE( XMUBYTEN2, XMStoreUByteN2 ) @@ -1162,71 +1868,77 @@ bool _StoreScanline( LPVOID pDestination, size_t size, DXGI_FORMAT format, if ( size >= sizeof(HALF) ) { HALF * __restrict dPtr = reinterpret_cast<HALF*>(pDestination); - for( size_t icount = 0; icount < size; icount += sizeof(HALF) ) + for( size_t 
icount = 0; icount < ( size - sizeof(HALF) + 1 ); icount += sizeof(HALF) ) { if ( sPtr >= ePtr ) break; float v = XMVectorGetX( *sPtr++ ); + v = std::max<float>( std::min<float>( v, 65504.f ), -65504.f ); *(dPtr++) = XMConvertFloatToHalf(v); } + return true; } - return true; + return false; case DXGI_FORMAT_D16_UNORM: case DXGI_FORMAT_R16_UNORM: - if ( size >= sizeof(int16_t) ) + if ( size >= sizeof(uint16_t) ) { - int16_t * __restrict dPtr = reinterpret_cast<int16_t*>(pDestination); - for( size_t icount = 0; icount < size; icount += sizeof(int16_t) ) + uint16_t * __restrict dPtr = reinterpret_cast<uint16_t*>(pDestination); + for( size_t icount = 0; icount < ( size - sizeof(uint16_t) + 1 ); icount += sizeof(uint16_t) ) { if ( sPtr >= ePtr ) break; float v = XMVectorGetX( *sPtr++ ); v = std::max<float>( std::min<float>( v, 1.f ), 0.f ); *(dPtr++) = static_cast<uint16_t>( v*65535.f + 0.5f ); } + return true; } - return true; + return false; case DXGI_FORMAT_R16_UINT: if ( size >= sizeof(uint16_t) ) { uint16_t * __restrict dPtr = reinterpret_cast<uint16_t*>(pDestination); - for( size_t icount = 0; icount < size; icount += sizeof(uint16_t) ) + for( size_t icount = 0; icount < ( size - sizeof(uint16_t) + 1 ); icount += sizeof(uint16_t) ) { if ( sPtr >= ePtr ) break; float v = XMVectorGetX( *sPtr++ ); v = std::max<float>( std::min<float>( v, 65535.f ), 0.f ); *(dPtr++) = static_cast<uint16_t>(v); } + return true; } - return true; + return false; case DXGI_FORMAT_R16_SNORM: if ( size >= sizeof(int16_t) ) { int16_t * __restrict dPtr = reinterpret_cast<int16_t*>(pDestination); - for( size_t icount = 0; icount < size; icount += sizeof(int16_t) ) + for( size_t icount = 0; icount < ( size - sizeof(int16_t) + 1 ); icount += sizeof(int16_t) ) { if ( sPtr >= ePtr ) break; float v = XMVectorGetX( *sPtr++ ); v = std::max<float>( std::min<float>( v, 1.f ), -1.f ); - *(dPtr++) = static_cast<uint16_t>( v * 32767.f ); + *(dPtr++) = static_cast<int16_t>( v * 32767.f ); } + return true; 
} - return true; + return false; case DXGI_FORMAT_R16_SINT: if ( size >= sizeof(int16_t) ) { int16_t * __restrict dPtr = reinterpret_cast<int16_t*>(pDestination); - for( size_t icount = 0; icount < size; icount += sizeof(int16_t) ) + for( size_t icount = 0; icount < ( size - sizeof(int16_t) + 1 ); icount += sizeof(int16_t) ) { if ( sPtr >= ePtr ) break; float v = XMVectorGetX( *sPtr++ ); v = std::max<float>( std::min<float>( v, 32767.f ), -32767.f ); *(dPtr++) = static_cast<int16_t>(v); } + return true; } - return true; + return false; case DXGI_FORMAT_R8_UNORM: if ( size >= sizeof(uint8_t) ) @@ -1237,10 +1949,11 @@ bool _StoreScanline( LPVOID pDestination, size_t size, DXGI_FORMAT format, if ( sPtr >= ePtr ) break; float v = XMVectorGetX( *sPtr++ ); v = std::max<float>( std::min<float>( v, 1.f ), 0.f ); - *(dPtr++) = static_cast<uint8_t>( v * 255.f); + *(dPtr++) = static_cast<uint8_t>( v * 255.f ); } + return true; } - return true; + return false; case DXGI_FORMAT_R8_UINT: if ( size >= sizeof(uint8_t) ) @@ -1253,36 +1966,39 @@ bool _StoreScanline( LPVOID pDestination, size_t size, DXGI_FORMAT format, v = std::max<float>( std::min<float>( v, 255.f ), 0.f ); *(dPtr++) = static_cast<uint8_t>(v); } + return true; } - return true; + return false; case DXGI_FORMAT_R8_SNORM: - if ( size >= sizeof(char) ) + if ( size >= sizeof(int8_t) ) { - char * __restrict dPtr = reinterpret_cast<char*>(pDestination); - for( size_t icount = 0; icount < size; icount += sizeof(char) ) + int8_t * __restrict dPtr = reinterpret_cast<int8_t*>(pDestination); + for( size_t icount = 0; icount < size; icount += sizeof(int8_t) ) { if ( sPtr >= ePtr ) break; float v = XMVectorGetX( *sPtr++ ); v = std::max<float>( std::min<float>( v, 1.f ), -1.f ); - *(dPtr++) = static_cast<char>( v * 127.f ); + *(dPtr++) = static_cast<int8_t>( v * 127.f ); } + return true; } - return true; + return false; case DXGI_FORMAT_R8_SINT: - if ( size >= sizeof(char) ) + if ( size >= sizeof(int8_t) ) { - char * __restrict 
dPtr = reinterpret_cast<char*>(pDestination); - for( size_t icount = 0; icount < size; icount += sizeof(char) ) + int8_t * __restrict dPtr = reinterpret_cast<int8_t*>(pDestination); + for( size_t icount = 0; icount < size; icount += sizeof(int8_t) ) { if ( sPtr >= ePtr ) break; float v = XMVectorGetX( *sPtr++ ); v = std::max<float>( std::min<float>( v, 127.f ), -127.f ); - *(dPtr++) = static_cast<char>( v ); + *(dPtr++) = static_cast<int8_t>( v ); } + return true; } - return true; + return false; case DXGI_FORMAT_A8_UNORM: if ( size >= sizeof(uint8_t) ) @@ -1295,8 +2011,9 @@ bool _StoreScanline( LPVOID pDestination, size_t size, DXGI_FORMAT format, v = std::max<float>( std::min<float>( v, 1.f ), 0.f ); *(dPtr++) = static_cast<uint8_t>( v * 255.f); } + return true; } - return true; + return false; case DXGI_FORMAT_R1_UNORM: if ( size >= sizeof(uint8_t) ) @@ -1305,139 +2022,494 @@ bool _StoreScanline( LPVOID pDestination, size_t size, DXGI_FORMAT format, for( size_t icount = 0; icount < size; icount += sizeof(uint8_t) ) { uint8_t pixels = 0; - for( size_t bcount = 0; bcount < 8; ++bcount ) + for( size_t bcount = 8; bcount > 0; --bcount ) { if ( sPtr >= ePtr ) break; float v = XMVectorGetX( *sPtr++ ); - if ( v > 0.5f ) - pixels |= 1 << bcount; + + // Absolute thresholding generally doesn't give good results for all images + // Picking the 'right' threshold automatically requires whole-image analysis + + if ( v > 0.25f ) + pixels |= 1 << (bcount-1); } *(dPtr++) = pixels; } + return true; } - return true; + return false; case DXGI_FORMAT_R9G9B9E5_SHAREDEXP: +#if DIRECTX_MATH_VERSION >= 306 STORE_SCANLINE( XMFLOAT3SE, XMStoreFloat3SE ) +#else + if ( size >= sizeof(XMFLOAT3SE) ) + { + static const float maxf9 = float(0x1FF << 7); + static const float minf9 = float(1.f / (1 << 16)); + + XMFLOAT3SE * __restrict dPtr = reinterpret_cast<XMFLOAT3SE*>(pDestination); + for( size_t icount = 0; icount < ( size - sizeof(XMFLOAT3SE) + 1 ); icount += sizeof(XMFLOAT3SE) ) + { + if ( 
sPtr >= ePtr ) break; + + XMFLOAT3 rgb; + XMStoreFloat3( &rgb, *(sPtr++) ); + + float r = (rgb.x >= 0.f) ? ( (rgb.x > maxf9) ? maxf9 : rgb.x ) : 0.f; + float g = (rgb.y >= 0.f) ? ( (rgb.y > maxf9) ? maxf9 : rgb.y ) : 0.f; + float b = (rgb.z >= 0.f) ? ( (rgb.z > maxf9) ? maxf9 : rgb.z ) : 0.f; + + const float max_rg = (r > g) ? r : g; + const float max_rgb = (max_rg > b) ? max_rg : b; + + const float maxColor = (max_rgb > minf9) ? max_rgb : minf9; + + union { float f; INT32 i; } fi; + fi.f = maxColor; + fi.i &= 0xFF800000; // cut off fraction + + dPtr->e = (fi.i - 0x37800000) >> 23; + + fi.i = 0x83000000 - fi.i; + float ScaleR = fi.f; + + dPtr->xm = static_cast<uint32_t>( round_to_nearest(r * ScaleR) ); + dPtr->ym = static_cast<uint32_t>( round_to_nearest(g * ScaleR) ); + dPtr->zm = static_cast<uint32_t>( round_to_nearest(b * ScaleR) ); + ++dPtr; + } + return true; + } + return false; +#endif case DXGI_FORMAT_R8G8_B8G8_UNORM: if ( size >= sizeof(XMUBYTEN4) ) { XMUBYTEN4 * __restrict dPtr = reinterpret_cast<XMUBYTEN4*>(pDestination); - for( size_t icount = 0; icount < size; icount += sizeof(XMUBYTEN4) ) + for( size_t icount = 0; icount < ( size - sizeof(XMUBYTEN4) + 1 ); icount += sizeof(XMUBYTEN4) ) { if ( sPtr >= ePtr ) break; XMVECTOR v0 = *sPtr++; XMVECTOR v1 = (sPtr < ePtr) ? 
XMVectorSplatY( *sPtr++ ) : XMVectorZero(); XMVECTOR v = XMVectorSelect( v1, v0, g_XMSelect1110 ); + v = XMVectorAdd( v, g_8BitBias ); XMStoreUByteN4( dPtr++, v ); } + return true; } - return true; + return false; case DXGI_FORMAT_G8R8_G8B8_UNORM: if ( size >= sizeof(XMUBYTEN4) ) { - static XMVECTORI32 select1101 = {XM_SELECT_1, XM_SELECT_1, XM_SELECT_0, XM_SELECT_1}; + static XMVECTORU32 select1101 = {XM_SELECT_1, XM_SELECT_1, XM_SELECT_0, XM_SELECT_1}; XMUBYTEN4 * __restrict dPtr = reinterpret_cast<XMUBYTEN4*>(pDestination); - for( size_t icount = 0; icount < size; icount += sizeof(XMUBYTEN4) ) + for( size_t icount = 0; icount < ( size - sizeof(XMUBYTEN4) + 1 ); icount += sizeof(XMUBYTEN4) ) { if ( sPtr >= ePtr ) break; XMVECTOR v0 = XMVectorSwizzle<1, 0, 3, 2>( *sPtr++ ); XMVECTOR v1 = (sPtr < ePtr) ? XMVectorSplatY( *sPtr++ ) : XMVectorZero(); XMVECTOR v = XMVectorSelect( v1, v0, select1101 ); + v = XMVectorAdd( v, g_8BitBias ); XMStoreUByteN4( dPtr++, v ); } + return true; } - return true; + return false; case DXGI_FORMAT_B5G6R5_UNORM: if ( size >= sizeof(XMU565) ) { - static XMVECTORF32 s_Scale = { 31.f, 63.f, 31.f, 1.f }; + static const XMVECTORF32 s_Scale = { 31.f, 63.f, 31.f, 1.f }; XMU565 * __restrict dPtr = reinterpret_cast<XMU565*>(pDestination); - for( size_t icount = 0; icount < size; icount += sizeof(XMU565) ) + for( size_t icount = 0; icount < ( size - sizeof(XMU565) + 1 ); icount += sizeof(XMU565) ) { if ( sPtr >= ePtr ) break; XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>( *sPtr++ ); v = XMVectorMultiply( v, s_Scale ); XMStoreU565( dPtr++, v ); } + return true; } - return true; + return false; case DXGI_FORMAT_B5G5R5A1_UNORM: if ( size >= sizeof(XMU555) ) { - static XMVECTORF32 s_Scale = { 31.f, 31.f, 31.f, 1.f }; + static const XMVECTORF32 s_Scale = { 31.f, 31.f, 31.f, 1.f }; XMU555 * __restrict dPtr = reinterpret_cast<XMU555*>(pDestination); - for( size_t icount = 0; icount < size; icount += sizeof(XMU555) ) + for( size_t icount = 0; icount < ( size 
- sizeof(XMU555) + 1 ); icount += sizeof(XMU555) ) { if ( sPtr >= ePtr ) break; XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>( *sPtr++ ); v = XMVectorMultiply( v, s_Scale ); - XMStoreU555( dPtr++, v ); + XMStoreU555( dPtr, v ); + dPtr->w = ( XMVectorGetW( v ) > threshold ) ? 1 : 0; + ++dPtr; } + return true; } - return true; + return false; case DXGI_FORMAT_B8G8R8A8_UNORM: case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB: if ( size >= sizeof(XMUBYTEN4) ) { XMUBYTEN4 * __restrict dPtr = reinterpret_cast<XMUBYTEN4*>(pDestination); - for( size_t icount = 0; icount < size; icount += sizeof(XMUBYTEN4) ) + for( size_t icount = 0; icount < ( size - sizeof(XMUBYTEN4) + 1 ); icount += sizeof(XMUBYTEN4) ) { if ( sPtr >= ePtr ) break; XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>( *sPtr++ ); + v = XMVectorAdd( v, g_8BitBias ); XMStoreUByteN4( dPtr++, v ); } + return true; } - return true; + return false; case DXGI_FORMAT_B8G8R8X8_UNORM: case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB: if ( size >= sizeof(XMUBYTEN4) ) { XMUBYTEN4 * __restrict dPtr = reinterpret_cast<XMUBYTEN4*>(pDestination); - for( size_t icount = 0; icount < size; icount += sizeof(XMUBYTEN4) ) + for( size_t icount = 0; icount < ( size - sizeof(XMUBYTEN4) + 1 ); icount += sizeof(XMUBYTEN4) ) { if ( sPtr >= ePtr ) break; XMVECTOR v = XMVectorPermute<2, 1, 0, 7>( *sPtr++, g_XMIdentityR3 ); + v = XMVectorAdd( v, g_8BitBias ); XMStoreUByteN4( dPtr++, v ); } + return true; } - return true; + return false; + + case DXGI_FORMAT_AYUV: + if ( size >= sizeof(XMUBYTEN4) ) + { + XMUBYTEN4 * __restrict dPtr = reinterpret_cast<XMUBYTEN4*>(pDestination); + for( size_t icount = 0; icount < ( size - sizeof(XMUBYTEN4) + 1 ); icount += sizeof(XMUBYTEN4) ) + { + if ( sPtr >= ePtr ) break; + + XMUBYTEN4 rgba; + XMStoreUByteN4( &rgba, *sPtr++ ); + + // http://msdn.microsoft.com/en-us/library/windows/desktop/dd206750.aspx + + // Y = 0.2568R + 0.5041G + 0.1001B + 16 + // Cb = -0.1482R - 0.2910G + 0.4392B + 128 + // Cr = 0.4392R - 0.3678G - 0.0714B + 128 + + int y = 
( ( 66 * rgba.x + 129 * rgba.y + 25 * rgba.z + 128) >> 8) + 16; + int u = ( ( -38 * rgba.x - 74 * rgba.y + 112 * rgba.z + 128) >> 8) + 128; + int v = ( ( 112 * rgba.x - 94 * rgba.y - 18 * rgba.z + 128) >> 8) + 128; + + dPtr->x = static_cast<uint8_t>( std::min<int>( std::max<int>( v, 0 ), 255 ) ); + dPtr->y = static_cast<uint8_t>( std::min<int>( std::max<int>( u, 0 ), 255 ) ); + dPtr->z = static_cast<uint8_t>( std::min<int>( std::max<int>( y, 0 ), 255 ) ); + dPtr->w = rgba.w; + ++dPtr; + } + return true; + } + return false; + + case DXGI_FORMAT_Y410: + if ( size >= sizeof(XMUDECN4) ) + { + XMUDECN4 * __restrict dPtr = reinterpret_cast<XMUDECN4*>(pDestination); + for( size_t icount = 0; icount < ( size - sizeof(XMUDECN4) + 1 ); icount += sizeof(XMUDECN4) ) + { + if ( sPtr >= ePtr ) break; + + XMUDECN4 rgba; + XMStoreUDecN4( &rgba, *sPtr++ ); + + // http://msdn.microsoft.com/en-us/library/windows/desktop/bb970578.aspx + + // Y = 0.2560R + 0.5027G + 0.0998B + 64 + // Cb = -0.1478R - 0.2902G + 0.4379B + 512 + // Cr = 0.4379R - 0.3667G - 0.0712B + 512 + + int64_t r = rgba.x; + int64_t g = rgba.y; + int64_t b = rgba.z; + + int y = static_cast<int>( ( 16780 * r + 32942 * g + 6544 * b + 32768) >> 16) + 64; + int u = static_cast<int>( ( -9683 * r - 19017 * g + 28700 * b + 32768) >> 16) + 512; + int v = static_cast<int>( ( 28700 * r - 24033 * g - 4667 * b + 32768) >> 16) + 512; + + dPtr->x = static_cast<uint32_t>( std::min<int>( std::max<int>( u, 0 ), 1023 ) ); + dPtr->y = static_cast<uint32_t>( std::min<int>( std::max<int>( y, 0 ), 1023 ) ); + dPtr->z = static_cast<uint32_t>( std::min<int>( std::max<int>( v, 0 ), 1023 ) ); + dPtr->w = rgba.w; + ++dPtr; + } + return true; + } + return false; + + case DXGI_FORMAT_Y416: + if ( size >= sizeof(XMUSHORTN4) ) + { + XMUSHORTN4 * __restrict dPtr = reinterpret_cast<XMUSHORTN4*>(pDestination); + for( size_t icount = 0; icount < ( size - sizeof(XMUSHORTN4) + 1 ); icount += sizeof(XMUSHORTN4) ) + { + if ( sPtr >= ePtr ) break; + + 
XMUSHORTN4 rgba; + XMStoreUShortN4( &rgba, *sPtr++ ); + + // http://msdn.microsoft.com/en-us/library/windows/desktop/bb970578.aspx + + // Y = 0.2558R + 0.5022G + 0.0998B + 4096 + // Cb = -0.1476R - 0.2899G + 0.4375B + 32768 + // Cr = 0.4375R - 0.3664G - 0.0711B + 32768 + + int64_t r = int64_t(rgba.x); + int64_t g = int64_t(rgba.y); + int64_t b = int64_t(rgba.z); + + int y = static_cast<int>( ( 16763 * r + 32910 * g + 6537 * b + 32768) >> 16) + 4096; + int u = static_cast<int>( ( -9674 * r - 18998 * g + 28672 * b + 32768) >> 16) + 32768; + int v = static_cast<int>( ( 28672 * r - 24010 * g - 4662 * b + 32768) >> 16) + 32768; + + dPtr->x = static_cast<uint16_t>( std::min<int>( std::max<int>( u, 0 ), 65535 ) ); + dPtr->y = static_cast<uint16_t>( std::min<int>( std::max<int>( y, 0 ), 65535 ) ); + dPtr->z = static_cast<uint16_t>( std::min<int>( std::max<int>( v, 0 ), 65535 ) ); + dPtr->w = rgba.w; + ++dPtr; + } + return true; + } + return false; + + case DXGI_FORMAT_YUY2: + if ( size >= sizeof(XMUBYTEN4) ) + { + XMUBYTEN4 * __restrict dPtr = reinterpret_cast<XMUBYTEN4*>(pDestination); + for( size_t icount = 0; icount < ( size - sizeof(XMUBYTEN4) + 1 ); icount += sizeof(XMUBYTEN4) ) + { + if ( sPtr >= ePtr ) break; + + XMUBYTEN4 rgb1; + XMStoreUByteN4( &rgb1, *sPtr++ ); + + // See AYUV + int y0 = ( ( 66 * rgb1.x + 129 * rgb1.y + 25 * rgb1.z + 128) >> 8) + 16; + int u0 = ( ( -38 * rgb1.x - 74 * rgb1.y + 112 * rgb1.z + 128) >> 8) + 128; + int v0 = ( ( 112 * rgb1.x - 94 * rgb1.y - 18 * rgb1.z + 128) >> 8) + 128; + + XMUBYTEN4 rgb2; + if(sPtr < ePtr) + { + XMStoreUByteN4( &rgb2, *sPtr++ ); + } + else + { + rgb2.x = rgb2.y = rgb2.z = rgb2.w = 0; + } + + int y1 = ( ( 66 * rgb2.x + 129 * rgb2.y + 25 * rgb2.z + 128) >> 8) + 16; + int u1 = ( ( -38 * rgb2.x - 74 * rgb2.y + 112 * rgb2.z + 128) >> 8) + 128; + int v1 = ( ( 112 * rgb2.x - 94 * rgb2.y - 18 * rgb2.z + 128) >> 8) + 128; + + dPtr->x = static_cast<uint8_t>( std::min<int>( std::max<int>( y0, 0 ), 255 ) ); + dPtr->y = 
static_cast<uint8_t>( std::min<int>( std::max<int>( (u0 + u1) >> 1, 0 ), 255 ) ); + dPtr->z = static_cast<uint8_t>( std::min<int>( std::max<int>( y1, 0 ), 255 ) ); + dPtr->w = static_cast<uint8_t>( std::min<int>( std::max<int>( (v0 + v1) >> 1, 0 ), 255 ) ); + ++dPtr; + } + return true; + } + return false; + + case DXGI_FORMAT_Y210: + // Same as Y216 with least significant 6 bits set to zero + if ( size >= sizeof(XMUSHORTN4) ) + { + XMUSHORTN4 * __restrict dPtr = reinterpret_cast<XMUSHORTN4*>(pDestination); + for( size_t icount = 0; icount < ( size - sizeof(XMUSHORTN4) + 1 ); icount += sizeof(XMUSHORTN4) ) + { + if ( sPtr >= ePtr ) break; + + XMUDECN4 rgb1; + XMStoreUDecN4( &rgb1, *sPtr++ ); + + // See Y410 + int64_t r = rgb1.x; + int64_t g = rgb1.y; + int64_t b = rgb1.z; + + int y0 = static_cast<int>( ( 16780 * r + 32942 * g + 6544 * b + 32768) >> 16) + 64; + int u0 = static_cast<int>( ( -9683 * r - 19017 * g + 28700 * b + 32768) >> 16) + 512; + int v0 = static_cast<int>( ( 28700 * r - 24033 * g - 4667 * b + 32768) >> 16) + 512; + + XMUDECN4 rgb2; + if(sPtr < ePtr) + { + XMStoreUDecN4( &rgb2, *sPtr++ ); + } + else + { + rgb2.x = rgb2.y = rgb2.z = rgb2.w = 0; + } + + r = rgb2.x; + g = rgb2.y; + b = rgb2.z; + + int y1 = static_cast<int>( ( 16780 * r + 32942 * g + 6544 * b + 32768) >> 16) + 64; + int u1 = static_cast<int>( ( -9683 * r - 19017 * g + 28700 * b + 32768) >> 16) + 512; + int v1 = static_cast<int>( ( 28700 * r - 24033 * g - 4667 * b + 32768) >> 16) + 512; + + dPtr->x = static_cast<uint16_t>( std::min<int>( std::max<int>( y0, 0 ), 1023 ) << 6 ); + dPtr->y = static_cast<uint16_t>( std::min<int>( std::max<int>( (u0 + u1) >> 1, 0 ), 1023 ) << 6 ); + dPtr->z = static_cast<uint16_t>( std::min<int>( std::max<int>( y1, 0 ), 1023 ) << 6 ); + dPtr->w = static_cast<uint16_t>( std::min<int>( std::max<int>( (v0 + v1) >> 1, 0 ), 1023 ) << 6 ); + ++dPtr; + } + return true; + } + return false; + + case DXGI_FORMAT_Y216: + if ( size >= sizeof(XMUSHORTN4) ) + { + XMUSHORTN4 
* __restrict dPtr = reinterpret_cast<XMUSHORTN4*>(pDestination); + for( size_t icount = 0; icount < ( size - sizeof(XMUSHORTN4) + 1 ); icount += sizeof(XMUSHORTN4) ) + { + if ( sPtr >= ePtr ) break; + + XMUSHORTN4 rgb1; + XMStoreUShortN4( &rgb1, *sPtr++ ); + + // See Y416 + int64_t r = int64_t(rgb1.x); + int64_t g = int64_t(rgb1.y); + int64_t b = int64_t(rgb1.z); + + int y0 = static_cast<int>( ( 16763 * r + 32910 * g + 6537 * b + 32768) >> 16) + 4096; + int u0 = static_cast<int>( (-9674 * r - 18998 * g + 28672 * b + 32768) >> 16) + 32768; + int v0 = static_cast<int>( ( 28672 * r - 24010 * g - 4662 * b + 32768) >> 16) + 32768; + + XMUSHORTN4 rgb2; + if(sPtr < ePtr) + { + XMStoreUShortN4( &rgb2, *sPtr++ ); + } + else + { + rgb2.x = rgb2.y = rgb2.z = rgb2.w = 0; + } + + r = int64_t(rgb2.x); + g = int64_t(rgb2.y); + b = int64_t(rgb2.z); + + int y1 = static_cast<int>( ( 16763 * r + 32910 * g + 6537 * b + 32768) >> 16) + 4096; + int u1 = static_cast<int>( (-9674 * r - 18998 * g + 28672 * b + 32768) >> 16) + 32768; + int v1 = static_cast<int>( ( 28672 * r - 24010 * g - 4662 * b + 32768) >> 16) + 32768; + + dPtr->x = static_cast<uint16_t>( std::min<int>( std::max<int>( y0, 0 ), 65535 ) ); + dPtr->y = static_cast<uint16_t>( std::min<int>( std::max<int>( (u0 + u1) >> 1, 0 ), 65535 ) ); + dPtr->z = static_cast<uint16_t>( std::min<int>( std::max<int>( y1, 0 ), 65535 ) ); + dPtr->w = static_cast<uint16_t>( std::min<int>( std::max<int>( (v0 + v1) >> 1, 0 ), 65535 ) ); + ++dPtr; + } + return true; + } + return false; -#ifdef DXGI_1_2_FORMATS case DXGI_FORMAT_B4G4R4A4_UNORM: if ( size >= sizeof(XMUNIBBLE4) ) { - static XMVECTORF32 s_Scale = { 15.f, 15.f, 15.f, 15.f }; + static const XMVECTORF32 s_Scale = { 15.f, 15.f, 15.f, 15.f }; XMUNIBBLE4 * __restrict dPtr = reinterpret_cast<XMUNIBBLE4*>(pDestination); - for( size_t icount = 0; icount < size; icount += sizeof(XMUNIBBLE4) ) + for( size_t icount = 0; icount < ( size - sizeof(XMUNIBBLE4) + 1 ); icount += sizeof(XMUNIBBLE4) ) { if 
( sPtr >= ePtr ) break; XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>( *sPtr++ ); v = XMVectorMultiply( v, s_Scale ); XMStoreUNibble4( dPtr++, v ); } + return true; } - return true; + return false; + + case 116 /* DXGI_FORMAT_R10G10B10_7E3_A2_FLOAT */: + // Xbox One specific 7e3 format with alpha + if ( size >= sizeof(XMUDECN4) ) + { + static const XMVECTORF32 Scale = { 1.0f, 1.0f, 1.0f, 3.0f }; + static const XMVECTORF32 C = { 31.875f, 31.875f, 31.875f, 3.f }; + + XMUDECN4 * __restrict dPtr = reinterpret_cast<XMUDECN4*>(pDestination); + for( size_t icount = 0; icount < ( size - sizeof(XMUDECN4) + 1 ); icount += sizeof(XMUDECN4) ) + { + if ( sPtr >= ePtr ) break; + + XMVECTOR V = XMVectorMultiply( *sPtr++, Scale ); + V = XMVectorClamp( V, g_XMZero, C ); - // We don't support the video formats ( see IsVideo function ) -#endif // DXGI_1_2_FORMATS + XMFLOAT4A tmp; + XMStoreFloat4A( &tmp, V ); + + dPtr->x = FloatTo7e3( tmp.x ); + dPtr->y = FloatTo7e3( tmp.y ); + dPtr->z = FloatTo7e3( tmp.z ); + dPtr->w = (uint32_t)tmp.w; + ++dPtr; + } + return true; + } + return false; + + case 117 /* DXGI_FORMAT_R10G10B10_6E4_A2_FLOAT */: + // Xbox One specific 6e4 format with alpha + if ( size >= sizeof(XMUDECN4) ) + { + static const XMVECTORF32 Scale = { 1.0f, 1.0f, 1.0f, 3.0f }; + static const XMVECTORF32 C = { 508.f, 508.f, 508.f, 3.f }; + + XMUDECN4 * __restrict dPtr = reinterpret_cast<XMUDECN4*>(pDestination); + for( size_t icount = 0; icount < ( size - sizeof(XMUDECN4) + 1 ); icount += sizeof(XMUDECN4) ) + { + if ( sPtr >= ePtr ) break; + + XMVECTOR V = XMVectorMultiply( *sPtr++, Scale ); + V = XMVectorClamp( V, g_XMZero, C ); + + XMFLOAT4A tmp; + XMStoreFloat4A( &tmp, V ); + + dPtr->x = FloatTo6e4( tmp.x ); + dPtr->y = FloatTo6e4( tmp.y ); + dPtr->z = FloatTo6e4( tmp.z ); + dPtr->w = (uint32_t)tmp.w; + ++dPtr; + } + return true; + } + return false; + + // We don't support the planar or palettized formats default: return false; } } +#undef STORE_SCANLINE + 
//------------------------------------------------------------------------------------- // Convert DXGI image to/from GUID_WICPixelFormat128bppRGBAFloat (no range conversions) //------------------------------------------------------------------------------------- +_Use_decl_annotations_ HRESULT _ConvertToR32G32B32A32( const Image& srcImage, ScratchImage& image ) { if ( !srcImage.pixels ) @@ -1477,7 +2549,8 @@ HRESULT _ConvertToR32G32B32A32( const Image& srcImage, ScratchImage& image ) return S_OK; } -HRESULT _ConvertFromR32G32B32A32( _In_ const Image& srcImage, _In_ const Image& destImage ) +_Use_decl_annotations_ +HRESULT _ConvertFromR32G32B32A32( const Image& srcImage, const Image& destImage ) { assert( srcImage.format == DXGI_FORMAT_R32G32B32A32_FLOAT ); @@ -1502,6 +2575,7 @@ HRESULT _ConvertFromR32G32B32A32( _In_ const Image& srcImage, _In_ const Image& return S_OK; } +_Use_decl_annotations_ HRESULT _ConvertFromR32G32B32A32( const Image& srcImage, DXGI_FORMAT format, ScratchImage& image ) { if ( !srcImage.pixels ) @@ -1528,6 +2602,7 @@ HRESULT _ConvertFromR32G32B32A32( const Image& srcImage, DXGI_FORMAT format, Scr return S_OK; } +_Use_decl_annotations_ HRESULT _ConvertFromR32G32B32A32( const Image* srcImages, size_t nimages, const TexMetadata& metadata, DXGI_FORMAT format, ScratchImage& result ) { if ( !srcImages ) @@ -1596,128 +2671,181 @@ HRESULT _ConvertFromR32G32B32A32( const Image* srcImages, size_t nimages, const //------------------------------------------------------------------------------------- -// RGB -> sRGB +// Convert from Linear RGB to sRGB +// +// if C_linear <= 0.0031308 -> C_srgb = 12.92 * C_linear +// if C_linear > 0.0031308 -> C_srgb = ( 1 + a ) * pow( C_Linear, 1 / 2.4 ) - a +// where a = 0.055 //------------------------------------------------------------------------------------- -static const uint32_t g_fEncodeGamma22[] = +#if DIRECTX_MATH_VERSION < 306 +static inline XMVECTOR XMColorRGBToSRGB( FXMVECTOR rgb ) { - 0x00000000, 
0x3bd56bd3, 0x3c486344, 0x3c90da15, 0x3cbc2677, 0x3ce67704, 0x3d080183, 0x3d1c7728, - 0x3d30a8fb, 0x3d44a03c, 0x3d586400, 0x3d6bf9e7, 0x3d7f6679, 0x3d8956bd, 0x3d92e906, 0x3d9c6b70, - 0x3da5df22, 0x3daf451b, 0x3db89e3e, 0x3dc1eb50, 0x3dcb2d04, 0x3dd463f7, 0x3ddd90b9, 0x3de6b3ca, - 0x3defcda0, 0x3df8dea6, 0x3e00f3a0, 0x3e0573e3, 0x3e09f046, 0x3e0e68f0, 0x3e12de06, 0x3e174fa6, - 0x3e1bbdf2, 0x3e202906, 0x3e2490fd, 0x3e28f5f1, 0x3e2d57fb, 0x3e31b72f, 0x3e3613a4, 0x3e3a6d6e, - 0x3e3ec4a0, 0x3e43194d, 0x3e476b84, 0x3e4bbb57, 0x3e5008d7, 0x3e54540f, 0x3e589d0f, 0x3e5ce3e5, - 0x3e61289d, 0x3e656b44, 0x3e69abe5, 0x3e6dea8d, 0x3e722745, 0x3e766217, 0x3e7a9b0e, 0x3e7ed235, - 0x3e8183c9, 0x3e839d98, 0x3e85b68c, 0x3e87cea8, 0x3e89e5f2, 0x3e8bfc6b, 0x3e8e1219, 0x3e9026ff, - 0x3e923b20, 0x3e944e7f, 0x3e966120, 0x3e987307, 0x3e9a8436, 0x3e9c94af, 0x3e9ea476, 0x3ea0b38e, - 0x3ea2c1fb, 0x3ea4cfbb, 0x3ea6dcd5, 0x3ea8e94a, 0x3eaaf51c, 0x3ead004e, 0x3eaf0ae2, 0x3eb114d9, - 0x3eb31e37, 0x3eb526fe, 0x3eb72f2f, 0x3eb936cd, 0x3ebb3dd8, 0x3ebd4454, 0x3ebf4a43, 0x3ec14fa5, - 0x3ec3547e, 0x3ec558cd, 0x3ec75c95, 0x3ec95fd8, 0x3ecb6297, 0x3ecd64d4, 0x3ecf6690, 0x3ed167ce, - 0x3ed3688e, 0x3ed568d1, 0x3ed76899, 0x3ed967e9, 0x3edb66bf, 0x3edd651f, 0x3edf630a, 0x3ee16080, - 0x3ee35d84, 0x3ee55a16, 0x3ee75636, 0x3ee951e8, 0x3eeb4d2a, 0x3eed4800, 0x3eef4269, 0x3ef13c68, - 0x3ef335fc, 0x3ef52f26, 0x3ef727ea, 0x3ef92046, 0x3efb183c, 0x3efd0fcd, 0x3eff06fa, 0x3f007ee2, - 0x3f017a16, 0x3f027519, 0x3f036fec, 0x3f046a8f, 0x3f056502, 0x3f065f47, 0x3f07595d, 0x3f085344, - 0x3f094cfe, 0x3f0a468b, 0x3f0b3feb, 0x3f0c391e, 0x3f0d3224, 0x3f0e2aff, 0x3f0f23af, 0x3f101c32, - 0x3f11148c, 0x3f120cba, 0x3f1304bf, 0x3f13fc9a, 0x3f14f44b, 0x3f15ebd3, 0x3f16e333, 0x3f17da6b, - 0x3f18d17a, 0x3f19c860, 0x3f1abf1f, 0x3f1bb5b7, 0x3f1cac28, 0x3f1da272, 0x3f1e9895, 0x3f1f8e92, - 0x3f20846a, 0x3f217a1c, 0x3f226fa8, 0x3f23650f, 0x3f245a52, 0x3f254f70, 0x3f264469, 0x3f27393f, - 0x3f282df1, 0x3f29227f, 0x3f2a16ea, 0x3f2b0b31, 
0x3f2bff56, 0x3f2cf358, 0x3f2de738, 0x3f2edaf6, - 0x3f2fce91, 0x3f30c20b, 0x3f31b564, 0x3f32a89b, 0x3f339bb1, 0x3f348ea6, 0x3f35817a, 0x3f36742f, - 0x3f3766c3, 0x3f385936, 0x3f394b8a, 0x3f3a3dbe, 0x3f3b2fd3, 0x3f3c21c8, 0x3f3d139e, 0x3f3e0556, - 0x3f3ef6ee, 0x3f3fe868, 0x3f40d9c4, 0x3f41cb01, 0x3f42bc20, 0x3f43ad22, 0x3f449e06, 0x3f458ecc, - 0x3f467f75, 0x3f477001, 0x3f486071, 0x3f4950c2, 0x3f4a40f8, 0x3f4b3111, 0x3f4c210d, 0x3f4d10ed, - 0x3f4e00b2, 0x3f4ef05a, 0x3f4fdfe7, 0x3f50cf58, 0x3f51beae, 0x3f52ade8, 0x3f539d07, 0x3f548c0c, - 0x3f557af5, 0x3f5669c4, 0x3f575878, 0x3f584711, 0x3f593590, 0x3f5a23f6, 0x3f5b1241, 0x3f5c0072, - 0x3f5cee89, 0x3f5ddc87, 0x3f5eca6b, 0x3f5fb835, 0x3f60a5e7, 0x3f619380, 0x3f6280ff, 0x3f636e65, - 0x3f645bb3, 0x3f6548e8, 0x3f663604, 0x3f672309, 0x3f680ff4, 0x3f68fcc8, 0x3f69e983, 0x3f6ad627, - 0x3f6bc2b3, 0x3f6caf27, 0x3f6d9b83, 0x3f6e87c8, 0x3f6f73f5, 0x3f70600c, 0x3f714c0b, 0x3f7237f4, - 0x3f7323c4, 0x3f740f7f, 0x3f74fb22, 0x3f75e6af, 0x3f76d225, 0x3f77bd85, 0x3f78a8ce, 0x3f799401, - 0x3f7a7f1e, 0x3f7b6a25, 0x3f7c5516, 0x3f7d3ff1, 0x3f7e2ab6, 0x3f7f1566, 0x3f800000, 0x3f800000 -}; + static const XMVECTORF32 Cutoff = { 0.0031308f, 0.0031308f, 0.0031308f, 1.f }; + static const XMVECTORF32 Linear = { 12.92f, 12.92f, 12.92f, 1.f }; + static const XMVECTORF32 Scale = { 1.055f, 1.055f, 1.055f, 1.f }; + static const XMVECTORF32 Bias = { 0.055f, 0.055f, 0.055f, 0.f }; + static const XMVECTORF32 InvGamma = { 1.0f/2.4f, 1.0f/2.4f, 1.0f/2.4f, 1.f }; + + XMVECTOR V = XMVectorSaturate(rgb); + XMVECTOR V0 = XMVectorMultiply( V, Linear ); + XMVECTOR V1 = Scale * XMVectorPow( V, InvGamma ) - Bias; + XMVECTOR select = XMVectorLess( V, Cutoff ); + V = XMVectorSelect( V1, V0, select ); + return XMVectorSelect( rgb, V, g_XMSelect1110 ); +} +#endif -#pragma prefast(suppress : 25000, "FXMVECTOR is 16 bytes") -static inline XMVECTOR _TableEncodeGamma22( FXMVECTOR v ) +_Use_decl_annotations_ +bool _StoreScanlineLinear( LPVOID pDestination, size_t size, 
DXGI_FORMAT format, + XMVECTOR* pSource, size_t count, DWORD flags, float threshold ) { - float f[4]; - XMStoreFloat4( (XMFLOAT4*)f, v ); + assert( pDestination && size > 0 ); + assert( pSource && count > 0 && (((uintptr_t)pSource & 0xF) == 0) ); + assert( IsValid(format) && !IsTypeless(format) && !IsCompressed(format) && !IsPlanar(format) && !IsPalettized(format) ); - for( size_t i=0; i < 4; ++i ) + switch ( format ) { - float f2 = sqrtf(f[i]) * 254.0f; + case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: + case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB: + case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB: + flags |= TEX_FILTER_SRGB; + break; - uint32_t i2 = static_cast<uint32_t>(f2); - i2 = std::min<uint32_t>( i2, _countof( g_fEncodeGamma22 )-2 ); + case DXGI_FORMAT_R32G32B32A32_FLOAT: + case DXGI_FORMAT_R32G32B32_FLOAT: + case DXGI_FORMAT_R16G16B16A16_FLOAT: + case DXGI_FORMAT_R16G16B16A16_UNORM: + case DXGI_FORMAT_R32G32_FLOAT: + case DXGI_FORMAT_R10G10B10A2_UNORM: + case DXGI_FORMAT_R11G11B10_FLOAT: + case DXGI_FORMAT_R8G8B8A8_UNORM: + case DXGI_FORMAT_R16G16_FLOAT: + case DXGI_FORMAT_R16G16_UNORM: + case DXGI_FORMAT_R32_FLOAT: + case DXGI_FORMAT_R8G8_UNORM: + case DXGI_FORMAT_R16_FLOAT: + case DXGI_FORMAT_R16_UNORM: + case DXGI_FORMAT_R8_UNORM: + case DXGI_FORMAT_R9G9B9E5_SHAREDEXP: + case DXGI_FORMAT_R8G8_B8G8_UNORM: + case DXGI_FORMAT_G8R8_G8B8_UNORM: + case DXGI_FORMAT_B5G6R5_UNORM: + case DXGI_FORMAT_B5G5R5A1_UNORM: + case DXGI_FORMAT_B8G8R8A8_UNORM: + case DXGI_FORMAT_B8G8R8X8_UNORM: + case DXGI_FORMAT_B4G4R4A4_UNORM: + break; - float fS = f2 - (float) i2; - float fA = ((float *) g_fEncodeGamma22)[i2]; - float fB = ((float *) g_fEncodeGamma22)[i2 + 1]; + default: + // can't treat A8, XR, Depth, SNORM, UINT, or SINT as sRGB + flags &= ~TEX_FILTER_SRGB; + break; + } - f[i] = fA + fS * (fB - fA); + // sRGB output processing (Linear RGB -> sRGB) + if ( flags & TEX_FILTER_SRGB_OUT ) + { + // To avoid the need for another temporary scanline buffer, we allow this function to overwrite the source 
buffer in-place + // Given the intended usage in the filtering routines, this is not a problem. + XMVECTOR* ptr = pSource; + for( size_t i=0; i < count; ++i, ++ptr ) + { + *ptr = XMColorRGBToSRGB( *ptr ); + } } - return XMLoadFloat4( (XMFLOAT4*)f ); + return _StoreScanline( pDestination, size, format, pSource, count, threshold ); } //------------------------------------------------------------------------------------- -// sRGB -> RGB +// Convert from sRGB to Linear RGB +// +// if C_srgb <= 0.04045 -> C_linear = C_srgb / 12.92 +// if C_srgb > 0.04045 -> C_linear = pow( ( C_srgb + a ) / ( 1 + a ), 2.4 ) +// where a = 0.055 //------------------------------------------------------------------------------------- -static const uint32_t g_fDecodeGamma22[] = +#if DIRECTX_MATH_VERSION < 306 +static inline XMVECTOR XMColorSRGBToRGB( FXMVECTOR srgb ) { - 0x00000000, 0x3b144eb0, 0x3b9ef3b0, 0x3bf84b42, 0x3c2a5c46, 0x3c59c180, 0x3c850eb5, 0x3c9da52a, - 0x3cb6967a, 0x3ccfd852, 0x3ce9628b, 0x3d01974b, 0x3d0e9b82, 0x3d1bbba3, 0x3d28f5bc, 0x3d364822, - 0x3d43b159, 0x3d51301d, 0x3d5ec344, 0x3d6c69c9, 0x3d7a22c4, 0x3d83f6ad, 0x3d8ae465, 0x3d91da35, - 0x3d98d7c7, 0x3d9fdcd2, 0x3da6e914, 0x3dadfc47, 0x3db51635, 0x3dbc36a3, 0x3dc35d62, 0x3dca8a3a, - 0x3dd1bd02, 0x3dd8f591, 0x3de033bb, 0x3de7775d, 0x3deec050, 0x3df60e74, 0x3dfd61a6, 0x3e025ce5, - 0x3e060b61, 0x3e09bc38, 0x3e0d6f5f, 0x3e1124c8, 0x3e14dc68, 0x3e189630, 0x3e1c521a, 0x3e201016, - 0x3e23d01d, 0x3e279225, 0x3e2b5624, 0x3e2f1c10, 0x3e32e3e4, 0x3e36ad94, 0x3e3a7918, 0x3e3e4668, - 0x3e42157f, 0x3e45e654, 0x3e49b8e0, 0x3e4d8d1d, 0x3e516304, 0x3e553a8d, 0x3e5913b4, 0x3e5cee70, - 0x3e60cabf, 0x3e64a89b, 0x3e6887fb, 0x3e6c68db, 0x3e704b3a, 0x3e742f0e, 0x3e781454, 0x3e7bfb04, - 0x3e7fe321, 0x3e81e650, 0x3e83dbc0, 0x3e85d1dc, 0x3e87c8a3, 0x3e89c015, 0x3e8bb830, 0x3e8db0ee, - 0x3e8faa51, 0x3e91a454, 0x3e939ef9, 0x3e959a3b, 0x3e97961b, 0x3e999295, 0x3e9b8fa7, 0x3e9d8d52, - 0x3e9f8b93, 0x3ea18a6a, 0x3ea389d2, 0x3ea589cb, 0x3ea78a56, 
0x3ea98b6e, 0x3eab8d15, 0x3ead8f47, - 0x3eaf9204, 0x3eb1954a, 0x3eb39917, 0x3eb59d6c, 0x3eb7a246, 0x3eb9a7a5, 0x3ebbad88, 0x3ebdb3ec, - 0x3ebfbad3, 0x3ec1c237, 0x3ec3ca1a, 0x3ec5d27c, 0x3ec7db58, 0x3ec9e4b4, 0x3ecbee85, 0x3ecdf8d3, - 0x3ed0039a, 0x3ed20ed8, 0x3ed41a8a, 0x3ed626b5, 0x3ed83351, 0x3eda4065, 0x3edc4de9, 0x3ede5be0, - 0x3ee06a4a, 0x3ee27923, 0x3ee4886a, 0x3ee69821, 0x3ee8a845, 0x3eeab8d8, 0x3eecc9d6, 0x3eeedb3f, - 0x3ef0ed13, 0x3ef2ff53, 0x3ef511fb, 0x3ef7250a, 0x3ef93883, 0x3efb4c61, 0x3efd60a7, 0x3eff7553, - 0x3f00c531, 0x3f01cfeb, 0x3f02dad9, 0x3f03e5f5, 0x3f04f145, 0x3f05fcc4, 0x3f070875, 0x3f081456, - 0x3f092067, 0x3f0a2ca8, 0x3f0b3917, 0x3f0c45b7, 0x3f0d5284, 0x3f0e5f7f, 0x3f0f6caa, 0x3f107a03, - 0x3f118789, 0x3f12953b, 0x3f13a31d, 0x3f14b12b, 0x3f15bf64, 0x3f16cdca, 0x3f17dc5e, 0x3f18eb1b, - 0x3f19fa05, 0x3f1b091b, 0x3f1c185c, 0x3f1d27c7, 0x3f1e375c, 0x3f1f471d, 0x3f205707, 0x3f21671b, - 0x3f227759, 0x3f2387c2, 0x3f249852, 0x3f25a90c, 0x3f26b9ef, 0x3f27cafb, 0x3f28dc30, 0x3f29ed8b, - 0x3f2aff11, 0x3f2c10bd, 0x3f2d2290, 0x3f2e348b, 0x3f2f46ad, 0x3f3058f7, 0x3f316b66, 0x3f327dfd, - 0x3f3390ba, 0x3f34a39d, 0x3f35b6a7, 0x3f36c9d6, 0x3f37dd2b, 0x3f38f0a5, 0x3f3a0443, 0x3f3b1808, - 0x3f3c2bf2, 0x3f3d4000, 0x3f3e5434, 0x3f3f688c, 0x3f407d07, 0x3f4191a8, 0x3f42a66c, 0x3f43bb54, - 0x3f44d05f, 0x3f45e58e, 0x3f46fadf, 0x3f481054, 0x3f4925ed, 0x3f4a3ba8, 0x3f4b5186, 0x3f4c6789, - 0x3f4d7daa, 0x3f4e93f0, 0x3f4faa57, 0x3f50c0e0, 0x3f51d78b, 0x3f52ee58, 0x3f540545, 0x3f551c55, - 0x3f563386, 0x3f574ad7, 0x3f58624b, 0x3f5979de, 0x3f5a9191, 0x3f5ba965, 0x3f5cc15b, 0x3f5dd971, - 0x3f5ef1a6, 0x3f6009fc, 0x3f612272, 0x3f623b08, 0x3f6353bc, 0x3f646c90, 0x3f658586, 0x3f669e98, - 0x3f67b7cb, 0x3f68d11b, 0x3f69ea8d, 0x3f6b041b, 0x3f6c1dc9, 0x3f6d3795, 0x3f6e5180, 0x3f6f6b8b, - 0x3f7085b2, 0x3f719ff7, 0x3f72ba5b, 0x3f73d4dc, 0x3f74ef7c, 0x3f760a38, 0x3f772512, 0x3f78400b, - 0x3f795b20, 0x3f7a7651, 0x3f7b91a2, 0x3f7cad0e, 0x3f7dc896, 0x3f7ee43c, 0x3f800000, 0x3f800000 -}; 
- + static const XMVECTORF32 Cutoff = { 0.04045f, 0.04045f, 0.04045f, 1.f }; + static const XMVECTORF32 ILinear = { 1.f/12.92f, 1.f/12.92f, 1.f/12.92f, 1.f }; + static const XMVECTORF32 Scale = { 1.f/1.055f, 1.f/1.055f, 1.f/1.055f, 1.f }; + static const XMVECTORF32 Bias = { 0.055f, 0.055f, 0.055f, 0.f }; + static const XMVECTORF32 Gamma = { 2.4f, 2.4f, 2.4f, 1.f }; + + XMVECTOR V = XMVectorSaturate(srgb); + XMVECTOR V0 = XMVectorMultiply( V, ILinear ); + XMVECTOR V1 = XMVectorPow( (V + Bias) * Scale, Gamma ); + XMVECTOR select = XMVectorGreater( V, Cutoff ); + V = XMVectorSelect( V0, V1, select ); + return XMVectorSelect( srgb, V, g_XMSelect1110 ); +} +#endif -#pragma prefast(suppress : 25000, "FXMVECTOR is 16 bytes") -static inline XMVECTOR _TableDecodeGamma22( FXMVECTOR v ) +_Use_decl_annotations_ +bool _LoadScanlineLinear( XMVECTOR* pDestination, size_t count, + LPCVOID pSource, size_t size, DXGI_FORMAT format, DWORD flags ) { - float f[4]; - XMStoreFloat4( (XMFLOAT4*)f, v ); + assert( pDestination && count > 0 && (((uintptr_t)pDestination & 0xF) == 0) ); + assert( pSource && size > 0 ); + assert( IsValid(format) && !IsTypeless(format,false) && !IsCompressed(format) && !IsPlanar(format) && !IsPalettized(format) ); - for( size_t i=0; i < 4; ++i ) + switch ( format ) { - float f2 = f[i] * f[i] * 254.0f; - uint32_t i2 = static_cast<uint32_t>(f2); - i2 = std::min<uint32_t>( i2, _countof(g_fDecodeGamma22)-2 ); + case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: + case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB: + case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB: + flags |= TEX_FILTER_SRGB; + break; - float fS = f2 - (float) i2; - float fA = ((float *) g_fDecodeGamma22)[i2]; - float fB = ((float *) g_fDecodeGamma22)[i2 + 1]; + case DXGI_FORMAT_R32G32B32A32_FLOAT: + case DXGI_FORMAT_R32G32B32_FLOAT: + case DXGI_FORMAT_R16G16B16A16_FLOAT: + case DXGI_FORMAT_R16G16B16A16_UNORM: + case DXGI_FORMAT_R32G32_FLOAT: + case DXGI_FORMAT_R10G10B10A2_UNORM: + case DXGI_FORMAT_R11G11B10_FLOAT: + case 
DXGI_FORMAT_R8G8B8A8_UNORM: + case DXGI_FORMAT_R16G16_FLOAT: + case DXGI_FORMAT_R16G16_UNORM: + case DXGI_FORMAT_R32_FLOAT: + case DXGI_FORMAT_R8G8_UNORM: + case DXGI_FORMAT_R16_FLOAT: + case DXGI_FORMAT_R16_UNORM: + case DXGI_FORMAT_R8_UNORM: + case DXGI_FORMAT_R9G9B9E5_SHAREDEXP: + case DXGI_FORMAT_R8G8_B8G8_UNORM: + case DXGI_FORMAT_G8R8_G8B8_UNORM: + case DXGI_FORMAT_B5G6R5_UNORM: + case DXGI_FORMAT_B5G5R5A1_UNORM: + case DXGI_FORMAT_B8G8R8A8_UNORM: + case DXGI_FORMAT_B8G8R8X8_UNORM: + case DXGI_FORMAT_B4G4R4A4_UNORM: + break; - f[i] = fA + fS * (fB - fA); + default: + // can't treat A8, XR, Depth, SNORM, UINT, or SINT as sRGB + flags &= ~TEX_FILTER_SRGB; + break; } - return XMLoadFloat4( (XMFLOAT4*)f ); + if ( _LoadScanline( pDestination, count, pSource, size, format ) ) + { + // sRGB input processing (sRGB -> Linear RGB) + if ( flags & TEX_FILTER_SRGB_IN ) + { + XMVECTOR* ptr = pDestination; + for( size_t i=0; i < count; ++i, ++ptr ) + { + *ptr = XMColorSRGBToRGB( *ptr ); + } + } + + return true; + } + + return false; } @@ -1798,16 +2926,24 @@ static const ConvertData g_ConvertTable[] = { { DXGI_FORMAT_B5G5R5A1_UNORM, 5, CONVF_UNORM | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, { DXGI_FORMAT_B8G8R8A8_UNORM, 8, CONVF_UNORM | CONVF_BGR | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, { DXGI_FORMAT_B8G8R8X8_UNORM, 8, CONVF_UNORM | CONVF_BGR | CONVF_R | CONVF_G | CONVF_B }, - { DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM, 10, CONVF_UNORM | CONVF_X2 | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, + { DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM, 10, CONVF_UNORM | CONVF_XR | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, { DXGI_FORMAT_B8G8R8A8_UNORM_SRGB, 8, CONVF_UNORM | CONVF_BGR | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, { DXGI_FORMAT_B8G8R8X8_UNORM_SRGB, 8, CONVF_UNORM | CONVF_BGR | CONVF_R | CONVF_G | CONVF_B }, { DXGI_FORMAT_BC6H_UF16, 16, CONVF_FLOAT | CONVF_BC | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, { DXGI_FORMAT_BC6H_SF16, 16, CONVF_FLOAT | CONVF_BC | CONVF_R | CONVF_G | CONVF_B | 
CONVF_A }, { DXGI_FORMAT_BC7_UNORM, 8, CONVF_UNORM | CONVF_BC | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, { DXGI_FORMAT_BC7_UNORM_SRGB, 8, CONVF_UNORM | CONVF_BC | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, -#ifdef DXGI_1_2_FORMATS + { DXGI_FORMAT_AYUV, 8, CONVF_UNORM | CONVF_YUV | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, + { DXGI_FORMAT_Y410, 10, CONVF_UNORM | CONVF_YUV | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, + { DXGI_FORMAT_Y416, 16, CONVF_UNORM | CONVF_YUV | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, + { DXGI_FORMAT_YUY2, 8, CONVF_UNORM | CONVF_YUV | CONVF_PACKED | CONVF_R | CONVF_G | CONVF_B }, + { DXGI_FORMAT_Y210, 10, CONVF_UNORM | CONVF_YUV | CONVF_PACKED | CONVF_R | CONVF_G | CONVF_B }, + { DXGI_FORMAT_Y216, 16, CONVF_UNORM | CONVF_YUV | CONVF_PACKED | CONVF_R | CONVF_G | CONVF_B }, { DXGI_FORMAT_B4G4R4A4_UNORM, 4, CONVF_UNORM | CONVF_BGR | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, -#endif + { DXGI_FORMAT(116) + /* DXGI_FORMAT_R10G10B10_7E3_A2_FLOAT */, 10, CONVF_FLOAT | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, + { DXGI_FORMAT(117) + /* DXGI_FORMAT_R10G10B10_6E4_A2_FLOAT */, 10, CONVF_FLOAT | CONVF_R | CONVF_G | CONVF_B | CONVF_A }, }; #pragma prefast( suppress : 25004, "Signature must match bsearch" ); @@ -1819,6 +2955,7 @@ static int __cdecl _ConvertCompare( const void* ptr1, const void *ptr2 ) else return (p1->format < p2->format ) ? -1 : 1; } +_Use_decl_annotations_ DWORD _GetConvertFlags( DXGI_FORMAT format ) { #ifdef _DEBUG @@ -1838,13 +2975,14 @@ DWORD _GetConvertFlags( DXGI_FORMAT format ) return (in) ? 
in->flags : 0; } +_Use_decl_annotations_ void _ConvertScanline( XMVECTOR* pBuffer, size_t count, DXGI_FORMAT outFormat, DXGI_FORMAT inFormat, DWORD flags ) { #if !defined(_XM_NO_INTRINSICS_) assert( pBuffer && count > 0 && (((uintptr_t)pBuffer & 0xF) == 0) ); #endif - assert( IsValid(outFormat) && !IsVideo(outFormat) && !IsTypeless(outFormat) ); - assert( IsValid(inFormat) && !IsVideo(inFormat) && !IsTypeless(inFormat) ); + assert( IsValid(outFormat) && !IsTypeless(outFormat) && !IsPlanar(outFormat) && !IsPalettized(outFormat) ); + assert( IsValid(inFormat) && !IsTypeless(inFormat) && !IsPlanar(inFormat) && !IsPalettized(inFormat) ); if ( !pBuffer ) return; @@ -1877,44 +3015,332 @@ void _ConvertScanline( XMVECTOR* pBuffer, size_t count, DXGI_FORMAT outFormat, D assert( _GetConvertFlags( outFormat ) == out->flags ); // Handle SRGB filtering modes - if ( IsSRGB( inFormat ) ) + switch ( inFormat ) + { + case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: + case DXGI_FORMAT_BC1_UNORM_SRGB: + case DXGI_FORMAT_BC2_UNORM_SRGB: + case DXGI_FORMAT_BC3_UNORM_SRGB: + case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB: + case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB: + case DXGI_FORMAT_BC7_UNORM_SRGB: flags |= TEX_FILTER_SRGB_IN; + break; - if ( IsSRGB( outFormat ) ) - flags |= TEX_FILTER_SRGB_OUT; - - if ( in->flags & CONVF_SNORM ) + case DXGI_FORMAT_A8_UNORM: + case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM: flags &= ~TEX_FILTER_SRGB_IN; + break; + } + + switch ( outFormat ) + { + case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: + case DXGI_FORMAT_BC1_UNORM_SRGB: + case DXGI_FORMAT_BC2_UNORM_SRGB: + case DXGI_FORMAT_BC3_UNORM_SRGB: + case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB: + case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB: + case DXGI_FORMAT_BC7_UNORM_SRGB: + flags |= TEX_FILTER_SRGB_OUT; + break; - if ( out->flags & CONVF_SNORM ) + case DXGI_FORMAT_A8_UNORM: + case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM: flags &= ~TEX_FILTER_SRGB_OUT; + break; + } if ( (flags & (TEX_FILTER_SRGB_IN|TEX_FILTER_SRGB_OUT)) == 
(TEX_FILTER_SRGB_IN|TEX_FILTER_SRGB_OUT) ) { flags &= ~(TEX_FILTER_SRGB_IN|TEX_FILTER_SRGB_OUT); } - // sRGB input processing (sRGB -> RGB) + // sRGB input processing (sRGB -> Linear RGB) if ( flags & TEX_FILTER_SRGB_IN ) { - if ( (in->flags & CONVF_FLOAT) || (in->flags & CONVF_UNORM) ) + if ( !(in->flags & CONVF_DEPTH) && ( (in->flags & CONVF_FLOAT) || (in->flags & CONVF_UNORM) ) ) { XMVECTOR* ptr = pBuffer; - for( size_t i=0; i < count; ++i ) + for( size_t i=0; i < count; ++i, ++ptr ) { - // rgb = rgb^(2.2); a=a - XMVECTOR v = *ptr; - XMVECTOR v1 = _TableDecodeGamma22( v ); - *ptr++ = XMVectorSelect( v, v1, g_XMSelect1110 ); + *ptr = XMColorSRGBToRGB( *ptr ); } } } // Handle conversion special cases DWORD diffFlags = in->flags ^ out->flags; - if ( diffFlags != 0) + if ( diffFlags != 0 ) { - if ( out->flags & CONVF_UNORM ) + static const XMVECTORF32 s_two = { 2.0f, 2.0f, 2.0f, 2.0f }; + + if ( diffFlags & CONVF_DEPTH ) + { + if ( in->flags & CONVF_DEPTH ) + { + // CONVF_DEPTH -> !CONVF_DEPTH + if ( in->flags & CONVF_STENCIL ) + { + // Stencil -> Alpha + static const XMVECTORF32 S = { 1.f, 1.f, 1.f, 255.f }; + + if( out->flags & CONVF_UNORM ) + { + // UINT -> UNORM + XMVECTOR* ptr = pBuffer; + for( size_t i=0; i < count; ++i ) + { + XMVECTOR v = *ptr; + XMVECTOR v1 = XMVectorSplatY( v ); + v1 = XMVectorClamp( v1, g_XMZero, S ); + v1 = XMVectorDivide( v1, S ); + v = XMVectorSelect( v1, v, g_XMSelect1110 ); + *ptr++ = v; + } + } + else if ( out->flags & CONVF_SNORM ) + { + // UINT -> SNORM + XMVECTOR* ptr = pBuffer; + for( size_t i=0; i < count; ++i ) + { + XMVECTOR v = *ptr; + XMVECTOR v1 = XMVectorSplatY( v ); + v1 = XMVectorClamp( v1, g_XMZero, S ); + v1 = XMVectorDivide( v1, S ); + v1 = XMVectorMultiplyAdd( v1, s_two, g_XMNegativeOne ); + v = XMVectorSelect( v1, v, g_XMSelect1110 ); + *ptr++ = v; + } + } + else + { + XMVECTOR* ptr = pBuffer; + for( size_t i=0; i < count; ++i ) + { + XMVECTOR v = *ptr; + XMVECTOR v1 = XMVectorSplatY( v ); + v = XMVectorSelect( v1, 
v, g_XMSelect1110 ); + *ptr++ = v; + } + } + } + + // Depth -> RGB + if ( ( out->flags & CONVF_UNORM ) && ( in->flags & CONVF_FLOAT ) ) + { + // Depth FLOAT -> UNORM + XMVECTOR* ptr = pBuffer; + for( size_t i=0; i < count; ++i ) + { + XMVECTOR v = *ptr; + XMVECTOR v1 = XMVectorSaturate( v ); + v1 = XMVectorSplatX( v1 ); + v = XMVectorSelect( v, v1, g_XMSelect1110 ); + *ptr++ = v; + } + } + else if ( out->flags & CONVF_SNORM ) + { + if ( in->flags & CONVF_UNORM ) + { + // Depth UNORM -> SNORM + XMVECTOR* ptr = pBuffer; + for( size_t i=0; i < count; ++i ) + { + XMVECTOR v = *ptr; + XMVECTOR v1 = XMVectorMultiplyAdd( v, s_two, g_XMNegativeOne ); + v1 = XMVectorSplatX( v1 ); + v = XMVectorSelect( v, v1, g_XMSelect1110 ); + *ptr++ = v; + } + } + else + { + // Depth FLOAT -> SNORM + XMVECTOR* ptr = pBuffer; + for( size_t i=0; i < count; ++i ) + { + XMVECTOR v = *ptr; + XMVECTOR v1 = XMVectorClamp( v, g_XMNegativeOne, g_XMOne ); + v1 = XMVectorSplatX( v1 ); + v = XMVectorSelect( v, v1, g_XMSelect1110 ); + *ptr++ = v; + } + } + } + else + { + XMVECTOR* ptr = pBuffer; + for( size_t i=0; i < count; ++i ) + { + XMVECTOR v = *ptr; + XMVECTOR v1 = XMVectorSplatX( v ); + v = XMVectorSelect( v, v1, g_XMSelect1110 ); + *ptr++ = v; + } + } + } + else + { + // !CONVF_DEPTH -> CONVF_DEPTH + + // RGB -> Depth (red channel) + switch( flags & ( TEX_FILTER_RGB_COPY_RED | TEX_FILTER_RGB_COPY_GREEN | TEX_FILTER_RGB_COPY_BLUE ) ) + { + case TEX_FILTER_RGB_COPY_GREEN: + { + XMVECTOR* ptr = pBuffer; + for( size_t i=0; i < count; ++i ) + { + XMVECTOR v = *ptr; + XMVECTOR v1 = XMVectorSplatY( v ); + v = XMVectorSelect( v, v1, g_XMSelect1000 ); + *ptr++ = v; + } + } + break; + + case TEX_FILTER_RGB_COPY_BLUE: + { + XMVECTOR* ptr = pBuffer; + for( size_t i=0; i < count; ++i ) + { + XMVECTOR v = *ptr; + XMVECTOR v1 = XMVectorSplatZ( v ); + v = XMVectorSelect( v, v1, g_XMSelect1000 ); + *ptr++ = v; + } + } + break; + + default: + if ( (in->flags & CONVF_UNORM) && ( (in->flags & CONVF_RGB_MASK) == 
(CONVF_R|CONVF_G|CONVF_B) ) ) + { + XMVECTOR* ptr = pBuffer; + for( size_t i=0; i < count; ++i ) + { + XMVECTOR v = *ptr; + XMVECTOR v1 = XMVector3Dot( v, g_Grayscale ); + v = XMVectorSelect( v, v1, g_XMSelect1000 ); + *ptr++ = v; + } + break; + } + + // fall-through + + case TEX_FILTER_RGB_COPY_RED: + { + XMVECTOR* ptr = pBuffer; + for( size_t i=0; i < count; ++i ) + { + XMVECTOR v = *ptr; + XMVECTOR v1 = XMVectorSplatX( v ); + v = XMVectorSelect( v, v1, g_XMSelect1000 ); + *ptr++ = v; + } + } + break; + } + + // Finialize type conversion for depth (red channel) + if ( out->flags & CONVF_UNORM ) + { + if ( in->flags & CONVF_SNORM ) + { + // SNORM -> UNORM + XMVECTOR* ptr = pBuffer; + for( size_t i=0; i < count; ++i ) + { + XMVECTOR v = *ptr; + XMVECTOR v1 = XMVectorMultiplyAdd( v, g_XMOneHalf, g_XMOneHalf ); + v = XMVectorSelect( v, v1, g_XMSelect1000 ); + *ptr++ = v; + } + } + else if ( in->flags & CONVF_FLOAT ) + { + // FLOAT -> UNORM + XMVECTOR* ptr = pBuffer; + for( size_t i=0; i < count; ++i ) + { + XMVECTOR v = *ptr; + XMVECTOR v1 = XMVectorSaturate( v ); + v = XMVectorSelect( v, v1, g_XMSelect1000 ); + *ptr++ = v; + } + } + } + + if ( out->flags & CONVF_STENCIL ) + { + // Alpha -> Stencil (green channel) + static const XMVECTORU32 select0100 = { XM_SELECT_0, XM_SELECT_1, XM_SELECT_0, XM_SELECT_0 }; + static const XMVECTORF32 S = { 255.f, 255.f, 255.f, 255.f }; + + if ( in->flags & CONVF_UNORM ) + { + // UNORM -> UINT + XMVECTOR* ptr = pBuffer; + for( size_t i=0; i < count; ++i ) + { + XMVECTOR v = *ptr; + XMVECTOR v1 = XMVectorMultiply( v, S ); + v1 = XMVectorSplatW( v1 ); + v = XMVectorSelect( v, v1, select0100 ); + *ptr++ = v; + } + } + else if ( in->flags & CONVF_SNORM ) + { + // SNORM -> UINT + XMVECTOR* ptr = pBuffer; + for( size_t i=0; i < count; ++i ) + { + XMVECTOR v = *ptr; + XMVECTOR v1 = XMVectorMultiplyAdd( v, g_XMOneHalf, g_XMOneHalf ); + v1 = XMVectorMultiply( v1, S ); + v1 = XMVectorSplatW( v1 ); + v = XMVectorSelect( v, v1, select0100 ); + 
*ptr++ = v; + } + } + else + { + XMVECTOR* ptr = pBuffer; + for( size_t i=0; i < count; ++i ) + { + XMVECTOR v = *ptr; + XMVECTOR v1 = XMVectorSplatW( v ); + v = XMVectorSelect( v, v1, select0100 ); + *ptr++ = v; + } + } + } + } + } + else if ( out->flags & CONVF_DEPTH ) + { + // CONVF_DEPTH -> CONVF_DEPTH + if ( diffFlags & CONVF_FLOAT ) + { + if ( in->flags & CONVF_FLOAT ) + { + // FLOAT -> UNORM depth, preserve stencil + XMVECTOR* ptr = pBuffer; + for( size_t i=0; i < count; ++i ) + { + XMVECTOR v = *ptr; + XMVECTOR v1 = XMVectorSaturate( v ); + v = XMVectorSelect( v, v1, g_XMSelect1000 ); + *ptr++ = v; + } + } + } + } + else if ( out->flags & CONVF_UNORM ) { if ( in->flags & CONVF_SNORM ) { @@ -1942,12 +3368,11 @@ void _ConvertScanline( XMVECTOR* pBuffer, size_t count, DXGI_FORMAT outFormat, D if ( in->flags & CONVF_UNORM ) { // UNORM -> SNORM - static XMVECTORF32 two = { 2.0f, 2.0f, 2.0f, 2.0f }; XMVECTOR* ptr = pBuffer; for( size_t i=0; i < count; ++i ) { XMVECTOR v = *ptr; - *ptr++ = XMVectorMultiplyAdd( v, two, g_XMNegativeOne ); + *ptr++ = XMVectorMultiplyAdd( v, s_two, g_XMNegativeOne ); } } else if ( in->flags & CONVF_FLOAT ) @@ -1969,11 +3394,54 @@ void _ConvertScanline( XMVECTOR* pBuffer, size_t count, DXGI_FORMAT outFormat, D if ( ((out->flags & CONVF_RGBA_MASK) == CONVF_A) && !(in->flags & CONVF_A) ) { // !CONVF_A -> A format - XMVECTOR* ptr = pBuffer; - for( size_t i=0; i < count; ++i ) + switch( flags & ( TEX_FILTER_RGB_COPY_RED | TEX_FILTER_RGB_COPY_GREEN | TEX_FILTER_RGB_COPY_BLUE ) ) { - XMVECTOR v = *ptr; - *ptr++ = XMVectorSplatX( v ); + case TEX_FILTER_RGB_COPY_GREEN: + { + XMVECTOR* ptr = pBuffer; + for( size_t i=0; i < count; ++i ) + { + XMVECTOR v = *ptr; + *ptr++ = XMVectorSplatY( v ); + } + } + break; + + case TEX_FILTER_RGB_COPY_BLUE: + { + XMVECTOR* ptr = pBuffer; + for( size_t i=0; i < count; ++i ) + { + XMVECTOR v = *ptr; + *ptr++ = XMVectorSplatZ( v ); + } + } + break; + + default: + if ( (in->flags & CONVF_UNORM) && ( (in->flags & 
CONVF_RGB_MASK) == (CONVF_R|CONVF_G|CONVF_B) ) ) + { + XMVECTOR* ptr = pBuffer; + for( size_t i=0; i < count; ++i ) + { + XMVECTOR v = *ptr; + *ptr++ = XMVector3Dot( v, g_Grayscale ); + } + break; + } + + // fall-through + + case TEX_FILTER_RGB_COPY_RED: + { + XMVECTOR* ptr = pBuffer; + for( size_t i=0; i < count; ++i ) + { + XMVECTOR v = *ptr; + *ptr++ = XMVectorSplatX( v ); + } + } + break; } } else if ( ((in->flags & CONVF_RGBA_MASK) == CONVF_A) && !(out->flags & CONVF_A) ) @@ -1986,31 +3454,129 @@ void _ConvertScanline( XMVECTOR* pBuffer, size_t count, DXGI_FORMAT outFormat, D *ptr++ = XMVectorSplatW( v ); } } - else if ( ((in->flags & CONVF_RGB_MASK) == CONVF_R) && ((out->flags & CONVF_RGB_MASK) == (CONVF_R|CONVF_G|CONVF_B)) ) + else if ( (in->flags & CONVF_RGB_MASK) == CONVF_R ) { - // R format -> RGB format - XMVECTOR* ptr = pBuffer; - for( size_t i=0; i < count; ++i ) + if ( (out->flags & CONVF_RGB_MASK) == (CONVF_R|CONVF_G|CONVF_B) ) { - XMVECTOR v = *ptr; - XMVECTOR v1 = XMVectorSplatX( v ); - *ptr++ = XMVectorSelect( v, v1, g_XMSelect1110 ); + // R format -> RGB format + XMVECTOR* ptr = pBuffer; + for( size_t i=0; i < count; ++i ) + { + XMVECTOR v = *ptr; + XMVECTOR v1 = XMVectorSplatX( v ); + *ptr++ = XMVectorSelect( v, v1, g_XMSelect1110 ); + } + } + else if ( (out->flags & CONVF_RGB_MASK) == (CONVF_R|CONVF_G) ) + { + // R format -> RG format + XMVECTOR* ptr = pBuffer; + for( size_t i=0; i < count; ++i ) + { + XMVECTOR v = *ptr; + XMVECTOR v1 = XMVectorSplatX( v ); + *ptr++ = XMVectorSelect( v, v1, g_XMSelect1100 ); + } + } + } + else if ( (in->flags & CONVF_RGB_MASK) == (CONVF_R|CONVF_G|CONVF_B) ) + { + if ( (out->flags & CONVF_RGB_MASK) == CONVF_R ) + { + // RGB format -> R format + switch( flags & ( TEX_FILTER_RGB_COPY_RED | TEX_FILTER_RGB_COPY_GREEN | TEX_FILTER_RGB_COPY_BLUE ) ) + { + case TEX_FILTER_RGB_COPY_GREEN: + { + XMVECTOR* ptr = pBuffer; + for( size_t i=0; i < count; ++i ) + { + XMVECTOR v = *ptr; + XMVECTOR v1 = XMVectorSplatY( v ); + 
*ptr++ = XMVectorSelect( v, v1, g_XMSelect1110 ); + } + } + break; + + case TEX_FILTER_RGB_COPY_BLUE: + { + XMVECTOR* ptr = pBuffer; + for( size_t i=0; i < count; ++i ) + { + XMVECTOR v = *ptr; + XMVECTOR v1 = XMVectorSplatZ( v ); + *ptr++ = XMVectorSelect( v, v1, g_XMSelect1110 ); + } + } + break; + + default: + if ( in->flags & CONVF_UNORM ) + { + XMVECTOR* ptr = pBuffer; + for( size_t i=0; i < count; ++i ) + { + XMVECTOR v = *ptr; + XMVECTOR v1 = XMVector3Dot( v, g_Grayscale ); + *ptr++ = XMVectorSelect( v, v1, g_XMSelect1110 ); + } + break; + } + + // fall-through + + case TEX_FILTER_RGB_COPY_RED: + // Leave data unchanged and the store will handle this... + break; + } + } + else if ( (out->flags & CONVF_RGB_MASK) == (CONVF_R|CONVF_G) ) + { + // RGB format -> RG format + switch( flags & ( TEX_FILTER_RGB_COPY_RED | TEX_FILTER_RGB_COPY_GREEN | TEX_FILTER_RGB_COPY_BLUE ) ) + { + case TEX_FILTER_RGB_COPY_RED | TEX_FILTER_RGB_COPY_BLUE: + { + XMVECTOR* ptr = pBuffer; + for( size_t i=0; i < count; ++i ) + { + XMVECTOR v = *ptr; + XMVECTOR v1 = XMVectorSwizzle<0,2,0,2>( v ); + *ptr++ = XMVectorSelect( v, v1, g_XMSelect1100 ); + } + } + break; + + case TEX_FILTER_RGB_COPY_GREEN | TEX_FILTER_RGB_COPY_BLUE: + { + XMVECTOR* ptr = pBuffer; + for( size_t i=0; i < count; ++i ) + { + XMVECTOR v = *ptr; + XMVECTOR v1 = XMVectorSwizzle<1,2,3,0>( v ); + *ptr++ = XMVectorSelect( v, v1, g_XMSelect1100 ); + } + } + break; + + case TEX_FILTER_RGB_COPY_RED | TEX_FILTER_RGB_COPY_GREEN: + default: + // Leave data unchanged and the store will handle this... 
+ break; + } } } } - // sRGB output processing (RGB -> sRGB) + // sRGB output processing (Linear RGB -> sRGB) if ( flags & TEX_FILTER_SRGB_OUT ) { - if ( (out->flags & CONVF_FLOAT) || (out->flags & CONVF_UNORM) ) + if ( !(out->flags & CONVF_DEPTH) && ( (out->flags & CONVF_FLOAT) || (out->flags & CONVF_UNORM) ) ) { XMVECTOR* ptr = pBuffer; - for( size_t i=0; i < count; ++i ) + for( size_t i=0; i < count; ++i, ++ptr ) { - // rgb = rgb^(1/2.2); a=a - XMVECTOR v = *ptr; - XMVECTOR v1 = _TableEncodeGamma22( v ); - *ptr++ = XMVectorSelect( v, v1, g_XMSelect1110 ); + *ptr = XMColorRGBToSRGB( *ptr ); } } } @@ -2018,129 +3584,706 @@ void _ConvertScanline( XMVECTOR* pBuffer, size_t count, DXGI_FORMAT outFormat, D //------------------------------------------------------------------------------------- -// Convert the source image using WIC +// Dithering //------------------------------------------------------------------------------------- -static HRESULT _ConvertUsingWIC( _In_ const Image& srcImage, _In_ const WICPixelFormatGUID& pfGUID, - _In_ const WICPixelFormatGUID& targetGUID, - _In_ DWORD filter, _In_ float threshold, _In_ const Image& destImage ) + +// 4X4X4 ordered dithering matrix +static const float g_Dither[] = { - assert( srcImage.width == destImage.width ); - assert( srcImage.height == destImage.height ); + // (z & 3) + ( (y & 3) * 8) + (x & 3) + 0.468750f, -0.031250f, 0.343750f, -0.156250f, 0.468750f, -0.031250f, 0.343750f, -0.156250f, + -0.281250f, 0.218750f, -0.406250f, 0.093750f, -0.281250f, 0.218750f, -0.406250f, 0.093750f, + 0.281250f, -0.218750f, 0.406250f, -0.093750f, 0.281250f, -0.218750f, 0.406250f, -0.093750f, + -0.468750f, 0.031250f, -0.343750f, 0.156250f, -0.468750f, 0.031250f, -0.343750f, 0.156250f, +}; - IWICImagingFactory* pWIC = _GetWIC(); - if ( !pWIC ) - return E_NOINTERFACE; +static const XMVECTORF32 g_Scale16pc = { 65535.f, 65535.f, 65535.f, 65535.f }; +static const XMVECTORF32 g_Scale15pc = { 32767.f, 32767.f, 32767.f, 32767.f }; +static 
const XMVECTORF32 g_Scale10pc = { 1023.f, 1023.f, 1023.f, 3.f }; +static const XMVECTORF32 g_Scale8pc = { 255.f, 255.f, 255.f, 255.f }; +static const XMVECTORF32 g_Scale7pc = { 127.f, 127.f, 127.f, 127.f }; +static const XMVECTORF32 g_Scale565pc = { 31.f, 63.f, 31.f, 1.f }; +static const XMVECTORF32 g_Scale5551pc = { 31.f, 31.f, 31.f, 1.f }; +static const XMVECTORF32 g_Scale4pc = { 15.f, 15.f, 15.f, 15.f }; + +static const XMVECTORF32 g_ErrorWeight3 = { 3.f/16.f, 3.f/16.f, 3.f/16.f, 3.f/16.f }; +static const XMVECTORF32 g_ErrorWeight5 = { 5.f/16.f, 5.f/16.f, 5.f/16.f, 5.f/16.f }; +static const XMVECTORF32 g_ErrorWeight1 = { 1.f/16.f, 1.f/16.f, 1.f/16.f, 1.f/16.f }; +static const XMVECTORF32 g_ErrorWeight7 = { 7.f/16.f, 7.f/16.f, 7.f/16.f, 7.f/16.f }; + +#define STORE_SCANLINE( type, scalev, clampzero, norm, itype, mask, row, bgr ) \ + if ( size >= sizeof(type) ) \ + { \ + type * __restrict dest = reinterpret_cast<type*>(pDestination); \ + for( size_t i = 0; i < count; ++i ) \ + { \ + ptrdiff_t index = static_cast<ptrdiff_t>( ( row & 1 ) ? ( count - i - 1 ) : i ); \ + ptrdiff_t delta = ( row & 1 ) ? 
-2 : 0; \ + \ + XMVECTOR v = sPtr[ index ]; \ + if ( bgr ) { v = XMVectorSwizzle<2, 1, 0, 3>( v ); } \ + if ( norm && clampzero ) v = XMVectorSaturate( v ) ; \ + else if ( clampzero ) v = XMVectorClamp( v, g_XMZero, scalev ); \ + else if ( norm ) v = XMVectorClamp( v, g_XMNegativeOne, g_XMOne ); \ + else v = XMVectorClamp( v, -scalev + g_XMOne, scalev ); \ + v = XMVectorAdd( v, vError ); \ + if ( norm ) v = XMVectorMultiply( v, scalev ); \ + \ + XMVECTOR target; \ + if ( pDiffusionErrors ) \ + { \ + target = XMVectorRound( v ); \ + vError = XMVectorSubtract( v, target ); \ + if (norm) vError = XMVectorDivide( vError, scalev ); \ + \ + /* Distribute error to next scanline and next pixel */ \ + pDiffusionErrors[ index-delta ] += XMVectorMultiply( g_ErrorWeight3, vError ); \ + pDiffusionErrors[ index+1 ] += XMVectorMultiply( g_ErrorWeight5, vError ); \ + pDiffusionErrors[ index+2+delta ] += XMVectorMultiply( g_ErrorWeight1, vError ); \ + vError = XMVectorMultiply( vError, g_ErrorWeight7 ); \ + } \ + else \ + { \ + /* Applied ordered dither */ \ + target = XMVectorAdd( v, ordered[ index & 3 ] ); \ + target = XMVectorRound( target ); \ + } \ + \ + target = XMVectorMin( scalev, target ); \ + target = XMVectorMax( (clampzero) ? 
g_XMZero : ( -scalev + g_XMOne ), target ); \ + \ + XMFLOAT4A tmp; \ + XMStoreFloat4A( &tmp, target ); \ + \ + auto dPtr = &dest[ index ]; \ + dPtr->x = static_cast<itype>( tmp.x ) & mask; \ + dPtr->y = static_cast<itype>( tmp.y ) & mask; \ + dPtr->z = static_cast<itype>( tmp.z ) & mask; \ + dPtr->w = static_cast<itype>( tmp.w ) & mask; \ + } \ + return true; \ + } \ + return false; - ScopedObject<IWICFormatConverter> FC; - HRESULT hr = pWIC->CreateFormatConverter( &FC ); - if ( FAILED(hr) ) - return hr; +#define STORE_SCANLINE2( type, scalev, clampzero, norm, itype, mask, row ) \ + /* The 2 component cases are always bgr=false */ \ + if ( size >= sizeof(type) ) \ + { \ + type * __restrict dest = reinterpret_cast<type*>(pDestination); \ + for( size_t i = 0; i < count; ++i ) \ + { \ + ptrdiff_t index = static_cast<ptrdiff_t>( ( row & 1 ) ? ( count - i - 1 ) : i ); \ + ptrdiff_t delta = ( row & 1 ) ? -2 : 0; \ + \ + XMVECTOR v = sPtr[ index ]; \ + if ( norm && clampzero ) v = XMVectorSaturate( v ) ; \ + else if ( clampzero ) v = XMVectorClamp( v, g_XMZero, scalev ); \ + else if ( norm ) v = XMVectorClamp( v, g_XMNegativeOne, g_XMOne ); \ + else v = XMVectorClamp( v, -scalev + g_XMOne, scalev ); \ + v = XMVectorAdd( v, vError ); \ + if ( norm ) v = XMVectorMultiply( v, scalev ); \ + \ + XMVECTOR target; \ + if ( pDiffusionErrors ) \ + { \ + target = XMVectorRound( v ); \ + vError = XMVectorSubtract( v, target ); \ + if (norm) vError = XMVectorDivide( vError, scalev ); \ + \ + /* Distribute error to next scanline and next pixel */ \ + pDiffusionErrors[ index-delta ] += XMVectorMultiply( g_ErrorWeight3, vError ); \ + pDiffusionErrors[ index+1 ] += XMVectorMultiply( g_ErrorWeight5, vError ); \ + pDiffusionErrors[ index+2+delta ] += XMVectorMultiply( g_ErrorWeight1, vError ); \ + vError = XMVectorMultiply( vError, g_ErrorWeight7 ); \ + } \ + else \ + { \ + /* Applied ordered dither */ \ + target = XMVectorAdd( v, ordered[ index & 3 ] ); \ + target = XMVectorRound( target 
); \ + } \ + \ + target = XMVectorMin( scalev, target ); \ + target = XMVectorMax( (clampzero) ? g_XMZero : ( -scalev + g_XMOne ), target ); \ + \ + XMFLOAT4A tmp; \ + XMStoreFloat4A( &tmp, target ); \ + \ + auto dPtr = &dest[ index ]; \ + dPtr->x = static_cast<itype>( tmp.x ) & mask; \ + dPtr->y = static_cast<itype>( tmp.y ) & mask; \ + } \ + return true; \ + } \ + return false; - // Need to implement usage of TEX_FILTER_SRGB_IN/TEX_FILTER_SRGB_OUT +#define STORE_SCANLINE1( type, scalev, clampzero, norm, mask, row, selectw ) \ + /* The 1 component cases are always bgr=false */ \ + if ( size >= sizeof(type) ) \ + { \ + type * __restrict dest = reinterpret_cast<type*>(pDestination); \ + for( size_t i = 0; i < count; ++i ) \ + { \ + ptrdiff_t index = static_cast<ptrdiff_t>( ( row & 1 ) ? ( count - i - 1 ) : i ); \ + ptrdiff_t delta = ( row & 1 ) ? -2 : 0; \ + \ + XMVECTOR v = sPtr[ index ]; \ + if ( norm && clampzero ) v = XMVectorSaturate( v ) ; \ + else if ( clampzero ) v = XMVectorClamp( v, g_XMZero, scalev ); \ + else if ( norm ) v = XMVectorClamp( v, g_XMNegativeOne, g_XMOne ); \ + else v = XMVectorClamp( v, -scalev + g_XMOne, scalev ); \ + v = XMVectorAdd( v, vError ); \ + if ( norm ) v = XMVectorMultiply( v, scalev ); \ + \ + XMVECTOR target; \ + if ( pDiffusionErrors ) \ + { \ + target = XMVectorRound( v ); \ + vError = XMVectorSubtract( v, target ); \ + if (norm) vError = XMVectorDivide( vError, scalev ); \ + \ + /* Distribute error to next scanline and next pixel */ \ + pDiffusionErrors[ index-delta ] += XMVectorMultiply( g_ErrorWeight3, vError ); \ + pDiffusionErrors[ index+1 ] += XMVectorMultiply( g_ErrorWeight5, vError ); \ + pDiffusionErrors[ index+2+delta ] += XMVectorMultiply( g_ErrorWeight1, vError ); \ + vError = XMVectorMultiply( vError, g_ErrorWeight7 ); \ + } \ + else \ + { \ + /* Applied ordered dither */ \ + target = XMVectorAdd( v, ordered[ index & 3 ] ); \ + target = XMVectorRound( target ); \ + } \ + \ + target = XMVectorMin( scalev, target 
); \ + target = XMVectorMax( (clampzero) ? g_XMZero : ( -scalev + g_XMOne ), target ); \ + \ + dest[ index ] = static_cast<type>( (selectw) ? XMVectorGetW( target ) : XMVectorGetX( target ) ) & mask; \ + } \ + return true; \ + } \ + return false; - BOOL canConvert = FALSE; - hr = FC->CanConvert( pfGUID, targetGUID, &canConvert ); - if ( FAILED(hr) || !canConvert ) +#pragma warning(push) +#pragma warning( disable : 4127 ) + +_Use_decl_annotations_ +bool _StoreScanlineDither( LPVOID pDestination, size_t size, DXGI_FORMAT format, + XMVECTOR* pSource, size_t count, float threshold, size_t y, size_t z, XMVECTOR* pDiffusionErrors ) +{ + assert( pDestination && size > 0 ); + assert( pSource && count > 0 && (((uintptr_t)pSource & 0xF) == 0) ); + assert( IsValid(format) && !IsTypeless(format) && !IsCompressed(format) && !IsPlanar(format) && !IsPalettized(format) ); + + XMVECTOR ordered[4]; + if ( pDiffusionErrors ) { - // This case is not an issue for the subset of WIC formats that map directly to DXGI - return E_UNEXPECTED; + // If pDiffusionErrors != 0, then this function performs error diffusion dithering (aka Floyd-Steinberg dithering) + + // To avoid the need for another temporary scanline buffer, we allow this function to overwrite the source buffer in-place + // Given the intended usage in the conversion routines, this is not a problem. 
+ + XMVECTOR* ptr = pSource; + const XMVECTOR* err = pDiffusionErrors + 1; + for( size_t i=0; i < count; ++i ) + { + // Add contribution from previous scanline + XMVECTOR v = XMVectorAdd( *ptr, *err++ ); + *ptr++ = v; + } + + // Reset errors for next scanline + memset( pDiffusionErrors, 0, sizeof(XMVECTOR)*(count+2) ); } + else + { + // If pDiffusionErrors == 0, then this function performs ordered dithering - ScopedObject<IWICBitmap> source; - hr = pWIC->CreateBitmapFromMemory( static_cast<UINT>( srcImage.width ), static_cast<UINT>( srcImage.height ), pfGUID, - static_cast<UINT>( srcImage.rowPitch ), static_cast<UINT>( srcImage.slicePitch ), - srcImage.pixels, &source ); - if ( FAILED(hr) ) - return hr; + XMVECTOR dither = XMLoadFloat4( reinterpret_cast<const XMFLOAT4*>( g_Dither + (z & 3) + ( (y & 3) * 8 ) ) ); - hr = FC->Initialize( source.Get(), targetGUID, _GetWICDither( filter ), 0, threshold, WICBitmapPaletteTypeCustom ); - if ( FAILED(hr) ) - return hr; + ordered[0] = XMVectorSplatX( dither ); + ordered[1] = XMVectorSplatY( dither ); + ordered[2] = XMVectorSplatZ( dither ); + ordered[3] = XMVectorSplatW( dither ); + } - hr = FC->CopyPixels( 0, static_cast<UINT>( destImage.rowPitch ), static_cast<UINT>( destImage.slicePitch ), destImage.pixels ); - if ( FAILED(hr) ) - return hr; + const XMVECTOR* __restrict sPtr = pSource; + if ( !sPtr ) + return false; - return S_OK; + XMVECTOR vError = XMVectorZero(); + + switch( format ) + { + case DXGI_FORMAT_R16G16B16A16_UNORM: + STORE_SCANLINE( XMUSHORTN4, g_Scale16pc, true, true, uint16_t, 0xFFFF, y, false ) + + case DXGI_FORMAT_R16G16B16A16_UINT: + STORE_SCANLINE( XMUSHORT4, g_Scale16pc, true, false, uint16_t, 0xFFFF, y, false ) + + case DXGI_FORMAT_R16G16B16A16_SNORM: + STORE_SCANLINE( XMSHORTN4, g_Scale15pc, false, true, int16_t, 0xFFFF, y, false ) + + case DXGI_FORMAT_R16G16B16A16_SINT: + STORE_SCANLINE( XMSHORT4, g_Scale15pc, false, false, int16_t, 0xFFFF, y, false ) + + case DXGI_FORMAT_R10G10B10A2_UNORM: + 
STORE_SCANLINE( XMUDECN4, g_Scale10pc, true, true, uint16_t, 0x3FF, y, false ) + + case DXGI_FORMAT_R10G10B10A2_UINT: + STORE_SCANLINE( XMUDEC4, g_Scale10pc, true, false, uint16_t, 0x3FF, y, false ) + + case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM: + if ( size >= sizeof(XMUDEC4) ) + { + static const XMVECTORF32 Scale = { 510.0f, 510.0f, 510.0f, 3.0f }; + static const XMVECTORF32 Bias = { 384.0f, 384.0f, 384.0f, 0.0f }; + static const XMVECTORF32 MinXR = { -0.7529f, -0.7529f, -0.7529f, 0.f }; + static const XMVECTORF32 MaxXR = { 1.2529f, 1.2529f, 1.2529f, 1.0f }; + + XMUDEC4 * __restrict dest = reinterpret_cast<XMUDEC4*>(pDestination); + for( size_t i = 0; i < count; ++i ) + { + ptrdiff_t index = static_cast<ptrdiff_t>( ( y & 1 ) ? ( count - i - 1 ) : i ); + ptrdiff_t delta = ( y & 1 ) ? -2 : 0; + + XMVECTOR v = XMVectorClamp( sPtr[ index ], MinXR, MaxXR ); + v = XMVectorMultiplyAdd( v, Scale, vError ); + + XMVECTOR target; + if ( pDiffusionErrors ) + { + target = XMVectorRound( v ); + vError = XMVectorSubtract( v, target ); + vError = XMVectorDivide( vError, Scale ); + + // Distribute error to next scanline and next pixel + pDiffusionErrors[ index-delta ] += XMVectorMultiply( g_ErrorWeight3, vError ); + pDiffusionErrors[ index+1 ] += XMVectorMultiply( g_ErrorWeight5, vError ); + pDiffusionErrors[ index+2+delta ] += XMVectorMultiply( g_ErrorWeight1, vError ); + vError = XMVectorMultiply( vError, g_ErrorWeight7 ); + } + else + { + // Applied ordered dither + target = XMVectorAdd( v, ordered[ index & 3 ] ); + target = XMVectorRound( target ); + } + + target = XMVectorAdd( target, Bias ); + target = XMVectorClamp( target, g_XMZero, g_Scale10pc ); + + XMFLOAT4A tmp; + XMStoreFloat4A( &tmp, target ); + + auto dPtr = &dest[ index ]; + dPtr->x = static_cast<uint16_t>( tmp.x ) & 0x3FF; + dPtr->y = static_cast<uint16_t>( tmp.y ) & 0x3FF; + dPtr->z = static_cast<uint16_t>( tmp.z ) & 0x3FF; + dPtr->w = static_cast<uint16_t>( tmp.w ); + } + return true; + } + return false; + + 
case DXGI_FORMAT_R8G8B8A8_UNORM: + case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: + STORE_SCANLINE( XMUBYTEN4, g_Scale8pc, true, true, uint8_t, 0xFF, y, false ) + + case DXGI_FORMAT_R8G8B8A8_UINT: + STORE_SCANLINE( XMUBYTE4, g_Scale8pc, true, false, uint8_t, 0xFF, y, false ) + + case DXGI_FORMAT_R8G8B8A8_SNORM: + STORE_SCANLINE( XMBYTEN4, g_Scale7pc, false, true, int8_t, 0xFF, y, false ) + + case DXGI_FORMAT_R8G8B8A8_SINT: + STORE_SCANLINE( XMBYTE4, g_Scale7pc, false, false, int8_t, 0xFF, y, false ) + + case DXGI_FORMAT_R16G16_UNORM: + STORE_SCANLINE2( XMUSHORTN2, g_Scale16pc, true, true, uint16_t, 0xFFFF, y ) + + case DXGI_FORMAT_R16G16_UINT: + STORE_SCANLINE2( XMUSHORT2, g_Scale16pc, true, false, uint16_t, 0xFFFF, y ) + + case DXGI_FORMAT_R16G16_SNORM: + STORE_SCANLINE2( XMSHORTN2, g_Scale15pc, false, true, int16_t, 0xFFFF, y ) + + case DXGI_FORMAT_R16G16_SINT: + STORE_SCANLINE2( XMSHORT2, g_Scale15pc, false, false, int16_t, 0xFFFF, y ) + + case DXGI_FORMAT_D24_UNORM_S8_UINT: + if ( size >= sizeof(uint32_t) ) + { + static const XMVECTORF32 Clamp = { 1.f, 255.f, 0.f, 0.f }; + static const XMVECTORF32 Scale = { 16777215.f, 1.f, 0.f, 0.f }; + static const XMVECTORF32 Scale2 = { 16777215.f, 255.f, 0.f, 0.f }; + + uint32_t * __restrict dest = reinterpret_cast<uint32_t*>(pDestination); + for( size_t i = 0; i < count; ++i ) + { + ptrdiff_t index = static_cast<ptrdiff_t>( ( y & 1 ) ? ( count - i - 1 ) : i ); + ptrdiff_t delta = ( y & 1 ) ? 
-2 : 0; + + XMVECTOR v = XMVectorClamp( sPtr[ index ], g_XMZero, Clamp ); + v = XMVectorAdd( v, vError ); + v = XMVectorMultiply( v, Scale ); + + XMVECTOR target; + if ( pDiffusionErrors ) + { + target = XMVectorRound( v ); + vError = XMVectorSubtract( v, target ); + vError = XMVectorDivide( vError, Scale ); + + // Distribute error to next scanline and next pixel + pDiffusionErrors[ index-delta ] += XMVectorMultiply( g_ErrorWeight3, vError ); + pDiffusionErrors[ index+1 ] += XMVectorMultiply( g_ErrorWeight5, vError ); + pDiffusionErrors[ index+2+delta ] += XMVectorMultiply( g_ErrorWeight1, vError ); + vError = XMVectorMultiply( vError, g_ErrorWeight7 ); + } + else + { + // Applied ordered dither + target = XMVectorAdd( v, ordered[ index & 3 ] ); + target = XMVectorRound( target ); + } + + target = XMVectorClamp( target, g_XMZero, Scale2 ); + + XMFLOAT4A tmp; + XMStoreFloat4A( &tmp, target ); + + auto dPtr = &dest[ index ]; + *dPtr = (static_cast<uint32_t>( tmp.x ) & 0xFFFFFF) + | ((static_cast<uint32_t>( tmp.y ) & 0xFF) << 24); + } + return true; + } + return false; + + case DXGI_FORMAT_R8G8_UNORM: + STORE_SCANLINE2( XMUBYTEN2, g_Scale8pc, true, true, uint8_t, 0xFF, y ) + + case DXGI_FORMAT_R8G8_UINT: + STORE_SCANLINE2( XMUBYTE2, g_Scale8pc, true, false, uint8_t, 0xFF, y ) + + case DXGI_FORMAT_R8G8_SNORM: + STORE_SCANLINE2( XMBYTEN2, g_Scale7pc, false, true, int8_t, 0xFF, y ) + + case DXGI_FORMAT_R8G8_SINT: + STORE_SCANLINE2( XMBYTE2, g_Scale7pc, false, false, int8_t, 0xFF, y ) + + case DXGI_FORMAT_D16_UNORM: + case DXGI_FORMAT_R16_UNORM: + STORE_SCANLINE1( uint16_t, g_Scale16pc, true, true, 0xFFFF, y, false ) + + case DXGI_FORMAT_R16_UINT: + STORE_SCANLINE1( uint16_t, g_Scale16pc, true, false, 0xFFFF, y, false ) + + case DXGI_FORMAT_R16_SNORM: + STORE_SCANLINE1( int16_t, g_Scale15pc, false, true, 0xFFFF, y, false ) + + case DXGI_FORMAT_R16_SINT: + STORE_SCANLINE1( int16_t, g_Scale15pc, false, false, 0xFFFF, y, false ) + + case DXGI_FORMAT_R8_UNORM: + 
STORE_SCANLINE1( uint8_t, g_Scale8pc, true, true, 0xFF, y, false ) + + case DXGI_FORMAT_R8_UINT: + STORE_SCANLINE1( uint8_t, g_Scale8pc, true, false, 0xFF, y, false ) + + case DXGI_FORMAT_R8_SNORM: + STORE_SCANLINE1( int8_t, g_Scale7pc, false, true, 0xFF, y, false ) + + case DXGI_FORMAT_R8_SINT: + STORE_SCANLINE1( int8_t, g_Scale7pc, false, false, 0xFF, y, false ) + + case DXGI_FORMAT_A8_UNORM: + STORE_SCANLINE1( uint8_t, g_Scale8pc, true, true, 0xFF, y, true ) + + case DXGI_FORMAT_B5G6R5_UNORM: + if ( size >= sizeof(XMU565) ) + { + XMU565 * __restrict dest = reinterpret_cast<XMU565*>(pDestination); + for( size_t i = 0; i < count; ++i ) + { + ptrdiff_t index = static_cast<ptrdiff_t>( ( y & 1 ) ? ( count - i - 1 ) : i ); + ptrdiff_t delta = ( y & 1 ) ? -2 : 0; + + XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>( sPtr[ index ] ); + v = XMVectorSaturate( v ); + v = XMVectorAdd( v, vError ); + v = XMVectorMultiply( v, g_Scale565pc ); + + XMVECTOR target; + if ( pDiffusionErrors ) + { + target = XMVectorRound( v ); + vError = XMVectorSubtract( v, target ); + vError = XMVectorDivide( vError, g_Scale565pc ); + + // Distribute error to next scanline and next pixel + pDiffusionErrors[ index-delta ] += XMVectorMultiply( g_ErrorWeight3, vError ); + pDiffusionErrors[ index+1 ] += XMVectorMultiply( g_ErrorWeight5, vError ); + pDiffusionErrors[ index+2+delta ] += XMVectorMultiply( g_ErrorWeight1, vError ); + vError = XMVectorMultiply( vError, g_ErrorWeight7 ); + } + else + { + // Applied ordered dither + target = XMVectorAdd( v, ordered[ index & 3 ] ); + target = XMVectorRound( target ); + } + + target = XMVectorClamp( target, g_XMZero, g_Scale565pc ); + + XMFLOAT4A tmp; + XMStoreFloat4A( &tmp, target ); + + auto dPtr = &dest[ index ]; + dPtr->x = static_cast<uint16_t>( tmp.x ) & 0x1F; + dPtr->y = static_cast<uint16_t>( tmp.y ) & 0x3F; + dPtr->z = static_cast<uint16_t>( tmp.z ) & 0x1F; + } + return true; + } + return false; + + case DXGI_FORMAT_B5G5R5A1_UNORM: + if ( size >= 
sizeof(XMU555) ) + { + XMU555 * __restrict dest = reinterpret_cast<XMU555*>(pDestination); + for( size_t i = 0; i < count; ++i ) + { + ptrdiff_t index = static_cast<ptrdiff_t>( ( y & 1 ) ? ( count - i - 1 ) : i ); + ptrdiff_t delta = ( y & 1 ) ? -2 : 0; + + XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>( sPtr[ index ] ); + v = XMVectorSaturate( v ); + v = XMVectorAdd( v, vError ); + v = XMVectorMultiply( v, g_Scale5551pc ); + + XMVECTOR target; + if ( pDiffusionErrors ) + { + target = XMVectorRound( v ); + vError = XMVectorSubtract( v, target ); + vError = XMVectorDivide( vError, g_Scale5551pc ); + + // Distribute error to next scanline and next pixel + pDiffusionErrors[ index-delta ] += XMVectorMultiply( g_ErrorWeight3, vError ); + pDiffusionErrors[ index+1 ] += XMVectorMultiply( g_ErrorWeight5, vError ); + pDiffusionErrors[ index+2+delta ] += XMVectorMultiply( g_ErrorWeight1, vError ); + vError = XMVectorMultiply( vError, g_ErrorWeight7 ); + } + else + { + // Applied ordered dither + target = XMVectorAdd( v, ordered[ index & 3 ] ); + target = XMVectorRound( target ); + } + + target = XMVectorClamp( target, g_XMZero, g_Scale5551pc ); + + XMFLOAT4A tmp; + XMStoreFloat4A( &tmp, target ); + + auto dPtr = &dest[ index ]; + dPtr->x = static_cast<uint16_t>( tmp.x ) & 0x1F; + dPtr->y = static_cast<uint16_t>( tmp.y ) & 0x1F; + dPtr->z = static_cast<uint16_t>( tmp.z ) & 0x1F; + dPtr->w = ( XMVectorGetW( target ) > threshold ) ? 1 : 0; + } + return true; + } + return false; + + case DXGI_FORMAT_B8G8R8A8_UNORM: + case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB: + STORE_SCANLINE( XMUBYTEN4, g_Scale8pc, true, true, uint8_t, 0xFF, y, true ) + + case DXGI_FORMAT_B8G8R8X8_UNORM: + case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB: + if ( size >= sizeof(XMUBYTEN4) ) + { + XMUBYTEN4 * __restrict dest = reinterpret_cast<XMUBYTEN4*>(pDestination); + for( size_t i = 0; i < count; ++i ) + { + ptrdiff_t index = static_cast<ptrdiff_t>( ( y & 1 ) ? ( count - i - 1 ) : i ); + ptrdiff_t delta = ( y & 1 ) ? 
-2 : 0; + + XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>( sPtr[ index ] ); + v = XMVectorSaturate( v ); + v = XMVectorAdd( v, vError ); + v = XMVectorMultiply( v, g_Scale8pc ); + + XMVECTOR target; + if ( pDiffusionErrors ) + { + target = XMVectorRound( v ); + vError = XMVectorSubtract( v, target ); + vError = XMVectorDivide( vError, g_Scale8pc ); + + // Distribute error to next scanline and next pixel + pDiffusionErrors[ index-delta ] += XMVectorMultiply( g_ErrorWeight3, vError ); + pDiffusionErrors[ index+1 ] += XMVectorMultiply( g_ErrorWeight5, vError ); + pDiffusionErrors[ index+2+delta ] += XMVectorMultiply( g_ErrorWeight1, vError ); + vError = XMVectorMultiply( vError, g_ErrorWeight7 ); + } + else + { + // Applied ordered dither + target = XMVectorAdd( v, ordered[ index & 3 ] ); + target = XMVectorRound( target ); + } + + target = XMVectorClamp( target, g_XMZero, g_Scale8pc ); + + XMFLOAT4A tmp; + XMStoreFloat4A( &tmp, target ); + + auto dPtr = &dest[ index ]; + dPtr->x = static_cast<uint8_t>( tmp.x ) & 0xFF; + dPtr->y = static_cast<uint8_t>( tmp.y ) & 0xFF; + dPtr->z = static_cast<uint8_t>( tmp.z ) & 0xFF; + dPtr->w = 0; + } + return true; + } + return false; + + case DXGI_FORMAT_B4G4R4A4_UNORM: + STORE_SCANLINE( XMUNIBBLE4, g_Scale4pc, true, true, uint8_t, 0xF, y, true ) + + default: + return _StoreScanline( pDestination, size, format, pSource, count, threshold ); + } } +#pragma warning(pop) + +#undef STORE_SCANLINE +#undef STORE_SCANLINE2 +#undef STORE_SCANLINE1 + //------------------------------------------------------------------------------------- -// Convert the source using WIC and then convert to DXGI format from there +// Selection logic for using WIC vs. 
our own routines //------------------------------------------------------------------------------------- -static HRESULT _ConvertFromWIC( _In_ const Image& srcImage, _In_ const WICPixelFormatGUID& pfGUID, - _In_ DWORD filter, _In_ float threshold, _In_ const Image& destImage ) +static inline bool _UseWICConversion( _In_ DWORD filter, _In_ DXGI_FORMAT sformat, _In_ DXGI_FORMAT tformat, + _Out_ WICPixelFormatGUID& pfGUID, _Out_ WICPixelFormatGUID& targetGUID ) { - assert( srcImage.width == destImage.width ); - assert( srcImage.height == destImage.height ); + memcpy( &pfGUID, &GUID_NULL, sizeof(GUID) ); + memcpy( &targetGUID, &GUID_NULL, sizeof(GUID) ); - IWICImagingFactory* pWIC = _GetWIC(); - if ( !pWIC ) - return E_NOINTERFACE; + if ( filter & TEX_FILTER_FORCE_NON_WIC ) + { + // Explicit flag indicates use of non-WIC code paths + return false; + } - ScopedObject<IWICFormatConverter> FC; - HRESULT hr = pWIC->CreateFormatConverter( &FC ); - if ( FAILED(hr) ) - return hr; + if ( !_DXGIToWIC( sformat, pfGUID ) || !_DXGIToWIC( tformat, targetGUID ) ) + { + // Source or target format are not WIC supported native pixel formats + return false; + } - BOOL canConvert = FALSE; - hr = FC->CanConvert( pfGUID, GUID_WICPixelFormat128bppRGBAFloat, &canConvert ); - if ( FAILED(hr) || !canConvert ) + if ( filter & TEX_FILTER_FORCE_WIC ) { - // This case is not an issue for the subset of WIC formats that map directly to DXGI - return E_UNEXPECTED; + // Explicit flag to use WIC code paths, skips all the case checks below + return true; } - ScratchImage temp; - hr = temp.Initialize2D( DXGI_FORMAT_R32G32B32A32_FLOAT, srcImage.width, srcImage.height, 1, 1 ); - if ( FAILED(hr) ) - return hr; + if ( filter & TEX_FILTER_SEPARATE_ALPHA ) + { + // Alpha is not premultiplied, so use non-WIC code paths + return false; + } - const Image *timg = temp.GetImage( 0, 0, 0 ); - if ( !timg ) - return E_POINTER; +#if defined(_XBOX_ONE) && defined(_TITLE) + if ( sformat == DXGI_FORMAT_R16G16B16A16_FLOAT 
+ || sformat == DXGI_FORMAT_R16_FLOAT + || tformat == DXGI_FORMAT_R16G16B16A16_FLOAT + || tformat == DXGI_FORMAT_R16_FLOAT ) + { + // Use non-WIC code paths as these conversions are not supported by Xbox One XDK + return false; + } +#endif - ScopedObject<IWICBitmap> source; - hr = pWIC->CreateBitmapFromMemory( static_cast<UINT>( srcImage.width ), static_cast<UINT>( srcImage.height ), pfGUID, - static_cast<UINT>( srcImage.rowPitch ), static_cast<UINT>( srcImage.slicePitch ), - srcImage.pixels, &source ); - if ( FAILED(hr) ) - return hr; + // Check for special cases + switch ( sformat ) + { + case DXGI_FORMAT_R32G32B32A32_FLOAT: + case DXGI_FORMAT_R32G32B32_FLOAT: + case DXGI_FORMAT_R16G16B16A16_FLOAT: + switch( tformat ) + { + case DXGI_FORMAT_R16_FLOAT: + case DXGI_FORMAT_R32_FLOAT: + case DXGI_FORMAT_D32_FLOAT: + // WIC converts via UNORM formats and ends up converting colorspaces for these cases + case DXGI_FORMAT_A8_UNORM: + // Conversion logic for these kinds of textures is unintuitive for WIC code paths + return false; + } + break; + + case DXGI_FORMAT_R16_FLOAT: + switch( tformat ) + { + case DXGI_FORMAT_R32_FLOAT: + case DXGI_FORMAT_D32_FLOAT: + // WIC converts via UNORM formats and ends up converting colorspaces for these cases + case DXGI_FORMAT_A8_UNORM: + // Conversion logic for these kinds of textures is unintuitive for WIC code paths + return false; + } + break; - hr = FC->Initialize( source.Get(), GUID_WICPixelFormat128bppRGBAFloat, _GetWICDither( filter ), 0, threshold, WICBitmapPaletteTypeCustom ); - if ( FAILED(hr) ) - return hr; + case DXGI_FORMAT_A8_UNORM: + // Conversion logic for these kinds of textures is unintuitive for WIC code paths + return false; - hr = FC->CopyPixels( 0, static_cast<UINT>( timg->rowPitch ), static_cast<UINT>( timg->slicePitch ), timg->pixels ); - if ( FAILED(hr) ) - return hr; + default: + switch( tformat ) + { + case DXGI_FORMAT_A8_UNORM: + // Conversion logic for these kinds of textures is unintuitive for WIC code 
paths + return false; + } + } + + // Check for implicit color space changes + if ( IsSRGB( sformat ) ) + filter |= TEX_FILTER_SRGB_IN; - // Perform conversion on temp image which is now in R32G32B32A32_FLOAT format to final image - uint8_t *pSrc = timg->pixels; - uint8_t *pDest = destImage.pixels; - if ( !pSrc || !pDest ) - return E_POINTER; + if ( IsSRGB( tformat ) ) + filter |= TEX_FILTER_SRGB_OUT; - for( size_t h = 0; h < srcImage.height; ++h ) + if ( (filter & (TEX_FILTER_SRGB_IN|TEX_FILTER_SRGB_OUT)) == (TEX_FILTER_SRGB_IN|TEX_FILTER_SRGB_OUT) ) { - _ConvertScanline( reinterpret_cast<XMVECTOR*>(pSrc), srcImage.width, destImage.format, DXGI_FORMAT_R32G32B32A32_FLOAT, filter ); + filter &= ~(TEX_FILTER_SRGB_IN|TEX_FILTER_SRGB_OUT); + } - if ( !_StoreScanline( pDest, destImage.rowPitch, destImage.format, reinterpret_cast<const XMVECTOR*>(pSrc), srcImage.width ) ) - return E_FAIL; + DWORD wicsrgb = _CheckWICColorSpace( pfGUID, targetGUID ); - pSrc += timg->rowPitch; - pDest += destImage.rowPitch; + if ( wicsrgb != (filter & (TEX_FILTER_SRGB_IN|TEX_FILTER_SRGB_OUT)) ) + { + // WIC will perform a colorspace conversion we didn't request + return false; } - return S_OK; + return true; } //------------------------------------------------------------------------------------- -// Convert the source from DXGI format then use WIC to convert to final format +// Convert the source image using WIC //------------------------------------------------------------------------------------- -static HRESULT _ConvertToWIC( _In_ const Image& srcImage, - _In_ const WICPixelFormatGUID& targetGUID, _In_ DWORD filter, _In_ float threshold, _In_ const Image& destImage ) +static HRESULT _ConvertUsingWIC( _In_ const Image& srcImage, _In_ const WICPixelFormatGUID& pfGUID, + _In_ const WICPixelFormatGUID& targetGUID, + _In_ DWORD filter, _In_ float threshold, _In_ const Image& destImage ) { assert( srcImage.width == destImage.width ); assert( srcImage.height == destImage.height ); @@ -2149,101 
+4292,275 @@ static HRESULT _ConvertToWIC( _In_ const Image& srcImage, if ( !pWIC ) return E_NOINTERFACE; - ScopedObject<IWICFormatConverter> FC; - HRESULT hr = pWIC->CreateFormatConverter( &FC ); + ComPtr<IWICFormatConverter> FC; + HRESULT hr = pWIC->CreateFormatConverter( FC.GetAddressOf() ); if ( FAILED(hr) ) return hr; + // Note that WIC conversion ignores the TEX_FILTER_SRGB_IN and TEX_FILTER_SRGB_OUT flags, + // but also always assumes UNORM <-> FLOAT conversions are changing color spaces sRGB <-> scRGB + BOOL canConvert = FALSE; - hr = FC->CanConvert( GUID_WICPixelFormat128bppRGBAFloat, targetGUID, &canConvert ); + hr = FC->CanConvert( pfGUID, targetGUID, &canConvert ); if ( FAILED(hr) || !canConvert ) { // This case is not an issue for the subset of WIC formats that map directly to DXGI return E_UNEXPECTED; } - ScratchImage temp; - hr = temp.Initialize2D( DXGI_FORMAT_R32G32B32A32_FLOAT, srcImage.width, srcImage.height, 1, 1 ); + ComPtr<IWICBitmap> source; + hr = pWIC->CreateBitmapFromMemory( static_cast<UINT>( srcImage.width ), static_cast<UINT>( srcImage.height ), pfGUID, + static_cast<UINT>( srcImage.rowPitch ), static_cast<UINT>( srcImage.slicePitch ), + srcImage.pixels, source.GetAddressOf() ); if ( FAILED(hr) ) return hr; - const Image *timg = temp.GetImage( 0, 0, 0 ); - if ( !timg ) - return E_POINTER; + hr = FC->Initialize( source.Get(), targetGUID, _GetWICDither( filter ), 0, threshold * 100.f, WICBitmapPaletteTypeCustom ); + if ( FAILED(hr) ) + return hr; + + hr = FC->CopyPixels( 0, static_cast<UINT>( destImage.rowPitch ), static_cast<UINT>( destImage.slicePitch ), destImage.pixels ); + if ( FAILED(hr) ) + return hr; + + return S_OK; +} + + +//------------------------------------------------------------------------------------- +// Convert the source image (not using WIC) +//------------------------------------------------------------------------------------- +static HRESULT _Convert( _In_ const Image& srcImage, _In_ DWORD filter, _In_ const Image& 
destImage, _In_ float threshold, _In_ size_t z ) +{ + assert( srcImage.width == destImage.width ); + assert( srcImage.height == destImage.height ); const uint8_t *pSrc = srcImage.pixels; - if ( !pSrc ) + uint8_t *pDest = destImage.pixels; + if ( !pSrc || !pDest ) return E_POINTER; - uint8_t *pDest = timg->pixels; - if ( !pDest ) - return E_POINTER; + size_t width = srcImage.width; - for( size_t h = 0; h < srcImage.height; ++h ) + if ( filter & TEX_FILTER_DITHER_DIFFUSION ) { - if ( !_LoadScanline( reinterpret_cast<XMVECTOR*>(pDest), srcImage.width, pSrc, srcImage.rowPitch, srcImage.format ) ) - return E_FAIL; + // Error diffusion dithering (aka Floyd-Steinberg dithering) + ScopedAlignedArrayXMVECTOR scanline( reinterpret_cast<XMVECTOR*>( _aligned_malloc( (sizeof(XMVECTOR)*(width*2 + 2)), 16 ) ) ); + if ( !scanline ) + return E_OUTOFMEMORY; - _ConvertScanline( reinterpret_cast<XMVECTOR*>(pDest), srcImage.width, DXGI_FORMAT_R32G32B32A32_FLOAT, srcImage.format, filter ); + XMVECTOR* pDiffusionErrors = scanline.get() + width; + memset( pDiffusionErrors, 0, sizeof(XMVECTOR)*(width+2) ); - pSrc += srcImage.rowPitch; - pDest += timg->rowPitch; + for( size_t h = 0; h < srcImage.height; ++h ) + { + if ( !_LoadScanline( scanline.get(), width, pSrc, srcImage.rowPitch, srcImage.format ) ) + return E_FAIL; + + _ConvertScanline( scanline.get(), width, destImage.format, srcImage.format, filter ); + + if ( !_StoreScanlineDither( pDest, destImage.rowPitch, destImage.format, scanline.get(), width, threshold, h, z, pDiffusionErrors ) ) + return E_FAIL; + + pSrc += srcImage.rowPitch; + pDest += destImage.rowPitch; + } } + else + { + ScopedAlignedArrayXMVECTOR scanline( reinterpret_cast<XMVECTOR*>( _aligned_malloc( (sizeof(XMVECTOR)*width), 16 ) ) ); + if ( !scanline ) + return E_OUTOFMEMORY; - // Perform conversion on temp image which is now in R32G32B32A32_FLOAT format - ScopedObject<IWICBitmap> source; - hr = pWIC->CreateBitmapFromMemory( static_cast<UINT>( timg->width ), 
static_cast<UINT>( timg->height ), GUID_WICPixelFormat128bppRGBAFloat, - static_cast<UINT>( timg->rowPitch ), static_cast<UINT>( timg->slicePitch ), - timg->pixels, &source ); - if ( FAILED(hr) ) - return hr; + if ( filter & TEX_FILTER_DITHER ) + { + // Ordered dithering + for( size_t h = 0; h < srcImage.height; ++h ) + { + if ( !_LoadScanline( scanline.get(), width, pSrc, srcImage.rowPitch, srcImage.format ) ) + return E_FAIL; - hr = FC->Initialize( source.Get(), targetGUID, _GetWICDither( filter ), 0, threshold, WICBitmapPaletteTypeCustom ); - if ( FAILED(hr) ) - return hr; + _ConvertScanline( scanline.get(), width, destImage.format, srcImage.format, filter ); - hr = FC->CopyPixels( 0, static_cast<UINT>( destImage.rowPitch ), static_cast<UINT>( destImage.slicePitch ), destImage.pixels ); - if ( FAILED(hr) ) - return hr; + if ( !_StoreScanlineDither( pDest, destImage.rowPitch, destImage.format, scanline.get(), width, threshold, h, z, nullptr ) ) + return E_FAIL; + + pSrc += srcImage.rowPitch; + pDest += destImage.rowPitch; + } + } + else + { + // No dithering + for( size_t h = 0; h < srcImage.height; ++h ) + { + if ( !_LoadScanline( scanline.get(), width, pSrc, srcImage.rowPitch, srcImage.format ) ) + return E_FAIL; + + _ConvertScanline( scanline.get(), width, destImage.format, srcImage.format, filter ); + + if ( !_StoreScanline( pDest, destImage.rowPitch, destImage.format, scanline.get(), width, threshold ) ) + return E_FAIL; + + pSrc += srcImage.rowPitch; + pDest += destImage.rowPitch; + } + } + } return S_OK; } //------------------------------------------------------------------------------------- -// Convert the source image (not using WIC) +static DXGI_FORMAT _PlanarToSingle( _In_ DXGI_FORMAT format ) +{ + switch (format) + { + case DXGI_FORMAT_NV12: + case DXGI_FORMAT_NV11: + return DXGI_FORMAT_YUY2; + + case DXGI_FORMAT_P010: + return DXGI_FORMAT_Y210; + + case DXGI_FORMAT_P016: + return DXGI_FORMAT_Y216; + + // We currently do not support conversion for 
Xbox One specific depth formats + + // We can't do anything with DXGI_FORMAT_420_OPAQUE because it's an opaque blob of bits + + default: + return DXGI_FORMAT_UNKNOWN; + } +} + + +//------------------------------------------------------------------------------------- +// Convert the image from a planar to non-planar image //------------------------------------------------------------------------------------- -static HRESULT _Convert( _In_ const Image& srcImage, _In_ DWORD filter, _In_ const Image& destImage ) +#define CONVERT_420_TO_422( srcType, destType )\ + {\ + size_t rowPitch = srcImage.rowPitch;\ + \ + auto sourceE = reinterpret_cast<const srcType*>( pSrc + srcImage.slicePitch );\ + auto pSrcUV = pSrc + ( srcImage.height * rowPitch );\ + \ + for( size_t y = 0; y < srcImage.height; y+= 2 )\ + {\ + auto sPtrY0 = reinterpret_cast<const srcType*>( pSrc );\ + auto sPtrY2 = reinterpret_cast<const srcType*>( pSrc + rowPitch );\ + auto sPtrUV = reinterpret_cast<const srcType*>( pSrcUV );\ + \ + destType * __restrict dPtr0 = reinterpret_cast<destType*>(pDest);\ + destType * __restrict dPtr1 = reinterpret_cast<destType*>(pDest + destImage.rowPitch);\ + \ + for( size_t x = 0; x < srcImage.width; x+= 2 )\ + {\ + if ( (sPtrUV+1) >= sourceE ) break;\ + \ + srcType u = *(sPtrUV++);\ + srcType v = *(sPtrUV++);\ + \ + dPtr0->x = *(sPtrY0++);\ + dPtr0->y = u;\ + dPtr0->z = *(sPtrY0++);\ + dPtr0->w = v;\ + ++dPtr0;\ + \ + dPtr1->x = *(sPtrY2++);\ + dPtr1->y = u;\ + dPtr1->z = *(sPtrY2++);\ + dPtr1->w = v;\ + ++dPtr1;\ + }\ + \ + pSrc += rowPitch * 2;\ + pSrcUV += rowPitch;\ + \ + pDest += destImage.rowPitch * 2;\ + }\ + } + +static HRESULT _ConvertToSinglePlane( _In_ const Image& srcImage, _In_ const Image& destImage ) { assert( srcImage.width == destImage.width ); assert( srcImage.height == destImage.height ); - ScopedAlignedArrayXMVECTOR scanline( reinterpret_cast<XMVECTOR*>( _aligned_malloc( (sizeof(XMVECTOR)*srcImage.width), 16 ) ) ); - if ( !scanline ) - return 
E_OUTOFMEMORY; - const uint8_t *pSrc = srcImage.pixels; uint8_t *pDest = destImage.pixels; if ( !pSrc || !pDest ) return E_POINTER; - for( size_t h = 0; h < srcImage.height; ++h ) + switch ( srcImage.format ) { - if ( !_LoadScanline( scanline.get(), srcImage.width, pSrc, srcImage.rowPitch, srcImage.format ) ) - return E_FAIL; - - _ConvertScanline( scanline.get(), srcImage.width, destImage.format, srcImage.format, filter ); - - if ( !_StoreScanline( pDest, destImage.rowPitch, destImage.format, scanline.get(), srcImage.width ) ) - return E_FAIL; + case DXGI_FORMAT_NV12: + assert( destImage.format == DXGI_FORMAT_YUY2 ); + CONVERT_420_TO_422( uint8_t, XMUBYTEN4 ); + return S_OK; + + case DXGI_FORMAT_P010: + assert( destImage.format == DXGI_FORMAT_Y210 ); + CONVERT_420_TO_422( uint16_t, XMUSHORTN4 ); + return S_OK; + + case DXGI_FORMAT_P016: + assert( destImage.format == DXGI_FORMAT_Y216 ); + CONVERT_420_TO_422( uint16_t, XMUSHORTN4 ); + return S_OK; + + case DXGI_FORMAT_NV11: + assert( destImage.format == DXGI_FORMAT_YUY2 ); + // Convert 4:1:1 to 4:2:2 + { + size_t rowPitch = srcImage.rowPitch; + + const uint8_t* sourceE = pSrc + srcImage.slicePitch; + const uint8_t* pSrcUV = pSrc + ( srcImage.height * rowPitch ); + + for( size_t y = 0; y < srcImage.height; ++y ) + { + const uint8_t* sPtrY = pSrc; + const uint8_t* sPtrUV = pSrcUV; + + XMUBYTEN4 * __restrict dPtr = reinterpret_cast<XMUBYTEN4*>(pDest); + + for( size_t x = 0; x < srcImage.width; x+= 4 ) + { + if ( (sPtrUV+1) >= sourceE ) break; + + uint8_t u = *(sPtrUV++); + uint8_t v = *(sPtrUV++); + + dPtr->x = *(sPtrY++); + dPtr->y = u; + dPtr->z = *(sPtrY++); + dPtr->w = v; + ++dPtr; + + dPtr->x = *(sPtrY++); + dPtr->y = u; + dPtr->z = *(sPtrY++); + dPtr->w = v; + ++dPtr; + } + + pSrc += rowPitch; + pSrcUV += (rowPitch >> 1); + + pDest += destImage.rowPitch; + } + } + return S_OK; - pSrc += srcImage.rowPitch; - pDest += destImage.rowPitch; + default: + return E_UNEXPECTED; } - - return S_OK; } +#undef 
CONVERT_420_TO_422 + //===================================================================================== // Entry-points @@ -2252,6 +4569,7 @@ static HRESULT _Convert( _In_ const Image& srcImage, _In_ DWORD filter, _In_ con //------------------------------------------------------------------------------------- // Convert image //------------------------------------------------------------------------------------- +_Use_decl_annotations_ HRESULT Convert( const Image& srcImage, DXGI_FORMAT format, DWORD filter, float threshold, ScratchImage& image ) { if ( (srcImage.format == format) || !IsValid( format ) ) @@ -2261,11 +4579,12 @@ HRESULT Convert( const Image& srcImage, DXGI_FORMAT format, DWORD filter, float return E_POINTER; if ( IsCompressed(srcImage.format) || IsCompressed(format) - || IsVideo(srcImage.format) || IsVideo(format) + || IsPlanar(srcImage.format) || IsPlanar(format) + || IsPalettized(srcImage.format) || IsPalettized(format) || IsTypeless(srcImage.format) || IsTypeless(format) ) return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); -#ifdef _AMD64_ +#ifdef _M_X64 if ( (srcImage.width > 0xFFFFFFFF) || (srcImage.height > 0xFFFFFFFF) ) return E_INVALIDARG; #endif @@ -2281,34 +4600,14 @@ HRESULT Convert( const Image& srcImage, DXGI_FORMAT format, DWORD filter, float return E_POINTER; } - WICPixelFormatGUID pfGUID; - if ( _DXGIToWIC( srcImage.format, pfGUID ) ) + WICPixelFormatGUID pfGUID, targetGUID; + if ( _UseWICConversion( filter, srcImage.format, format, pfGUID, targetGUID ) ) { - WICPixelFormatGUID targetGUID; - if ( _DXGIToWIC( format, targetGUID ) ) - { - // Case 1: Both source and target formats are WIC supported - hr = _ConvertUsingWIC( srcImage, pfGUID, targetGUID, filter, threshold, *rimage ); - } - else - { - // Case 2: Source format is supported by WIC, but not the target format - hr = _ConvertFromWIC( srcImage, pfGUID, filter, threshold, *rimage ); - } + hr = _ConvertUsingWIC( srcImage, pfGUID, targetGUID, filter, threshold, *rimage ); } else 
{ - WICPixelFormatGUID targetGUID; - if ( _DXGIToWIC( format, targetGUID ) ) - { - // Case 3: Source format is not supported by WIC, but does support the target format - hr = _ConvertToWIC( srcImage, targetGUID, filter, threshold, *rimage ); - } - else - { - // Case 4: Both source and target format are not supported by WIC - hr = _Convert( srcImage, filter, *rimage ); - } + hr = _Convert( srcImage, filter, *rimage, threshold, 0 ); } if ( FAILED(hr) ) @@ -2324,6 +4623,7 @@ HRESULT Convert( const Image& srcImage, DXGI_FORMAT format, DWORD filter, float //------------------------------------------------------------------------------------- // Convert image (complex) //------------------------------------------------------------------------------------- +_Use_decl_annotations_ HRESULT Convert( const Image* srcImages, size_t nimages, const TexMetadata& metadata, DXGI_FORMAT format, DWORD filter, float threshold, ScratchImage& result ) { @@ -2331,11 +4631,12 @@ HRESULT Convert( const Image* srcImages, size_t nimages, const TexMetadata& meta return E_INVALIDARG; if ( IsCompressed(metadata.format) || IsCompressed(format) - || IsVideo(metadata.format) || IsVideo(format) + || IsPlanar(metadata.format) || IsPlanar(format) + || IsPalettized(metadata.format) || IsPalettized(format) || IsTypeless(metadata.format) || IsTypeless(format) ) return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); -#ifdef _AMD64_ +#ifdef _M_X64 if ( (metadata.width > 0xFFFFFFFF) || (metadata.height > 0xFFFFFFFF) ) return E_INVALIDARG; #endif @@ -2360,8 +4661,200 @@ HRESULT Convert( const Image* srcImages, size_t nimages, const TexMetadata& meta } WICPixelFormatGUID pfGUID, targetGUID; - bool wicpf = _DXGIToWIC( metadata.format, pfGUID ); - bool wictargetpf = _DXGIToWIC( format, targetGUID ); + bool usewic = _UseWICConversion( filter, metadata.format, format, pfGUID, targetGUID ); + + switch (metadata.dimension) + { + case TEX_DIMENSION_TEXTURE1D: + case TEX_DIMENSION_TEXTURE2D: + for( size_t index=0; index 
< nimages; ++index ) + { + const Image& src = srcImages[ index ]; + if ( src.format != metadata.format ) + { + result.Release(); + return E_FAIL; + } + +#ifdef _M_X64 + if ( (src.width > 0xFFFFFFFF) || (src.height > 0xFFFFFFFF) ) + return E_FAIL; +#endif + + const Image& dst = dest[ index ]; + assert( dst.format == format ); + + if ( src.width != dst.width || src.height != dst.height ) + { + result.Release(); + return E_FAIL; + } + + if ( usewic ) + { + hr = _ConvertUsingWIC( src, pfGUID, targetGUID, filter, threshold, dst ); + } + else + { + hr = _Convert( src, filter, dst, threshold, 0 ); + } + + if ( FAILED(hr) ) + { + result.Release(); + return hr; + } + } + break; + + case TEX_DIMENSION_TEXTURE3D: + { + size_t index = 0; + size_t d = metadata.depth; + for( size_t level = 0; level < metadata.mipLevels; ++level ) + { + for( size_t slice = 0; slice < d; ++slice, ++index ) + { + if ( index >= nimages ) + return E_FAIL; + + const Image& src = srcImages[ index ]; + if ( src.format != metadata.format ) + { + result.Release(); + return E_FAIL; + } + +#ifdef _M_X64 + if ( (src.width > 0xFFFFFFFF) || (src.height > 0xFFFFFFFF) ) + return E_FAIL; +#endif + + const Image& dst = dest[ index ]; + assert( dst.format == format ); + + if ( src.width != dst.width || src.height != dst.height ) + { + result.Release(); + return E_FAIL; + } + + if ( usewic ) + { + hr = _ConvertUsingWIC( src, pfGUID, targetGUID, filter, threshold, dst ); + } + else + { + hr = _Convert( src, filter, dst, threshold, slice ); + } + + if ( FAILED(hr) ) + { + result.Release(); + return hr; + } + } + + if ( d > 1 ) + d >>= 1; + } + } + break; + + default: + return E_FAIL; + } + + return S_OK; +} + + +//------------------------------------------------------------------------------------- +// Convert image from planar to single plane (image) +//------------------------------------------------------------------------------------- +_Use_decl_annotations_ +HRESULT ConvertToSinglePlane( const Image& srcImage, 
ScratchImage& image ) +{ + if ( !IsPlanar(srcImage.format) ) + return E_INVALIDARG; + + if ( !srcImage.pixels ) + return E_POINTER; + + DXGI_FORMAT format = _PlanarToSingle( srcImage.format ); + if ( format == DXGI_FORMAT_UNKNOWN ) + return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); + +#ifdef _M_X64 + if ( (srcImage.width > 0xFFFFFFFF) || (srcImage.height > 0xFFFFFFFF) ) + return E_INVALIDARG; +#endif + + HRESULT hr = image.Initialize2D( format, srcImage.width, srcImage.height, 1, 1 ); + if ( FAILED(hr) ) + return hr; + + const Image *rimage = image.GetImage( 0, 0, 0 ); + if ( !rimage ) + { + image.Release(); + return E_POINTER; + } + + hr = _ConvertToSinglePlane( srcImage, *rimage ); + if ( FAILED(hr) ) + { + image.Release(); + return hr; + } + + return S_OK; +} + + +//------------------------------------------------------------------------------------- +// Convert image from planar to single plane (complex) +//------------------------------------------------------------------------------------- +_Use_decl_annotations_ +HRESULT ConvertToSinglePlane( const Image* srcImages, size_t nimages, const TexMetadata& metadata, + ScratchImage& result ) +{ + if ( !srcImages || !nimages ) + return E_INVALIDARG; + + if ( metadata.IsVolumemap() ) + { + // Direct3D does not support any planar formats for Texture3D + return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); + } + + DXGI_FORMAT format = _PlanarToSingle( metadata.format ); + if ( format == DXGI_FORMAT_UNKNOWN ) + return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); + +#ifdef _M_X64 + if ( (metadata.width > 0xFFFFFFFF) || (metadata.height > 0xFFFFFFFF) ) + return E_INVALIDARG; +#endif + + TexMetadata mdata2 = metadata; + mdata2.format = format; + HRESULT hr = result.Initialize( mdata2 ); + if ( FAILED(hr) ) + return hr; + + if ( nimages != result.GetImageCount() ) + { + result.Release(); + return E_FAIL; + } + + const Image* dest = result.GetImages(); + if ( !dest ) + { + result.Release(); + return E_POINTER; + } for( size_t 
index=0; index < nimages; ++index ) { @@ -2372,7 +4865,7 @@ HRESULT Convert( const Image* srcImages, size_t nimages, const TexMetadata& meta return E_FAIL; } -#ifdef _AMD64_ +#ifdef _M_X64 if ( (src.width > 0xFFFFFFFF) || (src.height > 0xFFFFFFFF) ) return E_FAIL; #endif @@ -2386,33 +4879,7 @@ HRESULT Convert( const Image* srcImages, size_t nimages, const TexMetadata& meta return E_FAIL; } - if ( wicpf ) - { - if ( wictargetpf ) - { - // Case 1: Both source and target formats are WIC supported - hr = _ConvertUsingWIC( src, pfGUID, targetGUID, filter, threshold, dst ); - } - else - { - // Case 2: Source format is supported by WIC, but not the target format - hr = _ConvertFromWIC( src, pfGUID, filter, threshold, dst ); - } - } - else - { - if ( wictargetpf ) - { - // Case 3: Source format is not supported by WIC, but does support the target format - hr = _ConvertToWIC( src, targetGUID, filter, threshold, dst ); - } - else - { - // Case 4: Both source and target format are not supported by WIC - hr = _Convert( src, filter, dst ); - } - } - + hr = _ConvertToSinglePlane( src, dst ); if ( FAILED(hr) ) { result.Release(); diff --git a/thirdparty/directxtex/DirectXTex/DirectXTexD3D11.cpp b/thirdparty/directxtex/DirectXTex/DirectXTexD3D11.cpp index e640dfd0..38e4fcc7 100644 --- a/thirdparty/directxtex/DirectXTex/DirectXTexD3D11.cpp +++ b/thirdparty/directxtex/DirectXTex/DirectXTexD3D11.cpp @@ -15,7 +15,11 @@ #include "DirectXTexP.h" +#if !defined(_XBOX_ONE) || !defined(_TITLE) #include <d3d10.h> +#endif + +using Microsoft::WRL::ComPtr; namespace DirectX { @@ -26,7 +30,34 @@ static HRESULT _Capture( _In_ ID3D11DeviceContext* pContext, _In_ ID3D11Resource if ( !pContext || !pSource || !result.GetPixels() ) return E_POINTER; - if ( metadata.dimension == TEX_DIMENSION_TEXTURE3D ) +#if defined(_XBOX_ONE) && defined(_TITLE) + + ComPtr<ID3D11Device> d3dDevice; + pContext->GetDevice( d3dDevice.GetAddressOf() ); + + if ( d3dDevice->GetCreationFlags() & 
D3D11_CREATE_DEVICE_IMMEDIATE_CONTEXT_FAST_SEMANTICS ) + { + ComPtr<ID3D11DeviceX> d3dDeviceX; + HRESULT hr = d3dDevice.As( &d3dDeviceX ); + if ( FAILED(hr) ) + return hr; + + ComPtr<ID3D11DeviceContextX> d3dContextX; + hr = pContext->QueryInterface( __uuidof(ID3D11DeviceContextX), reinterpret_cast<void**>( d3dContextX.GetAddressOf() ) ); + if ( FAILED(hr) ) + return hr; + + UINT64 copyFence = d3dContextX->InsertFence(0); + + while ( d3dDeviceX->IsFencePending( copyFence ) ) + { + SwitchToThread(); + } + } + +#endif + + if ( metadata.IsVolumemap() ) { //--- Volume texture ---------------------------------------------------------- assert( metadata.arraySize == 1 ); @@ -43,7 +74,7 @@ static HRESULT _Capture( _In_ ID3D11DeviceContext* pContext, _In_ ID3D11Resource if ( FAILED(hr) ) return hr; - const uint8_t* pslice = reinterpret_cast<const uint8_t*>( mapped.pData ); + auto pslice = reinterpret_cast<const uint8_t*>( mapped.pData ); if ( !pslice ) { pContext->Unmap( pSource, dindex ); @@ -51,6 +82,11 @@ static HRESULT _Capture( _In_ ID3D11DeviceContext* pContext, _In_ ID3D11Resource } size_t lines = ComputeScanlines( metadata.format, height ); + if ( !lines ) + { + pContext->Unmap( pSource, dindex ); + return E_UNEXPECTED; + } for( size_t slice = 0; slice < depth; ++slice ) { @@ -120,8 +156,13 @@ static HRESULT _Capture( _In_ ID3D11DeviceContext* pContext, _In_ ID3D11Resource } size_t lines = ComputeScanlines( metadata.format, height ); + if ( !lines ) + { + pContext->Unmap( pSource, dindex ); + return E_UNEXPECTED; + } - const uint8_t* sptr = reinterpret_cast<const uint8_t*>( mapped.pData ); + auto sptr = reinterpret_cast<const uint8_t*>( mapped.pData ); uint8_t* dptr = img->pixels; for( size_t h = 0; h < lines; ++h ) { @@ -150,6 +191,7 @@ static HRESULT _Capture( _In_ ID3D11DeviceContext* pContext, _In_ ID3D11Resource //------------------------------------------------------------------------------------- // Determine if given texture metadata is supported on the 
given device //------------------------------------------------------------------------------------- +_Use_decl_annotations_ bool IsSupportedTexture( ID3D11Device* pDevice, const TexMetadata& metadata ) { if ( !pDevice ) @@ -163,9 +205,6 @@ bool IsSupportedTexture( ID3D11Device* pDevice, const TexMetadata& metadata ) if ( !IsValid( fmt ) ) return false; - if ( IsVideo(fmt) ) - return false; - switch( fmt ) { case DXGI_FORMAT_BC4_TYPELESS: @@ -201,7 +240,11 @@ bool IsSupportedTexture( ID3D11Device* pDevice, const TexMetadata& metadata ) // Most cases are known apriori based on feature level, but we use this for robustness to handle the few optional cases UINT formatSupport = 0; - pDevice->CheckFormatSupport( fmt, &formatSupport ); + HRESULT hr = pDevice->CheckFormatSupport( fmt, &formatSupport ); + if ( FAILED(hr) ) + { + formatSupport = 0; + } switch ( metadata.dimension ) { @@ -221,17 +264,17 @@ bool IsSupportedTexture( ID3D11Device* pDevice, const TexMetadata& metadata ) if ( fl < D3D_FEATURE_LEVEL_10_0 ) { - if ( (arraySize > 1) || (iWidth > 4096 /*D3D_FL9_3_REQ_TEXTURE1D_U_DIMENSION*/) ) + if ( (arraySize > 1) || (iWidth > D3D_FL9_3_REQ_TEXTURE1D_U_DIMENSION) ) return false; - if ( (fl < D3D_FEATURE_LEVEL_9_3) && (iWidth > 2048 /*D3D_FL9_1_REQ_TEXTURE1D_U_DIMENSION*/ ) ) + if ( (fl < D3D_FEATURE_LEVEL_9_3) && (iWidth > D3D_FL9_1_REQ_TEXTURE1D_U_DIMENSION ) ) return false; } } break; case TEX_DIMENSION_TEXTURE2D: - if ( metadata.miscFlags & TEX_MISC_TEXTURECUBE ) + if ( metadata.IsCubemap() ) { if ( !(formatSupport & D3D11_FORMAT_SUPPORT_TEXTURECUBE) ) return false; @@ -253,13 +296,13 @@ bool IsSupportedTexture( ID3D11Device* pDevice, const TexMetadata& metadata ) if ( fl < D3D_FEATURE_LEVEL_10_0 ) { - if ( (iWidth > 4096 /*D3D_FL9_3_REQ_TEXTURECUBE_DIMENSION*/ ) - || (iHeight > 4096 /*D3D_FL9_3_REQ_TEXTURECUBE_DIMENSION*/ ) ) + if ( (iWidth > D3D_FL9_3_REQ_TEXTURECUBE_DIMENSION ) + || (iHeight > D3D_FL9_3_REQ_TEXTURECUBE_DIMENSION ) ) return false; if ( (fl < 
D3D_FEATURE_LEVEL_9_3) - && ( (iWidth > 512 /*D3D_FL9_1_REQ_TEXTURECUBE_DIMENSION*/) - || (iHeight > 512 /*D3D_FL9_1_REQ_TEXTURECUBE_DIMENSION*/) ) ) + && ( (iWidth > D3D_FL9_1_REQ_TEXTURECUBE_DIMENSION) + || (iHeight > D3D_FL9_1_REQ_TEXTURECUBE_DIMENSION) ) ) return false; } } @@ -284,13 +327,13 @@ bool IsSupportedTexture( ID3D11Device* pDevice, const TexMetadata& metadata ) if ( fl < D3D_FEATURE_LEVEL_10_0 ) { if ( (arraySize > 1) - || (iWidth > 4096 /*D3D_FL9_3_REQ_TEXTURE2D_U_OR_V_DIMENSION*/) - || (iHeight > 4096 /*D3D_FL9_3_REQ_TEXTURE2D_U_OR_V_DIMENSION*/) ) + || (iWidth > D3D_FL9_3_REQ_TEXTURE2D_U_OR_V_DIMENSION) + || (iHeight > D3D_FL9_3_REQ_TEXTURE2D_U_OR_V_DIMENSION) ) return false; if ( (fl < D3D_FEATURE_LEVEL_9_3) - && ( (iWidth > 2048 /*D3D_FL9_1_REQ_TEXTURE2D_U_OR_V_DIMENSION*/) - || (iHeight > 2048 /*D3D_FL9_1_REQ_TEXTURE2D_U_OR_V_DIMENSION*/) ) ) + && ( (iWidth > D3D_FL9_1_REQ_TEXTURE2D_U_OR_V_DIMENSION) + || (iHeight > D3D_FL9_1_REQ_TEXTURE2D_U_OR_V_DIMENSION) ) ) return false; } } @@ -316,9 +359,9 @@ bool IsSupportedTexture( ID3D11Device* pDevice, const TexMetadata& metadata ) if ( fl < D3D_FEATURE_LEVEL_10_0 ) { - if ( (iWidth > 256 /*D3D_FL9_1_REQ_TEXTURE3D_U_V_OR_W_DIMENSION*/) - || (iHeight > 256 /*D3D_FL9_1_REQ_TEXTURE3D_U_V_OR_W_DIMENSION*/) - || (iDepth > 256 /*D3D_FL9_1_REQ_TEXTURE3D_U_V_OR_W_DIMENSION*/) ) + if ( (iWidth > D3D_FL9_1_REQ_TEXTURE3D_U_V_OR_W_DIMENSION) + || (iHeight > D3D_FL9_1_REQ_TEXTURE3D_U_V_OR_W_DIMENSION) + || (iDepth > D3D_FL9_1_REQ_TEXTURE3D_U_V_OR_W_DIMENSION) ) return false; } } @@ -336,33 +379,46 @@ bool IsSupportedTexture( ID3D11Device* pDevice, const TexMetadata& metadata ) //------------------------------------------------------------------------------------- // Create a texture resource //------------------------------------------------------------------------------------- +_Use_decl_annotations_ HRESULT CreateTexture( ID3D11Device* pDevice, const Image* srcImages, size_t nimages, const TexMetadata& metadata, 
ID3D11Resource** ppResource ) { + return CreateTextureEx( pDevice, srcImages, nimages, metadata, + D3D11_USAGE_DEFAULT, D3D11_BIND_SHADER_RESOURCE, 0, 0, false, + ppResource ); +} + +_Use_decl_annotations_ +HRESULT CreateTextureEx( ID3D11Device* pDevice, const Image* srcImages, size_t nimages, const TexMetadata& metadata, + D3D11_USAGE usage, unsigned int bindFlags, unsigned int cpuAccessFlags, unsigned int miscFlags, bool forceSRGB, + ID3D11Resource** ppResource ) +{ if ( !pDevice || !srcImages || !nimages || !ppResource ) return E_INVALIDARG; + *ppResource = nullptr; + if ( !metadata.mipLevels || !metadata.arraySize ) return E_INVALIDARG; -#ifdef _AMD64_ +#ifdef _M_X64 if ( (metadata.width > 0xFFFFFFFF) || (metadata.height > 0xFFFFFFFF) || (metadata.mipLevels > 0xFFFFFFFF) || (metadata.arraySize > 0xFFFFFFFF) ) return E_INVALIDARG; #endif - std::unique_ptr<D3D11_SUBRESOURCE_DATA[]> initData( new D3D11_SUBRESOURCE_DATA[ metadata.mipLevels * metadata.arraySize ] ); + std::unique_ptr<D3D11_SUBRESOURCE_DATA[]> initData( new (std::nothrow) D3D11_SUBRESOURCE_DATA[ metadata.mipLevels * metadata.arraySize ] ); if ( !initData ) return E_OUTOFMEMORY; // Fill out subresource array - if ( metadata.dimension == TEX_DIMENSION_TEXTURE3D ) + if ( metadata.IsVolumemap() ) { //--- Volume case ------------------------------------------------------------- if ( !metadata.depth ) return E_INVALIDARG; -#ifdef _AMD64_ +#ifdef _M_X64 if ( metadata.depth > 0xFFFFFFFF ) return E_INVALIDARG; #endif @@ -456,6 +512,8 @@ HRESULT CreateTexture( ID3D11Device* pDevice, const Image* srcImages, size_t nim // Create texture using static initialization data HRESULT hr = E_FAIL; + DXGI_FORMAT tformat = ( forceSRGB ) ? 
MakeSRGB( metadata.format ) : metadata.format; + switch ( metadata.dimension ) { case TEX_DIMENSION_TEXTURE1D: @@ -464,11 +522,11 @@ HRESULT CreateTexture( ID3D11Device* pDevice, const Image* srcImages, size_t nim desc.Width = static_cast<UINT>( metadata.width ); desc.MipLevels = static_cast<UINT>( metadata.mipLevels ); desc.ArraySize = static_cast<UINT>( metadata.arraySize ); - desc.Format = metadata.format; - desc.Usage = D3D11_USAGE_DEFAULT; - desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; - desc.CPUAccessFlags = 0; - desc.MiscFlags = 0; + desc.Format = tformat; + desc.Usage = usage; + desc.BindFlags = bindFlags; + desc.CPUAccessFlags = cpuAccessFlags; + desc.MiscFlags = miscFlags & ~D3D11_RESOURCE_MISC_TEXTURECUBE; hr = pDevice->CreateTexture1D( &desc, initData.get(), reinterpret_cast<ID3D11Texture1D**>(ppResource) ); } @@ -481,13 +539,16 @@ HRESULT CreateTexture( ID3D11Device* pDevice, const Image* srcImages, size_t nim desc.Height = static_cast<UINT>( metadata.height ); desc.MipLevels = static_cast<UINT>( metadata.mipLevels ); desc.ArraySize = static_cast<UINT>( metadata.arraySize ); - desc.Format = metadata.format; + desc.Format = tformat; desc.SampleDesc.Count = 1; desc.SampleDesc.Quality = 0; - desc.Usage = D3D11_USAGE_DEFAULT; - desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; - desc.CPUAccessFlags = 0; - desc.MiscFlags = (metadata.miscFlags & TEX_MISC_TEXTURECUBE) ? 
D3D11_RESOURCE_MISC_TEXTURECUBE : 0; + desc.Usage = usage; + desc.BindFlags = bindFlags; + desc.CPUAccessFlags = cpuAccessFlags; + if ( metadata.IsCubemap() ) + desc.MiscFlags = miscFlags | D3D11_RESOURCE_MISC_TEXTURECUBE; + else + desc.MiscFlags = miscFlags & ~D3D11_RESOURCE_MISC_TEXTURECUBE; hr = pDevice->CreateTexture2D( &desc, initData.get(), reinterpret_cast<ID3D11Texture2D**>(ppResource) ); } @@ -500,11 +561,11 @@ HRESULT CreateTexture( ID3D11Device* pDevice, const Image* srcImages, size_t nim desc.Height = static_cast<UINT>( metadata.height ); desc.Depth = static_cast<UINT>( metadata.depth ); desc.MipLevels = static_cast<UINT>( metadata.mipLevels ); - desc.Format = metadata.format; - desc.Usage = D3D11_USAGE_DEFAULT; - desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; - desc.CPUAccessFlags = 0; - desc.MiscFlags = 0; + desc.Format = tformat; + desc.Usage = usage; + desc.BindFlags = bindFlags; + desc.CPUAccessFlags = cpuAccessFlags; + desc.MiscFlags = miscFlags & ~D3D11_RESOURCE_MISC_TEXTURECUBE; hr = pDevice->CreateTexture3D( &desc, initData.get(), reinterpret_cast<ID3D11Texture3D**>(ppResource) ); } @@ -518,71 +579,89 @@ HRESULT CreateTexture( ID3D11Device* pDevice, const Image* srcImages, size_t nim //------------------------------------------------------------------------------------- // Create a shader resource view and associated texture //------------------------------------------------------------------------------------- +_Use_decl_annotations_ HRESULT CreateShaderResourceView( ID3D11Device* pDevice, const Image* srcImages, size_t nimages, const TexMetadata& metadata, ID3D11ShaderResourceView** ppSRV ) { + return CreateShaderResourceViewEx( pDevice, srcImages, nimages, metadata, + D3D11_USAGE_DEFAULT, D3D11_BIND_SHADER_RESOURCE, 0, 0, false, + ppSRV ); +} + +_Use_decl_annotations_ +HRESULT CreateShaderResourceViewEx( ID3D11Device* pDevice, const Image* srcImages, size_t nimages, const TexMetadata& metadata, + D3D11_USAGE usage, unsigned int bindFlags, 
unsigned int cpuAccessFlags, unsigned int miscFlags, bool forceSRGB, + ID3D11ShaderResourceView** ppSRV ) +{ if ( !ppSRV ) return E_INVALIDARG; - ScopedObject<ID3D11Resource> resource; - HRESULT hr = CreateTexture( pDevice, srcImages, nimages, metadata, &resource ); + *ppSRV = nullptr; + + ComPtr<ID3D11Resource> resource; + HRESULT hr = CreateTextureEx( pDevice, srcImages, nimages, metadata, + usage, bindFlags, cpuAccessFlags, miscFlags, forceSRGB, + resource.GetAddressOf() ); if ( FAILED(hr) ) return hr; - assert( !resource.IsNull() ); + assert( resource ); D3D11_SHADER_RESOURCE_VIEW_DESC SRVDesc; memset( &SRVDesc, 0, sizeof(SRVDesc) ); - SRVDesc.Format = metadata.format; + if ( forceSRGB ) + SRVDesc.Format = MakeSRGB( metadata.format ); + else + SRVDesc.Format = metadata.format; switch ( metadata.dimension ) { case TEX_DIMENSION_TEXTURE1D: if ( metadata.arraySize > 1 ) { - SRVDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1DARRAY; + SRVDesc.ViewDimension = D3D_SRV_DIMENSION_TEXTURE1DARRAY; SRVDesc.Texture1DArray.MipLevels = static_cast<UINT>( metadata.mipLevels ); SRVDesc.Texture1DArray.ArraySize = static_cast<UINT>( metadata.arraySize ); } else { - SRVDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1D; + SRVDesc.ViewDimension = D3D_SRV_DIMENSION_TEXTURE1D; SRVDesc.Texture1D.MipLevels = static_cast<UINT>( metadata.mipLevels ); } break; case TEX_DIMENSION_TEXTURE2D: - if ( metadata.miscFlags & TEX_MISC_TEXTURECUBE ) + if ( metadata.IsCubemap() ) { if (metadata.arraySize > 6) { assert( (metadata.arraySize % 6) == 0 ); - SRVDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURECUBEARRAY; + SRVDesc.ViewDimension = D3D_SRV_DIMENSION_TEXTURECUBEARRAY; SRVDesc.TextureCubeArray.MipLevels = static_cast<UINT>( metadata.mipLevels ); SRVDesc.TextureCubeArray.NumCubes = static_cast<UINT>( metadata.arraySize / 6 ); } else { - SRVDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURECUBE; + SRVDesc.ViewDimension = D3D_SRV_DIMENSION_TEXTURECUBE; SRVDesc.TextureCube.MipLevels = 
static_cast<UINT>( metadata.mipLevels ); } } else if ( metadata.arraySize > 1 ) { - SRVDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2DARRAY; + SRVDesc.ViewDimension = D3D_SRV_DIMENSION_TEXTURE2DARRAY; SRVDesc.Texture2DArray.MipLevels = static_cast<UINT>( metadata.mipLevels ); SRVDesc.Texture2DArray.ArraySize = static_cast<UINT>( metadata.arraySize ); } else { - SRVDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + SRVDesc.ViewDimension = D3D_SRV_DIMENSION_TEXTURE2D; SRVDesc.Texture2D.MipLevels = static_cast<UINT>( metadata.mipLevels ); } break; case TEX_DIMENSION_TEXTURE3D: assert( metadata.arraySize == 1 ); - SRVDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE3D; + SRVDesc.ViewDimension = D3D_SRV_DIMENSION_TEXTURE3D; SRVDesc.Texture3D.MipLevels = static_cast<UINT>( metadata.mipLevels ); break; @@ -603,6 +682,7 @@ HRESULT CreateShaderResourceView( ID3D11Device* pDevice, const Image* srcImages, //------------------------------------------------------------------------------------- // Save a texture resource to a DDS file in memory/on disk //------------------------------------------------------------------------------------- +_Use_decl_annotations_ HRESULT CaptureTexture( ID3D11Device* pDevice, ID3D11DeviceContext* pContext, ID3D11Resource* pSource, ScratchImage& result ) { if ( !pDevice || !pContext || !pSource ) @@ -617,12 +697,12 @@ HRESULT CaptureTexture( ID3D11Device* pDevice, ID3D11DeviceContext* pContext, ID { case D3D11_RESOURCE_DIMENSION_TEXTURE1D: { - ScopedObject<ID3D11Texture1D> pTexture; - hr = pSource->QueryInterface( __uuidof(ID3D11Texture1D), (void**) &pTexture ); + ComPtr<ID3D11Texture1D> pTexture; + hr = pSource->QueryInterface( __uuidof(ID3D11Texture1D), reinterpret_cast<void**>( pTexture.GetAddressOf() ) ); if ( FAILED(hr) ) break; - assert( pTexture.Get() ); + assert( pTexture ); D3D11_TEXTURE1D_DESC desc; pTexture->GetDesc( &desc ); @@ -632,12 +712,12 @@ HRESULT CaptureTexture( ID3D11Device* pDevice, ID3D11DeviceContext* pContext, ID 
desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; desc.Usage = D3D11_USAGE_STAGING; - ScopedObject<ID3D11Texture1D> pStaging; - hr = pDevice->CreateTexture1D( &desc, 0, &pStaging ); + ComPtr<ID3D11Texture1D> pStaging; + hr = pDevice->CreateTexture1D( &desc, 0, pStaging.GetAddressOf() ); if ( FAILED(hr) ) break; - assert( pStaging.Get() ); + assert( pStaging ); pContext->CopyResource( pStaging.Get(), pSource ); @@ -647,6 +727,7 @@ HRESULT CaptureTexture( ID3D11Device* pDevice, ID3D11DeviceContext* pContext, ID mdata.arraySize = desc.ArraySize; mdata.mipLevels = desc.MipLevels; mdata.miscFlags = 0; + mdata.miscFlags2 = 0; mdata.format = desc.Format; mdata.dimension = TEX_DIMENSION_TEXTURE1D; @@ -660,28 +741,28 @@ HRESULT CaptureTexture( ID3D11Device* pDevice, ID3D11DeviceContext* pContext, ID case D3D11_RESOURCE_DIMENSION_TEXTURE2D: { - ScopedObject<ID3D11Texture2D> pTexture; - hr = pSource->QueryInterface( __uuidof(ID3D11Texture2D), (void**) &pTexture ); + ComPtr<ID3D11Texture2D> pTexture; + hr = pSource->QueryInterface( __uuidof(ID3D11Texture2D), reinterpret_cast<void**>( pTexture.GetAddressOf() ) ); if ( FAILED(hr) ) break; - assert( pTexture.Get() ); + assert( pTexture ); D3D11_TEXTURE2D_DESC desc; pTexture->GetDesc( &desc ); - ScopedObject<ID3D11Texture2D> pStaging; + ComPtr<ID3D11Texture2D> pStaging; if ( desc.SampleDesc.Count > 1 ) { desc.SampleDesc.Count = 1; desc.SampleDesc.Quality = 0; - ScopedObject<ID3D11Texture2D> pTemp; - hr = pDevice->CreateTexture2D( &desc, 0, &pTemp ); + ComPtr<ID3D11Texture2D> pTemp; + hr = pDevice->CreateTexture2D( &desc, 0, pTemp.GetAddressOf() ); if ( FAILED(hr) ) break; - assert( pTemp.Get() ); + assert( pTemp ); DXGI_FORMAT fmt = desc.Format; if ( IsTypeless(fmt) ) @@ -707,7 +788,6 @@ HRESULT CaptureTexture( ID3D11Device* pDevice, ID3D11DeviceContext* pContext, ID for( UINT level = 0; level < desc.MipLevels; ++level ) { UINT index = D3D11CalcSubresource( level, item, desc.MipLevels ); - pContext->ResolveSubresource( pTemp.Get(), 
index, pSource, index, fmt ); } } @@ -717,11 +797,11 @@ HRESULT CaptureTexture( ID3D11Device* pDevice, ID3D11DeviceContext* pContext, ID desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; desc.Usage = D3D11_USAGE_STAGING; - hr = pDevice->CreateTexture2D( &desc, 0, &pStaging ); + hr = pDevice->CreateTexture2D( &desc, 0, pStaging.GetAddressOf() ); if ( FAILED(hr) ) break; - assert( pStaging.Get() ); + assert( pStaging ); pContext->CopyResource( pStaging.Get(), pTemp.Get() ); } @@ -736,7 +816,7 @@ HRESULT CaptureTexture( ID3D11Device* pDevice, ID3D11DeviceContext* pContext, ID if ( FAILED(hr) ) break; - assert( pStaging.Get() ); + assert( pStaging ); pContext->CopyResource( pStaging.Get(), pSource ); } @@ -748,6 +828,7 @@ HRESULT CaptureTexture( ID3D11Device* pDevice, ID3D11DeviceContext* pContext, ID mdata.arraySize = desc.ArraySize; mdata.mipLevels = desc.MipLevels; mdata.miscFlags = (desc.MiscFlags & D3D11_RESOURCE_MISC_TEXTURECUBE) ? TEX_MISC_TEXTURECUBE : 0; + mdata.miscFlags2 = 0; mdata.format = desc.Format; mdata.dimension = TEX_DIMENSION_TEXTURE2D; @@ -761,12 +842,12 @@ HRESULT CaptureTexture( ID3D11Device* pDevice, ID3D11DeviceContext* pContext, ID case D3D11_RESOURCE_DIMENSION_TEXTURE3D: { - ScopedObject<ID3D11Texture3D> pTexture; - hr = pSource->QueryInterface( __uuidof(ID3D11Texture3D), (void**) &pTexture ); + ComPtr<ID3D11Texture3D> pTexture; + hr = pSource->QueryInterface( __uuidof(ID3D11Texture3D), reinterpret_cast<void**>( pTexture.GetAddressOf() ) ); if ( FAILED(hr) ) break; - assert( pTexture.Get() ); + assert( pTexture ); D3D11_TEXTURE3D_DESC desc; pTexture->GetDesc( &desc ); @@ -776,12 +857,12 @@ HRESULT CaptureTexture( ID3D11Device* pDevice, ID3D11DeviceContext* pContext, ID desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; desc.Usage = D3D11_USAGE_STAGING; - ScopedObject<ID3D11Texture3D> pStaging; - hr = pDevice->CreateTexture3D( &desc, 0, &pStaging ); + ComPtr<ID3D11Texture3D> pStaging; + hr = pDevice->CreateTexture3D( &desc, 0, pStaging.GetAddressOf() ); 
if ( FAILED(hr) ) break; - assert( pStaging.Get() ); + assert( pStaging ); pContext->CopyResource( pStaging.Get(), pSource ); @@ -792,6 +873,7 @@ HRESULT CaptureTexture( ID3D11Device* pDevice, ID3D11DeviceContext* pContext, ID mdata.arraySize = 1; mdata.mipLevels = desc.MipLevels; mdata.miscFlags = 0; + mdata.miscFlags2 = 0; mdata.format = desc.Format; mdata.dimension = TEX_DIMENSION_TEXTURE3D; diff --git a/thirdparty/directxtex/DirectXTex/DirectXTexDDS.cpp b/thirdparty/directxtex/DirectXTex/DirectXTexDDS.cpp index d249b4fe..67d2e825 100644 --- a/thirdparty/directxtex/DirectXTex/DirectXTexDDS.cpp +++ b/thirdparty/directxtex/DirectXTex/DirectXTexDDS.cpp @@ -39,6 +39,10 @@ enum CONVERSION_FLAGS CONV_FLAGS_8332 = 0x400, // Source is a 8:3:3:2 (16bpp) format CONV_FLAGS_A8P8 = 0x800, // Has an 8-bit palette with an alpha channel CONV_FLAGS_DX10 = 0x10000, // Has the 'DX10' extension header + CONV_FLAGS_PMALPHA = 0x20000, // Contains premultiplied alpha data + CONV_FLAGS_L8 = 0x40000, // Source is a 8 luminance format + CONV_FLAGS_L16 = 0x80000, // Source is a 16 luminance format + CONV_FLAGS_A8L8 = 0x100000, // Source is a 8:8 luminance format }; struct LegacyDDS @@ -54,8 +58,8 @@ const LegacyDDS g_LegacyDDSMap[] = { DXGI_FORMAT_BC2_UNORM, CONV_FLAGS_NONE, DDSPF_DXT3 }, // D3DFMT_DXT3 { DXGI_FORMAT_BC3_UNORM, CONV_FLAGS_NONE, DDSPF_DXT5 }, // D3DFMT_DXT5 - { DXGI_FORMAT_BC2_UNORM, CONV_FLAGS_NONE, DDSPF_DXT2 }, // D3DFMT_DXT2 (ignore premultiply) - { DXGI_FORMAT_BC3_UNORM, CONV_FLAGS_NONE, DDSPF_DXT4 }, // D3DFMT_DXT4 (ignore premultiply) + { DXGI_FORMAT_BC2_UNORM, CONV_FLAGS_PMALPHA, DDSPF_DXT2 }, // D3DFMT_DXT2 + { DXGI_FORMAT_BC3_UNORM, CONV_FLAGS_PMALPHA, DDSPF_DXT4 }, // D3DFMT_DXT4 { DXGI_FORMAT_BC4_UNORM, CONV_FLAGS_NONE, DDSPF_BC4_UNORM }, { DXGI_FORMAT_BC4_SNORM, CONV_FLAGS_NONE, DDSPF_BC4_SNORM }, @@ -114,21 +118,14 @@ const LegacyDDS g_LegacyDDSMap[] = { DXGI_FORMAT_R8G8B8A8_UNORM, CONV_FLAGS_EXPAND | CONV_FLAGS_PAL8, { sizeof(DDS_PIXELFORMAT), DDS_PAL8, 0, 
8, 0, 0, 0, 0 } }, // D3DFMT_P8 -#ifdef DXGI_1_2_FORMATS { DXGI_FORMAT_B4G4R4A4_UNORM, CONV_FLAGS_4444, DDSPF_A4R4G4B4 }, // D3DFMT_A4R4G4B4 (uses DXGI 1.2 format) { DXGI_FORMAT_B4G4R4A4_UNORM, CONV_FLAGS_NOALPHA | CONV_FLAGS_4444, { sizeof(DDS_PIXELFORMAT), DDS_RGB, 0, 16, 0x0f00, 0x00f0, 0x000f, 0x0000 } }, // D3DFMT_X4R4G4B4 (uses DXGI 1.2 format) { DXGI_FORMAT_B4G4R4A4_UNORM, CONV_FLAGS_EXPAND | CONV_FLAGS_44, { sizeof(DDS_PIXELFORMAT), DDS_LUMINANCE, 0, 8, 0x0f, 0x00, 0x00, 0xf0 } }, // D3DFMT_A4L4 (uses DXGI 1.2 format) -#else // !DXGI_1_2_FORMATS - { DXGI_FORMAT_R8G8B8A8_UNORM, CONV_FLAGS_EXPAND - | CONV_FLAGS_4444, DDSPF_A4R4G4B4 }, // D3DFMT_A4R4G4B4 - { DXGI_FORMAT_R8G8B8A8_UNORM, CONV_FLAGS_EXPAND - | CONV_FLAGS_NOALPHA - | CONV_FLAGS_4444, { sizeof(DDS_PIXELFORMAT), DDS_RGB, 0, 16, 0x0f00, 0x00f0, 0x000f, 0x0000 } }, // D3DFMT_X4R4G4B4 - { DXGI_FORMAT_R8G8B8A8_UNORM, CONV_FLAGS_EXPAND - | CONV_FLAGS_44, { sizeof(DDS_PIXELFORMAT), DDS_LUMINANCE, 0, 8, 0x0f, 0x00, 0x00, 0xf0 } }, // D3DFMT_A4L4 -#endif + + { DXGI_FORMAT_YUY2, CONV_FLAGS_NONE, DDSPF_YUY2 }, // D3DFMT_YUY2 (uses DXGI 1.2 format) + { DXGI_FORMAT_YUY2, CONV_FLAGS_SWIZZLE, { sizeof(DDS_PIXELFORMAT), DDS_FOURCC, MAKEFOURCC('U','Y','V','Y'), 0, 0, 0, 0, 0 } }, // D3DFMT_UYVY (uses DXGI 1.2 format) }; // Note that many common DDS reader/writers (including D3DX) swap the @@ -140,13 +137,11 @@ const LegacyDDS g_LegacyDDSMap[] = // We do not support the following legacy Direct3D 9 formats: // BumpDuDv D3DFMT_V8U8, D3DFMT_Q8W8V8U8, D3DFMT_V16U16, D3DFMT_A2W10V10U10 // BumpLuminance D3DFMT_L6V5U5, D3DFMT_X8L8V8U8 -// FourCC "UYVY" D3DFMT_UYVY -// FourCC "YUY2" D3DFMT_YUY2 // FourCC 117 D3DFMT_CxV8U8 // ZBuffer D3DFMT_D16_LOCKABLE // FourCC 82 D3DFMT_D32F_LOCKABLE -static DXGI_FORMAT _GetDXGIFormat( const DDS_PIXELFORMAT& ddpf, DWORD flags, _Inout_opt_ DWORD* convFlags ) +static DXGI_FORMAT _GetDXGIFormat( const DDS_PIXELFORMAT& ddpf, DWORD flags, _Inout_ DWORD& convFlags ) { const size_t MAP_SIZE = 
sizeof(g_LegacyDDSMap) / sizeof(LegacyDDS); size_t index = 0; @@ -192,8 +187,7 @@ static DXGI_FORMAT _GetDXGIFormat( const DDS_PIXELFORMAT& ddpf, DWORD flags, _In cflags ^= CONV_FLAGS_SWIZZLE; } - if ( convFlags ) - *convFlags = cflags; + convFlags = cflags; return format; } @@ -202,8 +196,8 @@ static DXGI_FORMAT _GetDXGIFormat( const DDS_PIXELFORMAT& ddpf, DWORD flags, _In //------------------------------------------------------------------------------------- // Decodes DDS header including optional DX10 extended header //------------------------------------------------------------------------------------- -static HRESULT _DecodeDDSHeader( _In_bytecount_(size) LPCVOID pSource, size_t size, DWORD flags, _Out_ TexMetadata& metadata, - _Inout_opt_ DWORD* convFlags ) +static HRESULT _DecodeDDSHeader( _In_reads_bytes_(size) LPCVOID pSource, size_t size, DWORD flags, _Out_ TexMetadata& metadata, + _Inout_ DWORD& convFlags ) { if ( !pSource ) return E_INVALIDARG; @@ -222,8 +216,7 @@ static HRESULT _DecodeDDSHeader( _In_bytecount_(size) LPCVOID pSource, size_t si return E_FAIL; } - const DDS_HEADER* pHeader = reinterpret_cast<const DDS_HEADER*>( (const uint8_t*)pSource + sizeof( uint32_t ) ); - assert( pHeader ); + auto pHeader = reinterpret_cast<const DDS_HEADER*>( (const uint8_t*)pSource + sizeof( uint32_t ) ); // Verify header to validate DDS file if ( pHeader->dwSize != sizeof(DDS_HEADER) @@ -241,14 +234,13 @@ static HRESULT _DecodeDDSHeader( _In_bytecount_(size) LPCVOID pSource, size_t si && (MAKEFOURCC( 'D', 'X', '1', '0' ) == pHeader->ddspf.dwFourCC) ) { // Buffer must be big enough for both headers and magic value - if ( size < (sizeof(DDS_HEADER)+sizeof(uint32_t)+sizeof(DDS_HEADER_DXT10)) ) + if ( size < ( sizeof(DDS_HEADER) + sizeof(uint32_t) + sizeof(DDS_HEADER_DXT10) ) ) { return E_FAIL; } - const DDS_HEADER_DXT10* d3d10ext = reinterpret_cast<const DDS_HEADER_DXT10*>( (const uint8_t*)pSource + sizeof( uint32_t ) + sizeof(DDS_HEADER) ); - if ( convFlags ) - 
*convFlags |= CONV_FLAGS_DX10; + auto d3d10ext = reinterpret_cast<const DDS_HEADER_DXT10*>( (const uint8_t*)pSource + sizeof( uint32_t ) + sizeof(DDS_HEADER) ); + convFlags |= CONV_FLAGS_DX10; metadata.arraySize = d3d10ext->arraySize; if ( metadata.arraySize == 0 ) @@ -257,11 +249,15 @@ static HRESULT _DecodeDDSHeader( _In_bytecount_(size) LPCVOID pSource, size_t si } metadata.format = d3d10ext->dxgiFormat; - if ( !IsValid( metadata.format ) ) + if ( !IsValid( metadata.format ) || IsPalettized( metadata.format ) ) { - HRESULT_FROM_WIN32( ERROR_INVALID_DATA ); + return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); } + static_assert( TEX_MISC_TEXTURECUBE == DDS_RESOURCE_MISC_TEXTURECUBE, "DDS header mismatch"); + + metadata.miscFlags = d3d10ext->miscFlag & ~TEX_MISC_TEXTURECUBE; + switch ( d3d10ext->resourceDimension ) { case DDS_DIMENSION_TEXTURE1D: @@ -309,6 +305,16 @@ static HRESULT _DecodeDDSHeader( _In_bytecount_(size) LPCVOID pSource, size_t si default: return HRESULT_FROM_WIN32( ERROR_INVALID_DATA ); } + + static_assert( TEX_MISC2_ALPHA_MODE_MASK == DDS_MISC_FLAGS2_ALPHA_MODE_MASK, "DDS header mismatch"); + + static_assert( TEX_ALPHA_MODE_UNKNOWN == DDS_ALPHA_MODE_UNKNOWN, "DDS header mismatch"); + static_assert( TEX_ALPHA_MODE_STRAIGHT == DDS_ALPHA_MODE_STRAIGHT, "DDS header mismatch"); + static_assert( TEX_ALPHA_MODE_PREMULTIPLIED == DDS_ALPHA_MODE_PREMULTIPLIED, "DDS header mismatch"); + static_assert( TEX_ALPHA_MODE_OPAQUE == DDS_ALPHA_MODE_OPAQUE, "DDS header mismatch"); + static_assert( TEX_ALPHA_MODE_CUSTOM == DDS_ALPHA_MODE_CUSTOM, "DDS header mismatch"); + + metadata.miscFlags2 = d3d10ext->miscFlags2; } else { @@ -345,6 +351,31 @@ static HRESULT _DecodeDDSHeader( _In_bytecount_(size) LPCVOID pSource, size_t si if ( metadata.format == DXGI_FORMAT_UNKNOWN ) return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); + + if ( convFlags & CONV_FLAGS_PMALPHA ) + metadata.miscFlags2 |= TEX_ALPHA_MODE_PREMULTIPLIED; + + // Special flag for handling LUMINANCE legacy 
formats + if ( flags & DDS_FLAGS_EXPAND_LUMINANCE ) + { + switch ( metadata.format ) + { + case DXGI_FORMAT_R8_UNORM: + metadata.format = DXGI_FORMAT_R8G8B8A8_UNORM; + convFlags |= CONV_FLAGS_L8 | CONV_FLAGS_EXPAND; + break; + + case DXGI_FORMAT_R8G8_UNORM: + metadata.format = DXGI_FORMAT_R8G8B8A8_UNORM; + convFlags |= CONV_FLAGS_A8L8 | CONV_FLAGS_EXPAND; + break; + + case DXGI_FORMAT_R16_UNORM: + metadata.format = DXGI_FORMAT_R16G16B16A16_UNORM; + convFlags |= CONV_FLAGS_L16 | CONV_FLAGS_EXPAND; + break; + } + } } // Special flag for handling BGR DXGI 1.1 formats @@ -354,38 +385,32 @@ static HRESULT _DecodeDDSHeader( _In_bytecount_(size) LPCVOID pSource, size_t si { case DXGI_FORMAT_B8G8R8A8_UNORM: metadata.format = DXGI_FORMAT_R8G8B8A8_UNORM; - if ( convFlags ) - *convFlags |= CONV_FLAGS_SWIZZLE; + convFlags |= CONV_FLAGS_SWIZZLE; break; case DXGI_FORMAT_B8G8R8X8_UNORM: metadata.format = DXGI_FORMAT_R8G8B8A8_UNORM; - if ( convFlags ) - *convFlags |= CONV_FLAGS_SWIZZLE | CONV_FLAGS_NOALPHA; + convFlags |= CONV_FLAGS_SWIZZLE | CONV_FLAGS_NOALPHA; break; case DXGI_FORMAT_B8G8R8A8_TYPELESS: metadata.format = DXGI_FORMAT_R8G8B8A8_TYPELESS; - if ( convFlags ) - *convFlags |= CONV_FLAGS_SWIZZLE; + convFlags |= CONV_FLAGS_SWIZZLE; break; case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB: metadata.format = DXGI_FORMAT_R8G8B8A8_UNORM_SRGB; - if ( convFlags ) - *convFlags |= CONV_FLAGS_SWIZZLE; + convFlags |= CONV_FLAGS_SWIZZLE; break; case DXGI_FORMAT_B8G8R8X8_TYPELESS: metadata.format = DXGI_FORMAT_R8G8B8A8_TYPELESS; - if ( convFlags ) - *convFlags |= CONV_FLAGS_SWIZZLE | CONV_FLAGS_NOALPHA; + convFlags |= CONV_FLAGS_SWIZZLE | CONV_FLAGS_NOALPHA; break; case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB: metadata.format = DXGI_FORMAT_R8G8B8A8_UNORM_SRGB; - if ( convFlags ) - *convFlags |= CONV_FLAGS_SWIZZLE | CONV_FLAGS_NOALPHA; + convFlags |= CONV_FLAGS_SWIZZLE | CONV_FLAGS_NOALPHA; break; } } @@ -397,16 +422,11 @@ static HRESULT _DecodeDDSHeader( _In_bytecount_(size) LPCVOID pSource, size_t si { 
case DXGI_FORMAT_B5G6R5_UNORM: case DXGI_FORMAT_B5G5R5A1_UNORM: -#ifdef DXGI_1_2_FORMATS case DXGI_FORMAT_B4G4R4A4_UNORM: -#endif metadata.format = DXGI_FORMAT_R8G8B8A8_UNORM; - if ( convFlags ) - { - *convFlags |= CONV_FLAGS_EXPAND; - if ( metadata.format == DXGI_FORMAT_B5G6R5_UNORM ) - *convFlags |= CONV_FLAGS_NOALPHA; - } + convFlags |= CONV_FLAGS_EXPAND; + if ( metadata.format == DXGI_FORMAT_B5G6R5_UNORM ) + convFlags |= CONV_FLAGS_NOALPHA; } } @@ -417,19 +437,30 @@ static HRESULT _DecodeDDSHeader( _In_bytecount_(size) LPCVOID pSource, size_t si //------------------------------------------------------------------------------------- // Encodes DDS file header (magic value, header, optional DX10 extended header) //------------------------------------------------------------------------------------- -HRESULT _EncodeDDSHeader( _In_ const TexMetadata& metadata, DWORD flags, - _Out_opt_cap_x_(maxsize) LPVOID pDestination, _In_ size_t maxsize, _Out_ size_t& required ) +_Use_decl_annotations_ +HRESULT _EncodeDDSHeader( const TexMetadata& metadata, DWORD flags, + LPVOID pDestination, size_t maxsize, size_t& required ) { - assert( IsValid( metadata.format ) && !IsVideo( metadata.format ) ); + if ( !IsValid( metadata.format ) ) + return E_INVALIDARG; + + if ( IsPalettized( metadata.format ) ) + return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); if ( metadata.arraySize > 1 ) { - if ( (metadata.arraySize != 6) || (metadata.dimension != TEX_DIMENSION_TEXTURE2D) || !(metadata.miscFlags & TEX_MISC_TEXTURECUBE) ) + if ( (metadata.arraySize != 6) || (metadata.dimension != TEX_DIMENSION_TEXTURE2D) || !(metadata.IsCubemap()) ) { + // Texture1D arrays, Texture2D arrays, and Cubemap arrays must be stored using 'DX10' extended header flags |= DDS_FLAGS_FORCE_DX10_EXT; } } + if ( flags & DDS_FLAGS_FORCE_DX10_EXT_MISC2 ) + { + flags |= DDS_FLAGS_FORCE_DX10_EXT; + } + DDS_PIXELFORMAT ddpf = { 0 }; if ( !(flags & DDS_FLAGS_FORCE_DX10_EXT) ) { @@ -444,8 +475,8 @@ HRESULT _EncodeDDSHeader( 
_In_ const TexMetadata& metadata, DWORD flags, case DXGI_FORMAT_R8G8_B8G8_UNORM: memcpy_s( &ddpf, sizeof(ddpf), &DDSPF_R8G8_B8G8, sizeof(DDS_PIXELFORMAT) ); break; case DXGI_FORMAT_G8R8_G8B8_UNORM: memcpy_s( &ddpf, sizeof(ddpf), &DDSPF_G8R8_G8B8, sizeof(DDS_PIXELFORMAT) ); break; case DXGI_FORMAT_BC1_UNORM: memcpy_s( &ddpf, sizeof(ddpf), &DDSPF_DXT1, sizeof(DDS_PIXELFORMAT) ); break; - case DXGI_FORMAT_BC2_UNORM: memcpy_s( &ddpf, sizeof(ddpf), &DDSPF_DXT3, sizeof(DDS_PIXELFORMAT) ); break; - case DXGI_FORMAT_BC3_UNORM: memcpy_s( &ddpf, sizeof(ddpf), &DDSPF_DXT5, sizeof(DDS_PIXELFORMAT) ); break; + case DXGI_FORMAT_BC2_UNORM: memcpy_s( &ddpf, sizeof(ddpf), metadata.IsPMAlpha() ? (&DDSPF_DXT2) : (&DDSPF_DXT3), sizeof(DDS_PIXELFORMAT) ); break; + case DXGI_FORMAT_BC3_UNORM: memcpy_s( &ddpf, sizeof(ddpf), metadata.IsPMAlpha() ? (&DDSPF_DXT4) : (&DDSPF_DXT5), sizeof(DDS_PIXELFORMAT) ); break; case DXGI_FORMAT_BC4_UNORM: memcpy_s( &ddpf, sizeof(ddpf), &DDSPF_BC4_UNORM, sizeof(DDS_PIXELFORMAT) ); break; case DXGI_FORMAT_BC4_SNORM: memcpy_s( &ddpf, sizeof(ddpf), &DDSPF_BC4_SNORM, sizeof(DDS_PIXELFORMAT) ); break; case DXGI_FORMAT_BC5_UNORM: memcpy_s( &ddpf, sizeof(ddpf), &DDSPF_BC5_UNORM, sizeof(DDS_PIXELFORMAT) ); break; @@ -454,10 +485,8 @@ HRESULT _EncodeDDSHeader( _In_ const TexMetadata& metadata, DWORD flags, case DXGI_FORMAT_B5G5R5A1_UNORM: memcpy_s( &ddpf, sizeof(ddpf), &DDSPF_A1R5G5B5, sizeof(DDS_PIXELFORMAT) ); break; case DXGI_FORMAT_B8G8R8A8_UNORM: memcpy_s( &ddpf, sizeof(ddpf), &DDSPF_A8R8G8B8, sizeof(DDS_PIXELFORMAT) ); break; // DXGI 1.1 case DXGI_FORMAT_B8G8R8X8_UNORM: memcpy_s( &ddpf, sizeof(ddpf), &DDSPF_X8R8G8B8, sizeof(DDS_PIXELFORMAT) ); break; // DXGI 1.1 - -#ifdef DXGI_1_2_FORMATS - case DXGI_FORMAT_B4G4R4A4_UNORM: memcpy_s( &ddpf, sizeof(ddpf), &DDSPF_A4R4G4B4, sizeof(DDS_PIXELFORMAT) ); break; -#endif + case DXGI_FORMAT_B4G4R4A4_UNORM: memcpy_s( &ddpf, sizeof(ddpf), &DDSPF_A4R4G4B4, sizeof(DDS_PIXELFORMAT) ); break; // DXGI 1.2 + case 
DXGI_FORMAT_YUY2: memcpy_s( &ddpf, sizeof(ddpf), &DDSPF_YUY2, sizeof(DDS_PIXELFORMAT) ); break; // DXGI 1.2 // Legacy D3DX formats using D3DFMT enum value as FourCC case DXGI_FORMAT_R32G32B32A32_FLOAT: @@ -500,7 +529,7 @@ HRESULT _EncodeDDSHeader( _In_ const TexMetadata& metadata, DWORD flags, *reinterpret_cast<uint32_t*>(pDestination) = DDS_MAGIC; - DDS_HEADER* header = reinterpret_cast<DDS_HEADER*>( reinterpret_cast<uint8_t*>(pDestination) + sizeof(uint32_t) ); + auto header = reinterpret_cast<DDS_HEADER*>( reinterpret_cast<uint8_t*>(pDestination) + sizeof(uint32_t) ); assert( header ); memset( header, 0, sizeof(DDS_HEADER ) ); @@ -512,7 +541,7 @@ HRESULT _EncodeDDSHeader( _In_ const TexMetadata& metadata, DWORD flags, { header->dwFlags |= DDS_HEADER_FLAGS_MIPMAP; -#ifdef _AMD64_ +#ifdef _M_X64 if ( metadata.mipLevels > 0xFFFFFFFF ) return E_INVALIDARG; #endif @@ -526,8 +555,8 @@ HRESULT _EncodeDDSHeader( _In_ const TexMetadata& metadata, DWORD flags, switch( metadata.dimension ) { case TEX_DIMENSION_TEXTURE1D: -#ifdef _AMD64_ - if ( metadata.height > 0xFFFFFFFF ) +#ifdef _M_X64 + if ( metadata.width > 0xFFFFFFFF ) return E_INVALIDARG; #endif @@ -536,7 +565,7 @@ HRESULT _EncodeDDSHeader( _In_ const TexMetadata& metadata, DWORD flags, break; case TEX_DIMENSION_TEXTURE2D: -#ifdef _AMD64_ +#ifdef _M_X64 if ( metadata.height > 0xFFFFFFFF || metadata.width > 0xFFFFFFFF) return E_INVALIDARG; @@ -546,7 +575,7 @@ HRESULT _EncodeDDSHeader( _In_ const TexMetadata& metadata, DWORD flags, header->dwWidth = static_cast<uint32_t>( metadata.width ); header->dwDepth = 1; - if ( metadata.miscFlags & TEX_MISC_TEXTURECUBE ) + if ( metadata.IsCubemap() ) { header->dwCaps |= DDS_SURFACE_FLAGS_CUBEMAP; header->dwCaps2 |= DDS_CUBEMAP_ALLFACES; @@ -554,7 +583,7 @@ HRESULT _EncodeDDSHeader( _In_ const TexMetadata& metadata, DWORD flags, break; case TEX_DIMENSION_TEXTURE3D: -#ifdef _AMD64_ +#ifdef _M_X64 if ( metadata.height > 0xFFFFFFFF || metadata.width > 0xFFFFFFFF || metadata.depth > 
0xFFFFFFFF ) @@ -575,7 +604,7 @@ HRESULT _EncodeDDSHeader( _In_ const TexMetadata& metadata, DWORD flags, size_t rowPitch, slicePitch; ComputePitch( metadata.format, metadata.width, metadata.height, rowPitch, slicePitch, CP_FLAGS_NONE ); -#ifdef _AMD64_ +#ifdef _M_X64 if ( slicePitch > 0xFFFFFFFF || rowPitch > 0xFFFFFFFF ) return E_FAIL; @@ -596,18 +625,22 @@ HRESULT _EncodeDDSHeader( _In_ const TexMetadata& metadata, DWORD flags, { memcpy_s( &header->ddspf, sizeof(header->ddspf), &DDSPF_DX10, sizeof(DDS_PIXELFORMAT) ); - DDS_HEADER_DXT10* ext = reinterpret_cast<DDS_HEADER_DXT10*>( reinterpret_cast<uint8_t*>(header) + sizeof(DDS_HEADER) ); + auto ext = reinterpret_cast<DDS_HEADER_DXT10*>( reinterpret_cast<uint8_t*>(header) + sizeof(DDS_HEADER) ); assert( ext ); memset( ext, 0, sizeof(DDS_HEADER_DXT10) ); ext->dxgiFormat = metadata.format; ext->resourceDimension = metadata.dimension; -#ifdef _AMD64_ +#ifdef _M_X64 if ( metadata.arraySize > 0xFFFFFFFF ) return E_INVALIDARG; #endif + static_assert( TEX_MISC_TEXTURECUBE == DDS_RESOURCE_MISC_TEXTURECUBE, "DDS header mismatch"); + + ext->miscFlag = metadata.miscFlags & ~TEX_MISC_TEXTURECUBE; + if ( metadata.miscFlags & TEX_MISC_TEXTURECUBE ) { ext->miscFlag |= TEX_MISC_TEXTURECUBE; @@ -618,6 +651,20 @@ HRESULT _EncodeDDSHeader( _In_ const TexMetadata& metadata, DWORD flags, { ext->arraySize = static_cast<UINT>( metadata.arraySize ); } + + static_assert( TEX_MISC2_ALPHA_MODE_MASK == DDS_MISC_FLAGS2_ALPHA_MODE_MASK, "DDS header mismatch"); + + static_assert( TEX_ALPHA_MODE_UNKNOWN == DDS_ALPHA_MODE_UNKNOWN, "DDS header mismatch"); + static_assert( TEX_ALPHA_MODE_STRAIGHT == DDS_ALPHA_MODE_STRAIGHT, "DDS header mismatch"); + static_assert( TEX_ALPHA_MODE_PREMULTIPLIED == DDS_ALPHA_MODE_PREMULTIPLIED, "DDS header mismatch"); + static_assert( TEX_ALPHA_MODE_OPAQUE == DDS_ALPHA_MODE_OPAQUE, "DDS header mismatch"); + static_assert( TEX_ALPHA_MODE_CUSTOM == DDS_ALPHA_MODE_CUSTOM, "DDS header mismatch"); + + if ( flags & 
DDS_FLAGS_FORCE_DX10_EXT_MISC2 ) + { + // This was formerly 'reserved'. D3DX10 and D3DX11 will fail if this value is anything other than 0 + ext->miscFlags2 = metadata.miscFlags2; + } } else { @@ -642,6 +689,9 @@ enum TEXP_LEGACY_FORMAT TEXP_LEGACY_A8P8, TEXP_LEGACY_A4L4, TEXP_LEGACY_B4G4R4A4, + TEXP_LEGACY_L8, + TEXP_LEGACY_L16, + TEXP_LEGACY_A8L8 }; inline static TEXP_LEGACY_FORMAT _FindLegacyFormat( DWORD flags ) @@ -660,21 +710,26 @@ inline static TEXP_LEGACY_FORMAT _FindLegacyFormat( DWORD flags ) lformat = TEXP_LEGACY_A8R3G3B2; else if ( flags & CONV_FLAGS_44 ) lformat = TEXP_LEGACY_A4L4; -#ifndef DXGI_1_2_FORMATS else if ( flags & CONV_FLAGS_4444 ) lformat = TEXP_LEGACY_B4G4R4A4; -#endif + else if ( flags & CONV_FLAGS_L8 ) + lformat = TEXP_LEGACY_L8; + else if ( flags & CONV_FLAGS_L16 ) + lformat = TEXP_LEGACY_L16; + else if ( flags & CONV_FLAGS_A8L8 ) + lformat = TEXP_LEGACY_A8L8; return lformat; } -static bool _LegacyExpandScanline( _Out_bytecap_(outSize) LPVOID pDestination, size_t outSize, _In_ DXGI_FORMAT outFormat, - _In_bytecount_(inSize) LPCVOID pSource, size_t inSize, _In_ TEXP_LEGACY_FORMAT inFormat, - _In_opt_count_c_(256) const uint32_t* pal8, _In_ DWORD flags ) +_Success_(return != false) +static bool _LegacyExpandScanline( _Out_writes_bytes_(outSize) LPVOID pDestination, size_t outSize, _In_ DXGI_FORMAT outFormat, + _In_reads_bytes_(inSize) LPCVOID pSource, size_t inSize, _In_ TEXP_LEGACY_FORMAT inFormat, + _In_reads_opt_(256) const uint32_t* pal8, _In_ DWORD flags ) { assert( pDestination && outSize > 0 ); assert( pSource && inSize > 0 ); - assert( IsValid(outFormat) && !IsVideo(outFormat) ); + assert( IsValid(outFormat) && !IsPlanar(outFormat) && !IsPalettized(outFormat) ); switch( inFormat ) { @@ -683,11 +738,12 @@ static bool _LegacyExpandScanline( _Out_bytecap_(outSize) LPVOID pDestination, s return false; // D3DFMT_R8G8B8 -> DXGI_FORMAT_R8G8B8A8_UNORM + if ( inSize >= 3 && outSize >= 4 ) { const uint8_t * __restrict sPtr = 
reinterpret_cast<const uint8_t*>(pSource); uint32_t * __restrict dPtr = reinterpret_cast<uint32_t*>(pDestination); - for( size_t ocount = 0, icount = 0; ((icount < inSize) && (ocount < outSize)); icount += 3, ocount += 4 ) + for( size_t ocount = 0, icount = 0; ( ( icount < ( inSize - 2 ) ) && ( ocount < ( outSize - 3 ) ) ); icount += 3, ocount += 4 ) { // 24bpp Direct3D 9 files are actually BGR, so need to swizzle as well uint32_t t1 = ( *(sPtr) << 16 ); @@ -697,19 +753,21 @@ static bool _LegacyExpandScanline( _Out_bytecap_(outSize) LPVOID pDestination, s *(dPtr++) = t1 | t2 | t3 | 0xff000000; sPtr += 3; } + return true; } - return true; + return false; case TEXP_LEGACY_R3G3B2: switch( outFormat ) { case DXGI_FORMAT_R8G8B8A8_UNORM: // D3DFMT_R3G3B2 -> DXGI_FORMAT_R8G8B8A8_UNORM + if ( inSize >= 1 && outSize >= 4 ) { const uint8_t* __restrict sPtr = reinterpret_cast<const uint8_t*>(pSource); uint32_t * __restrict dPtr = reinterpret_cast<uint32_t*>(pDestination); - for( size_t ocount = 0, icount = 0; ((icount < inSize) && (ocount < outSize)); ++icount, ocount += 4 ) + for( size_t ocount = 0, icount = 0; ( ( icount < inSize ) && ( ocount < ( outSize - 3 ) ) ); ++icount, ocount += 4 ) { uint8_t t = *(sPtr++); @@ -719,16 +777,18 @@ static bool _LegacyExpandScanline( _Out_bytecap_(outSize) LPVOID pDestination, s *(dPtr++) = t1 | t2 | t3 | 0xff000000; } + return true; } - return true; + return false; case DXGI_FORMAT_B5G6R5_UNORM: // D3DFMT_R3G3B2 -> DXGI_FORMAT_B5G6R5_UNORM + if ( inSize >= 1 && outSize >= 2 ) { const uint8_t* __restrict sPtr = reinterpret_cast<const uint8_t*>(pSource); uint16_t * __restrict dPtr = reinterpret_cast<uint16_t*>(pDestination); - for( size_t ocount = 0, icount = 0; ((icount < inSize) && (ocount < outSize)); ++icount, ocount += 2 ) + for( size_t ocount = 0, icount = 0; ( ( icount < inSize ) && ( ocount < ( outSize - 1 ) ) ); ++icount, ocount += 2 ) { uint8_t t = *(sPtr++); @@ -738,8 +798,9 @@ static bool _LegacyExpandScanline( 
_Out_bytecap_(outSize) LPVOID pDestination, s *(dPtr++) = t1 | t2 | t3; } + return true; } - return true; + return false; } break; @@ -748,11 +809,12 @@ static bool _LegacyExpandScanline( _Out_bytecap_(outSize) LPVOID pDestination, s return false; // D3DFMT_A8R3G3B2 -> DXGI_FORMAT_R8G8B8A8_UNORM + if ( inSize >= 2 && outSize >= 4 ) { const uint16_t* __restrict sPtr = reinterpret_cast<const uint16_t*>(pSource); uint32_t * __restrict dPtr = reinterpret_cast<uint32_t*>(pDestination); - for( size_t ocount = 0, icount = 0; ((icount < inSize) && (ocount < outSize)); icount += 2, ocount += 4 ) + for( size_t ocount = 0, icount = 0; ( ( icount < ( inSize - 1 ) ) && ( ocount < ( outSize - 3 ) ) ); icount += 2, ocount += 4 ) { uint16_t t = *(sPtr++); @@ -763,37 +825,41 @@ static bool _LegacyExpandScanline( _Out_bytecap_(outSize) LPVOID pDestination, s *(dPtr++) = t1 | t2 | t3 | ta; } + return true; } - return true; + return false; case TEXP_LEGACY_P8: if ( (outFormat != DXGI_FORMAT_R8G8B8A8_UNORM) || !pal8 ) return false; // D3DFMT_P8 -> DXGI_FORMAT_R8G8B8A8_UNORM + if ( inSize >= 1 && outSize >= 4 ) { const uint8_t* __restrict sPtr = reinterpret_cast<const uint8_t*>(pSource); uint32_t * __restrict dPtr = reinterpret_cast<uint32_t*>(pDestination); - for( size_t ocount = 0, icount = 0; ((icount < inSize) && (ocount < outSize)); ++icount, ocount += 4 ) + for( size_t ocount = 0, icount = 0; ( ( icount < inSize ) && ( ocount < ( outSize - 3 ) ) ); ++icount, ocount += 4 ) { uint8_t t = *(sPtr++); *(dPtr++) = pal8[ t ]; } + return true; } - return true; + return false; case TEXP_LEGACY_A8P8: if ( (outFormat != DXGI_FORMAT_R8G8B8A8_UNORM) || !pal8 ) return false; // D3DFMT_A8P8 -> DXGI_FORMAT_R8G8B8A8_UNORM + if ( inSize >= 2 && outSize >= 4 ) { const uint16_t* __restrict sPtr = reinterpret_cast<const uint16_t*>(pSource); uint32_t * __restrict dPtr = reinterpret_cast<uint32_t*>(pDestination); - for( size_t ocount = 0, icount = 0; ((icount < inSize) && (ocount < outSize)); icount += 
2, ocount += 4 ) + for( size_t ocount = 0, icount = 0; ( ( icount < ( inSize - 1 ) ) && ( ocount < ( outSize - 3 ) ) ); icount += 2, ocount += 4 ) { uint16_t t = *(sPtr++); @@ -802,20 +868,21 @@ static bool _LegacyExpandScanline( _Out_bytecap_(outSize) LPVOID pDestination, s *(dPtr++) = t1 | ta; } + return true; } - return true; + return false; case TEXP_LEGACY_A4L4: switch( outFormat ) { -#ifdef DXGI_1_2_FORMATS case DXGI_FORMAT_B4G4R4A4_UNORM : // D3DFMT_A4L4 -> DXGI_FORMAT_B4G4R4A4_UNORM + if ( inSize >= 1 && outSize >= 2 ) { const uint8_t * __restrict sPtr = reinterpret_cast<const uint8_t*>(pSource); uint16_t * __restrict dPtr = reinterpret_cast<uint16_t*>(pDestination); - for( size_t ocount = 0, icount = 0; ((icount < inSize) && (ocount < outSize)); ++icount, ocount += 2 ) + for( size_t ocount = 0, icount = 0; ( ( icount < inSize ) && ( ocount < ( outSize - 1 ) ) ); ++icount, ocount += 2 ) { uint8_t t = *(sPtr++); @@ -824,17 +891,18 @@ static bool _LegacyExpandScanline( _Out_bytecap_(outSize) LPVOID pDestination, s *(dPtr++) = t1 | (t1 << 4) | (t1 << 8) | ta; } + return true; } - return true; -#endif // DXGI_1_2_FORMATS + return false; case DXGI_FORMAT_R8G8B8A8_UNORM: // D3DFMT_A4L4 -> DXGI_FORMAT_R8G8B8A8_UNORM + if ( inSize >= 1 && outSize >= 4 ) { const uint8_t * __restrict sPtr = reinterpret_cast<const uint8_t*>(pSource); uint32_t * __restrict dPtr = reinterpret_cast<uint32_t*>(pDestination); - for( size_t ocount = 0, icount = 0; ((icount < inSize) && (ocount < outSize)); ++icount, ocount += 4 ) + for( size_t ocount = 0, icount = 0; ( ( icount < inSize ) && ( ocount < ( outSize - 3 ) ) ); ++icount, ocount += 4 ) { uint8_t t = *(sPtr++); @@ -843,22 +911,23 @@ static bool _LegacyExpandScanline( _Out_bytecap_(outSize) LPVOID pDestination, s *(dPtr++) = t1 | (t1 << 8) | (t1 << 16) | ta; } + return true; } - return true; + return false; } break; -#ifndef DXGI_1_2_FORMATS case TEXP_LEGACY_B4G4R4A4: if (outFormat != DXGI_FORMAT_R8G8B8A8_UNORM) return false; // 
D3DFMT_A4R4G4B4 -> DXGI_FORMAT_R8G8B8A8_UNORM + if ( inSize >= 2 && outSize >= 4 ) { const uint16_t * __restrict sPtr = reinterpret_cast<const uint16_t*>(pSource); uint32_t * __restrict dPtr = reinterpret_cast<uint32_t*>(pDestination); - for( size_t ocount = 0, icount = 0; ((icount < inSize) && (ocount < outSize)); icount += 2, ocount += 4 ) + for( size_t ocount = 0, icount = 0; ( ( icount < ( inSize - 1 ) ) && ( ocount < ( outSize - 3 ) ) ); icount += 2, ocount += 4 ) { uint16_t t = *(sPtr++); @@ -869,9 +938,80 @@ static bool _LegacyExpandScanline( _Out_bytecap_(outSize) LPVOID pDestination, s *(dPtr++) = t1 | t2 | t3 | ta; } + return true; } - return true; -#endif + return false; + + case TEXP_LEGACY_L8: + if (outFormat != DXGI_FORMAT_R8G8B8A8_UNORM) + return false; + + // D3DFMT_L8 -> DXGI_FORMAT_R8G8B8A8_UNORM + if ( inSize >= 1 && outSize >= 4 ) + { + const uint8_t * __restrict sPtr = reinterpret_cast<const uint8_t*>(pSource); + uint32_t * __restrict dPtr = reinterpret_cast<uint32_t*>(pDestination); + + for( size_t ocount = 0, icount = 0; ( ( icount < inSize ) && ( ocount < ( outSize - 3 ) ) ); ++icount, ocount += 4 ) + { + uint32_t t1 = *(sPtr++); + uint32_t t2 = (t1 << 8); + uint32_t t3 = (t1 << 16); + + *(dPtr++) = t1 | t2 | t3 | 0xff000000; + } + return true; + } + return false; + + case TEXP_LEGACY_L16: + if (outFormat != DXGI_FORMAT_R16G16B16A16_UNORM) + return false; + + // D3DFMT_L16 -> DXGI_FORMAT_R16G16B16A16_UNORM + if ( inSize >= 2 && outSize >= 8 ) + { + const uint16_t* __restrict sPtr = reinterpret_cast<const uint16_t*>(pSource); + uint64_t * __restrict dPtr = reinterpret_cast<uint64_t*>(pDestination); + + for( size_t ocount = 0, icount = 0; ( ( icount < ( inSize - 1 ) ) && ( ocount < ( outSize - 7 ) ) ); icount += 2, ocount += 8 ) + { + uint16_t t = *(sPtr++); + + uint64_t t1 = t; + uint64_t t2 = (t1 << 16); + uint64_t t3 = (t1 << 32); + + *(dPtr++) = t1 | t2 | t3 | 0xffff000000000000; + } + return true; + } + return false; + + case 
TEXP_LEGACY_A8L8: + if (outFormat != DXGI_FORMAT_R8G8B8A8_UNORM) + return false; + + // D3DFMT_A8L8 -> DXGI_FORMAT_R8G8B8A8_UNORM + if ( inSize >= 2 && outSize >= 4 ) + { + const uint16_t* __restrict sPtr = reinterpret_cast<const uint16_t*>(pSource); + uint32_t * __restrict dPtr = reinterpret_cast<uint32_t*>(pDestination); + + for( size_t ocount = 0, icount = 0; ( ( icount < ( inSize - 1 ) ) && ( ocount < ( outSize - 3 ) ) ); icount += 2, ocount += 4 ) + { + uint16_t t = *(sPtr++); + + uint32_t t1 = (t & 0xff); + uint32_t t2 = (t1 << 8); + uint32_t t3 = (t1 << 16); + uint32_t ta = ( flags & TEXP_SCANLINE_SETALPHA ) ? 0xff000000 : ((t & 0xff00) << 16); + + *(dPtr++) = t1 | t2 | t3 | ta; + } + return true; + } + return false; } return false; @@ -881,22 +1021,22 @@ static bool _LegacyExpandScanline( _Out_bytecap_(outSize) LPVOID pDestination, s //------------------------------------------------------------------------------------- // Converts or copies image data from pPixels into scratch image data //------------------------------------------------------------------------------------- -static HRESULT _CopyImage( _In_bytecount_(size) const void* pPixels, _In_ size_t size, - _In_ const TexMetadata& metadata, _In_ DWORD cpFlags, _In_ DWORD convFlags, _In_opt_count_c_(256) const uint32_t *pal8, _In_ const ScratchImage& image ) +static HRESULT _CopyImage( _In_reads_bytes_(size) const void* pPixels, _In_ size_t size, + _In_ const TexMetadata& metadata, _In_ DWORD cpFlags, _In_ DWORD convFlags, _In_reads_opt_(256) const uint32_t *pal8, _In_ const ScratchImage& image ) { assert( pPixels ); assert( image.GetPixels() ); if ( !size ) return E_FAIL; - + if ( convFlags & CONV_FLAGS_EXPAND ) { if ( convFlags & CONV_FLAGS_888 ) cpFlags |= CP_FLAGS_24BPP; - else if ( convFlags & (CONV_FLAGS_565 | CONV_FLAGS_5551 | CONV_FLAGS_4444 | CONV_FLAGS_8332 | CONV_FLAGS_A8P8 ) ) + else if ( convFlags & (CONV_FLAGS_565 | CONV_FLAGS_5551 | CONV_FLAGS_4444 | CONV_FLAGS_8332 | CONV_FLAGS_A8P8 | 
CONV_FLAGS_L16 | CONV_FLAGS_A8L8) ) cpFlags |= CP_FLAGS_16BPP; - else if ( convFlags & (CONV_FLAGS_44 | CONV_FLAGS_332 | CONV_FLAGS_PAL8) ) + else if ( convFlags & (CONV_FLAGS_44 | CONV_FLAGS_332 | CONV_FLAGS_PAL8 | CONV_FLAGS_L8) ) cpFlags |= CP_FLAGS_8BPP; } @@ -909,7 +1049,12 @@ static HRESULT _CopyImage( _In_bytecount_(size) const void* pPixels, _In_ size_t assert( pixelSize <= size ); - std::unique_ptr<Image[]> timages( new Image[nimages] ); + std::unique_ptr<Image[]> timages( new (std::nothrow) Image[nimages] ); + if ( !timages ) + { + return E_OUTOFMEMORY; + } + if ( !_SetupImageArray( (uint8_t*)pPixels, size, metadata, cpFlags, timages.get(), nimages ) ) { return E_FAIL; @@ -962,17 +1107,27 @@ static HRESULT _CopyImage( _In_bytecount_(size) const void* pPixels, _In_ size_t size_t csize = std::min<size_t>( images[ index ].slicePitch, timages[ index ].slicePitch ); memcpy_s( pDest, images[ index ].slicePitch, pSrc, csize ); } + else if ( IsPlanar( metadata.format ) ) + { + size_t count = ComputeScanlines( metadata.format, images[ index ].height ); + if ( !count ) + return E_UNEXPECTED; + + size_t csize = std::min<size_t>( dpitch, spitch ); + for( size_t h = 0; h < count; ++h ) + { + memcpy_s( pDest, dpitch, pSrc, csize ); + pSrc += spitch; + pDest += dpitch; + } + } else { for( size_t h = 0; h < images[ index ].height; ++h ) { if ( convFlags & CONV_FLAGS_EXPAND ) { -#ifdef DXGI_1_2_FORMATS if ( convFlags & (CONV_FLAGS_565|CONV_FLAGS_5551|CONV_FLAGS_4444) ) -#else - if ( convFlags & (CONV_FLAGS_565|CONV_FLAGS_5551) ) -#endif { if ( !_ExpandScanline( pDest, dpitch, DXGI_FORMAT_R8G8B8A8_UNORM, pSrc, spitch, @@ -1040,17 +1195,18 @@ static HRESULT _CopyImage( _In_bytecount_(size) const void* pPixels, _In_ size_t size_t csize = std::min<size_t>( images[ index ].slicePitch, timages[ index ].slicePitch ); memcpy_s( pDest, images[ index ].slicePitch, pSrc, csize ); } + else if ( IsPlanar( metadata.format ) ) + { + // Direct3D does not support any planar formats for 
Texture3D + return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); + } else { for( size_t h = 0; h < images[ index ].height; ++h ) { if ( convFlags & CONV_FLAGS_EXPAND ) { -#ifdef DXGI_1_2_FORMATS if ( convFlags & (CONV_FLAGS_565|CONV_FLAGS_5551|CONV_FLAGS_4444) ) -#else - if ( convFlags & (CONV_FLAGS_565|CONV_FLAGS_5551) ) -#endif { if ( !_ExpandScanline( pDest, dpitch, DXGI_FORMAT_R8G8B8A8_UNORM, pSrc, spitch, @@ -1106,6 +1262,9 @@ static HRESULT _CopyImageInPlace( DWORD convFlags, _In_ const ScratchImage& imag const TexMetadata& metadata = image.GetMetadata(); + if ( IsPlanar( metadata.format ) ) + return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); + DWORD tflags = (convFlags & CONV_FLAGS_NOALPHA) ? TEXP_SCANLINE_SETALPHA : 0; if ( convFlags & CONV_FLAGS_SWIZZLE ) tflags |= TEXP_SCANLINE_LEGACY; @@ -1146,20 +1305,23 @@ static HRESULT _CopyImageInPlace( DWORD convFlags, _In_ const ScratchImage& imag // Obtain metadata from DDS file in memory/on disk //------------------------------------------------------------------------------------- +_Use_decl_annotations_ HRESULT GetMetadataFromDDSMemory( LPCVOID pSource, size_t size, DWORD flags, TexMetadata& metadata ) { if ( !pSource || size == 0 ) return E_INVALIDARG; - return _DecodeDDSHeader( pSource, size, flags, metadata, 0 ); + DWORD convFlags = 0; + return _DecodeDDSHeader( pSource, size, flags, metadata, convFlags ); } +_Use_decl_annotations_ HRESULT GetMetadataFromDDSFile( LPCWSTR szFile, DWORD flags, TexMetadata& metadata ) { if ( !szFile ) return E_INVALIDARG; -#if (_WIN32_WINNT >= 0x0602 /*_WIN32_WINNT_WIN8*/) +#if (_WIN32_WINNT >= _WIN32_WINNT_WIN8) ScopedHandle hFile( safe_handle( CreateFile2( szFile, GENERIC_READ, FILE_SHARE_READ, OPEN_EXISTING, 0 ) ) ); #else ScopedHandle hFile( safe_handle( CreateFileW( szFile, GENERIC_READ, FILE_SHARE_READ, 0, OPEN_EXISTING, @@ -1209,13 +1371,15 @@ HRESULT GetMetadataFromDDSFile( LPCWSTR szFile, DWORD flags, TexMetadata& metada return HRESULT_FROM_WIN32( GetLastError() ); } - 
return _DecodeDDSHeader( header, bytesRead, flags, metadata, 0 ); + DWORD convFlags = 0; + return _DecodeDDSHeader( header, bytesRead, flags, metadata, convFlags ); } //------------------------------------------------------------------------------------- // Load a DDS file in memory //------------------------------------------------------------------------------------- +_Use_decl_annotations_ HRESULT LoadFromDDSMemory( LPCVOID pSource, size_t size, DWORD flags, TexMetadata* metadata, ScratchImage& image ) { if ( !pSource || size == 0 ) @@ -1225,7 +1389,7 @@ HRESULT LoadFromDDSMemory( LPCVOID pSource, size_t size, DWORD flags, TexMetadat DWORD convFlags = 0; TexMetadata mdata; - HRESULT hr = _DecodeDDSHeader( pSource, size, flags, mdata, &convFlags ); + HRESULT hr = _DecodeDDSHeader( pSource, size, flags, mdata, convFlags ); if ( FAILED(hr) ) return hr; @@ -1249,7 +1413,7 @@ HRESULT LoadFromDDSMemory( LPCVOID pSource, size_t size, DWORD flags, TexMetadat if ( FAILED(hr) ) return hr; - LPCVOID pPixels = reinterpret_cast<LPCVOID>( reinterpret_cast<const uint8_t*>(pSource) + offset ); + auto pPixels = reinterpret_cast<LPCVOID>( reinterpret_cast<const uint8_t*>(pSource) + offset ); assert( pPixels ); hr = _CopyImage( pPixels, size - offset, mdata, (flags & DDS_FLAGS_LEGACY_DWORD) ? 
CP_FLAGS_LEGACY_DWORD : CP_FLAGS_NONE, convFlags, pal8, image ); @@ -1268,6 +1432,7 @@ HRESULT LoadFromDDSMemory( LPCVOID pSource, size_t size, DWORD flags, TexMetadat //------------------------------------------------------------------------------------- // Load a DDS file from disk //------------------------------------------------------------------------------------- +_Use_decl_annotations_ HRESULT LoadFromDDSFile( LPCWSTR szFile, DWORD flags, TexMetadata* metadata, ScratchImage& image ) { if ( !szFile ) @@ -1275,7 +1440,7 @@ HRESULT LoadFromDDSFile( LPCWSTR szFile, DWORD flags, TexMetadata* metadata, Scr image.Release(); -#if (_WIN32_WINNT >= 0x0602 /*_WIN32_WINNT_WIN8*/) +#if (_WIN32_WINNT >= _WIN32_WINNT_WIN8) ScopedHandle hFile( safe_handle ( CreateFile2( szFile, GENERIC_READ, FILE_SHARE_READ, OPEN_EXISTING, 0 ) ) ); #else ScopedHandle hFile( safe_handle ( CreateFileW( szFile, GENERIC_READ, FILE_SHARE_READ, 0, OPEN_EXISTING, @@ -1328,7 +1493,7 @@ HRESULT LoadFromDDSFile( LPCWSTR szFile, DWORD flags, TexMetadata* metadata, Scr DWORD convFlags = 0; TexMetadata mdata; - HRESULT hr = _DecodeDDSHeader( header, bytesRead, flags, mdata, &convFlags ); + HRESULT hr = _DecodeDDSHeader( header, bytesRead, flags, mdata, convFlags ); if ( FAILED(hr) ) return hr; @@ -1349,7 +1514,7 @@ HRESULT LoadFromDDSFile( LPCWSTR szFile, DWORD flags, TexMetadata* metadata, Scr std::unique_ptr<uint32_t[]> pal8; if ( convFlags & CONV_FLAGS_PAL8 ) { - pal8.reset( new uint32_t[256] ); + pal8.reset( new (std::nothrow) uint32_t[256] ); if ( !pal8 ) { return E_OUTOFMEMORY; @@ -1378,7 +1543,7 @@ HRESULT LoadFromDDSFile( LPCWSTR szFile, DWORD flags, TexMetadata* metadata, Scr if ( (convFlags & CONV_FLAGS_EXPAND) || (flags & DDS_FLAGS_LEGACY_DWORD) ) { - std::unique_ptr<uint8_t[]> temp( new uint8_t[ remaining ] ); + std::unique_ptr<uint8_t[]> temp( new (std::nothrow) uint8_t[ remaining ] ); if ( !temp ) { image.Release(); @@ -1408,7 +1573,7 @@ HRESULT LoadFromDDSFile( LPCWSTR szFile, DWORD 
flags, TexMetadata* metadata, Scr } else { - if ( remaining > image.GetPixelsSize() ) + if ( remaining < image.GetPixelsSize() ) { image.Release(); return E_FAIL; @@ -1442,6 +1607,7 @@ HRESULT LoadFromDDSFile( LPCWSTR szFile, DWORD flags, TexMetadata* metadata, Scr //------------------------------------------------------------------------------------- // Save a DDS file to memory //------------------------------------------------------------------------------------- +_Use_decl_annotations_ HRESULT SaveToDDSMemory( const Image* images, size_t nimages, const TexMetadata& metadata, DWORD flags, Blob& blob ) { if ( !images || (nimages == 0) ) @@ -1453,11 +1619,28 @@ HRESULT SaveToDDSMemory( const Image* images, size_t nimages, const TexMetadata& if ( FAILED(hr) ) return hr; + bool fastpath = true; + for( size_t i = 0; i < nimages; ++i ) { - required += images[ i ].slicePitch; if ( !images[ i ].pixels ) return E_POINTER; + + if ( images[ i ].format != metadata.format ) + return E_FAIL; + + size_t ddsRowPitch, ddsSlicePitch; + ComputePitch( metadata.format, images[ i ].width, images[ i ].height, ddsRowPitch, ddsSlicePitch, CP_FLAGS_NONE ); + + assert( images[ i ].rowPitch > 0 ); + assert( images[ i ].slicePitch > 0 ); + + if ( ( images[ i ].rowPitch != ddsRowPitch ) || ( images[ i ].slicePitch != ddsSlicePitch ) ) + { + fastpath = false; + } + + required += ddsSlicePitch; } assert( required > 0 ); @@ -1468,7 +1651,7 @@ HRESULT SaveToDDSMemory( const Image* images, size_t nimages, const TexMetadata& if ( FAILED(hr) ) return hr; - uint8_t* pDestination = reinterpret_cast<uint8_t*>( blob.GetBufferPointer() ); + auto pDestination = reinterpret_cast<uint8_t*>( blob.GetBufferPointer() ); assert( pDestination ); hr = _EncodeDDSHeader( metadata, flags, pDestination, blob.GetBufferSize(), required ); @@ -1503,14 +1686,47 @@ HRESULT SaveToDDSMemory( const Image* images, size_t nimages, const TexMetadata& return E_FAIL; } - size_t pixsize = images[ index ].slicePitch; - if ( 
memcpy_s( pDestination, remaining, images[ index ].pixels, pixsize ) ) + if ( fastpath ) { - blob.Release(); - return E_FAIL; + size_t pixsize = images[ index ].slicePitch; + if ( memcpy_s( pDestination, remaining, images[ index ].pixels, pixsize ) ) + { + blob.Release(); + return E_FAIL; + } + + pDestination += pixsize; + remaining -= pixsize; + } + else + { + size_t ddsRowPitch, ddsSlicePitch; + ComputePitch( metadata.format, images[ index ].width, images[ index ].height, ddsRowPitch, ddsSlicePitch, CP_FLAGS_NONE ); + + size_t rowPitch = images[ index ].rowPitch; + + const uint8_t * __restrict sPtr = reinterpret_cast<const uint8_t*>(images[ index ].pixels); + uint8_t * __restrict dPtr = reinterpret_cast<uint8_t*>(pDestination); + + size_t lines = ComputeScanlines( metadata.format, images[ index ].height ); + size_t csize = std::min<size_t>( rowPitch, ddsRowPitch ); + size_t tremaining = remaining; + for( size_t j = 0; j < lines; ++j ) + { + if ( memcpy_s( dPtr, tremaining, sPtr, csize ) ) + { + blob.Release(); + return E_FAIL; + } + + sPtr += rowPitch; + dPtr += ddsRowPitch; + tremaining -= ddsRowPitch; + } + + pDestination += ddsSlicePitch; + remaining -= ddsSlicePitch; } - pDestination += pixsize; - remaining -= pixsize; ++index; } @@ -1539,14 +1755,47 @@ HRESULT SaveToDDSMemory( const Image* images, size_t nimages, const TexMetadata& return E_FAIL; } - size_t pixsize = images[ index ].slicePitch; - if ( memcpy_s( pDestination, remaining, images[ index ].pixels, pixsize ) ) + if ( fastpath ) { - blob.Release(); - return E_FAIL; + size_t pixsize = images[ index ].slicePitch; + if ( memcpy_s( pDestination, remaining, images[ index ].pixels, pixsize ) ) + { + blob.Release(); + return E_FAIL; + } + + pDestination += pixsize; + remaining -= pixsize; + } + else + { + size_t ddsRowPitch, ddsSlicePitch; + ComputePitch( metadata.format, images[ index ].width, images[ index ].height, ddsRowPitch, ddsSlicePitch, CP_FLAGS_NONE ); + + size_t rowPitch = images[ index 
].rowPitch; + + const uint8_t * __restrict sPtr = reinterpret_cast<const uint8_t*>(images[ index ].pixels); + uint8_t * __restrict dPtr = reinterpret_cast<uint8_t*>(pDestination); + + size_t lines = ComputeScanlines( metadata.format, images[ index ].height ); + size_t csize = std::min<size_t>( rowPitch, ddsRowPitch ); + size_t tremaining = remaining; + for( size_t j = 0; j < lines; ++j ) + { + if ( memcpy_s( dPtr, tremaining, sPtr, csize ) ) + { + blob.Release(); + return E_FAIL; + } + + sPtr += rowPitch; + dPtr += ddsRowPitch; + tremaining -= ddsRowPitch; + } + + pDestination += ddsSlicePitch; + remaining -= ddsSlicePitch; } - pDestination += pixsize; - remaining -= pixsize; ++index; } @@ -1569,6 +1818,7 @@ HRESULT SaveToDDSMemory( const Image* images, size_t nimages, const TexMetadata& //------------------------------------------------------------------------------------- // Save a DDS file to disk //------------------------------------------------------------------------------------- +_Use_decl_annotations_ HRESULT SaveToDDSFile( const Image* images, size_t nimages, const TexMetadata& metadata, DWORD flags, LPCWSTR szFile ) { if ( !szFile ) @@ -1583,7 +1833,7 @@ HRESULT SaveToDDSFile( const Image* images, size_t nimages, const TexMetadata& m return hr; // Create file and write header -#if (_WIN32_WINNT >= 0x0602 /*_WIN32_WINNT_WIN8*/) +#if (_WIN32_WINNT >= _WIN32_WINNT_WIN8) ScopedHandle hFile( safe_handle( CreateFile2( szFile, GENERIC_WRITE, 0, CREATE_ALWAYS, 0 ) ) ); #else ScopedHandle hFile( safe_handle( CreateFileW( szFile, GENERIC_WRITE, 0, 0, CREATE_ALWAYS, 0, 0 ) ) ); @@ -1621,16 +1871,50 @@ HRESULT SaveToDDSFile( const Image* images, size_t nimages, const TexMetadata& m if ( !images[ index ].pixels ) return E_POINTER; - size_t pixsize = images[ index ].slicePitch; + assert( images[ index ].rowPitch > 0 ); + assert( images[ index ].slicePitch > 0 ); + + size_t ddsRowPitch, ddsSlicePitch; + ComputePitch( metadata.format, images[ index ].width, images[ 
index ].height, ddsRowPitch, ddsSlicePitch, CP_FLAGS_NONE ); - if ( !WriteFile( hFile.get(), images[ index ].pixels, static_cast<DWORD>( pixsize ), &bytesWritten, 0 ) ) + if ( images[ index ].slicePitch == ddsSlicePitch ) { - return HRESULT_FROM_WIN32( GetLastError() ); - } + if ( !WriteFile( hFile.get(), images[ index ].pixels, static_cast<DWORD>( ddsSlicePitch ), &bytesWritten, 0 ) ) + { + return HRESULT_FROM_WIN32( GetLastError() ); + } - if ( bytesWritten != pixsize ) + if ( bytesWritten != ddsSlicePitch ) + { + return E_FAIL; + } + } + else { - return E_FAIL; + size_t rowPitch = images[ index ].rowPitch; + if ( rowPitch < ddsRowPitch ) + { + // DDS uses 1-byte alignment, so if this is happening then the input pitch isn't actually a full line of data + return E_FAIL; + } + + const uint8_t * __restrict sPtr = reinterpret_cast<const uint8_t*>(images[ index ].pixels); + + size_t lines = ComputeScanlines( metadata.format, images[ index ].height ); + for( size_t j = 0; j < lines; ++j ) + { + if ( !WriteFile( hFile.get(), sPtr, static_cast<DWORD>( ddsRowPitch ), &bytesWritten, 0 ) ) + { + return HRESULT_FROM_WIN32( GetLastError() ); + } + + if ( bytesWritten != ddsRowPitch ) + { + return E_FAIL; + } + + sPtr += rowPitch; + } } } } @@ -1655,16 +1939,50 @@ HRESULT SaveToDDSFile( const Image* images, size_t nimages, const TexMetadata& m if ( !images[ index ].pixels ) return E_POINTER; - size_t pixsize = images[ index ].slicePitch; + assert( images[ index ].rowPitch > 0 ); + assert( images[ index ].slicePitch > 0 ); - if ( !WriteFile( hFile.get(), images[ index ].pixels, static_cast<DWORD>( pixsize ), &bytesWritten, 0 ) ) + size_t ddsRowPitch, ddsSlicePitch; + ComputePitch( metadata.format, images[ index ].width, images[ index ].height, ddsRowPitch, ddsSlicePitch, CP_FLAGS_NONE ); + + if ( images[ index ].slicePitch == ddsSlicePitch ) { - return HRESULT_FROM_WIN32( GetLastError() ); - } + if ( !WriteFile( hFile.get(), images[ index ].pixels, static_cast<DWORD>( 
ddsSlicePitch ), &bytesWritten, 0 ) ) + { + return HRESULT_FROM_WIN32( GetLastError() ); + } - if ( bytesWritten != pixsize ) + if ( bytesWritten != ddsSlicePitch ) + { + return E_FAIL; + } + } + else { - return E_FAIL; + size_t rowPitch = images[ index ].rowPitch; + if ( rowPitch < ddsRowPitch ) + { + // DDS uses 1-byte alignment, so if this is happening then the input pitch isn't actually a full line of data + return E_FAIL; + } + + const uint8_t * __restrict sPtr = reinterpret_cast<const uint8_t*>(images[ index ].pixels); + + size_t lines = ComputeScanlines( metadata.format, images[ index ].height ); + for( size_t j = 0; j < lines; ++j ) + { + if ( !WriteFile( hFile.get(), sPtr, static_cast<DWORD>( ddsRowPitch ), &bytesWritten, 0 ) ) + { + return HRESULT_FROM_WIN32( GetLastError() ); + } + + if ( bytesWritten != ddsRowPitch ) + { + return E_FAIL; + } + + sPtr += rowPitch; + } } } diff --git a/thirdparty/directxtex/DirectXTex/DirectXTexFlipRotate.cpp b/thirdparty/directxtex/DirectXTex/DirectXTexFlipRotate.cpp index c90ea090..4459e7f3 100644 --- a/thirdparty/directxtex/DirectXTex/DirectXTexFlipRotate.cpp +++ b/thirdparty/directxtex/DirectXTex/DirectXTexFlipRotate.cpp @@ -15,6 +15,8 @@ #include "DirectXTexP.h" +using Microsoft::WRL::ComPtr; + namespace DirectX { @@ -33,15 +35,15 @@ static HRESULT _PerformFlipRotateUsingWIC( _In_ const Image& srcImage, _In_ DWOR if ( !pWIC ) return E_NOINTERFACE; - ScopedObject<IWICBitmap> source; + ComPtr<IWICBitmap> source; HRESULT hr = pWIC->CreateBitmapFromMemory( static_cast<UINT>( srcImage.width ), static_cast<UINT>( srcImage.height ), pfGUID, static_cast<UINT>( srcImage.rowPitch ), static_cast<UINT>( srcImage.slicePitch ), - srcImage.pixels, &source ); + srcImage.pixels, source.GetAddressOf() ); if ( FAILED(hr) ) return hr; - ScopedObject<IWICBitmapFlipRotator> FR; - hr = pWIC->CreateBitmapFlipRotator( &FR ); + ComPtr<IWICBitmapFlipRotator> FR; + hr = pWIC->CreateBitmapFlipRotator( FR.GetAddressOf() ); if ( FAILED(hr) ) 
return hr; @@ -126,6 +128,7 @@ static HRESULT _PerformFlipRotateViaF32( _In_ const Image& srcImage, _In_ DWORD //------------------------------------------------------------------------------------- // Flip/rotate image //------------------------------------------------------------------------------------- +_Use_decl_annotations_ HRESULT FlipRotate( const Image& srcImage, DWORD flags, ScratchImage& image ) { if ( !srcImage.pixels ) @@ -134,7 +137,7 @@ HRESULT FlipRotate( const Image& srcImage, DWORD flags, ScratchImage& image ) if ( !flags ) return E_INVALIDARG; -#ifdef _AMD64_ +#ifdef _M_X64 if ( (srcImage.width > 0xFFFFFFFF) || (srcImage.height > 0xFFFFFFFF) ) return E_INVALIDARG; #endif @@ -207,6 +210,7 @@ HRESULT FlipRotate( const Image& srcImage, DWORD flags, ScratchImage& image ) //------------------------------------------------------------------------------------- // Flip/rotate image (complex) //------------------------------------------------------------------------------------- +_Use_decl_annotations_ HRESULT FlipRotate( const Image* srcImages, size_t nimages, const TexMetadata& metadata, DWORD flags, ScratchImage& result ) { @@ -278,7 +282,7 @@ HRESULT FlipRotate( const Image* srcImages, size_t nimages, const TexMetadata& m return E_FAIL; } -#ifdef _AMD64_ +#ifdef _M_X64 if ( (src.width > 0xFFFFFFFF) || (src.height > 0xFFFFFFFF) ) return E_FAIL; #endif diff --git a/thirdparty/directxtex/DirectXTex/DirectXTexImage.cpp b/thirdparty/directxtex/DirectXTex/DirectXTexImage.cpp index 95d54267..28f433ec 100644 --- a/thirdparty/directxtex/DirectXTex/DirectXTexImage.cpp +++ b/thirdparty/directxtex/DirectXTex/DirectXTexImage.cpp @@ -20,10 +20,12 @@ namespace DirectX extern bool _CalculateMipLevels( _In_ size_t width, _In_ size_t height, _Inout_ size_t& mipLevels ); extern bool _CalculateMipLevels3D( _In_ size_t width, _In_ size_t height, _In_ size_t depth, _Inout_ size_t& mipLevels ); +extern bool _IsAlphaAllOpaqueBC( _In_ const Image& cImage ); 
//------------------------------------------------------------------------------------- // Determines number of image array entries and pixel size //------------------------------------------------------------------------------------- +_Use_decl_annotations_ void _DetermineImageArray( const TexMetadata& metadata, DWORD cpFlags, size_t& nImages, size_t& pixelSize ) { @@ -102,6 +104,7 @@ void _DetermineImageArray( const TexMetadata& metadata, DWORD cpFlags, //------------------------------------------------------------------------------------- // Fills in the image array entries //------------------------------------------------------------------------------------- +_Use_decl_annotations_ bool _SetupImageArray( uint8_t *pMemory, size_t pixelSize, const TexMetadata& metadata, DWORD cpFlags, Image* images, size_t nImages ) @@ -226,14 +229,39 @@ bool _SetupImageArray( uint8_t *pMemory, size_t pixelSize, // ScratchImage - Bitmap image container //===================================================================================== +ScratchImage& ScratchImage::operator= (ScratchImage&& moveFrom) +{ + if ( this != &moveFrom ) + { + Release(); + + _nimages = moveFrom._nimages; + _size = moveFrom._size; + _metadata = moveFrom._metadata; + _image = moveFrom._image; + _memory = moveFrom._memory; + + moveFrom._nimages = 0; + moveFrom._size = 0; + moveFrom._image = nullptr; + moveFrom._memory = nullptr; + } + return *this; +} + + //------------------------------------------------------------------------------------- // Methods //------------------------------------------------------------------------------------- -HRESULT ScratchImage::Initialize( const TexMetadata& mdata ) +_Use_decl_annotations_ +HRESULT ScratchImage::Initialize( const TexMetadata& mdata, DWORD flags ) { - if ( !IsValid(mdata.format) || IsVideo(mdata.format) ) + if ( !IsValid(mdata.format) ) return E_INVALIDARG; + if ( IsPalettized(mdata.format) ) + return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); + size_t 
mipLevels = mdata.mipLevels; switch( mdata.dimension ) @@ -250,7 +278,7 @@ HRESULT ScratchImage::Initialize( const TexMetadata& mdata ) if ( !mdata.width || !mdata.height || mdata.depth != 1 || !mdata.arraySize ) return E_INVALIDARG; - if ( mdata.miscFlags & TEX_MISC_TEXTURECUBE ) + if ( mdata.IsCubemap() ) { if ( (mdata.arraySize % 6) != 0 ) return E_INVALIDARG; @@ -263,7 +291,7 @@ HRESULT ScratchImage::Initialize( const TexMetadata& mdata ) case TEX_DIMENSION_TEXTURE3D: if ( !mdata.width || !mdata.height || !mdata.depth || mdata.arraySize != 1 ) return E_INVALIDARG; - + if ( !_CalculateMipLevels3D(mdata.width,mdata.height,mdata.depth,mipLevels) ) return E_INVALIDARG; break; @@ -279,14 +307,15 @@ HRESULT ScratchImage::Initialize( const TexMetadata& mdata ) _metadata.depth = mdata.depth; _metadata.arraySize = mdata.arraySize; _metadata.mipLevels = mipLevels; - _metadata.miscFlags = mdata.miscFlags & TEX_MISC_TEXTURECUBE; + _metadata.miscFlags = mdata.miscFlags; + _metadata.miscFlags2 = mdata.miscFlags2; _metadata.format = mdata.format; _metadata.dimension = mdata.dimension; size_t pixelSize, nimages; - _DetermineImageArray( _metadata, CP_FLAGS_NONE, nimages, pixelSize ); + _DetermineImageArray( _metadata, flags, nimages, pixelSize ); - _image = new Image[ nimages ]; + _image = new (std::nothrow) Image[ nimages ]; if ( !_image ) return E_OUTOFMEMORY; @@ -300,7 +329,7 @@ HRESULT ScratchImage::Initialize( const TexMetadata& mdata ) return E_OUTOFMEMORY; } _size = pixelSize; - if ( !_SetupImageArray( _memory, pixelSize, _metadata, CP_FLAGS_NONE, _image, nimages ) ) + if ( !_SetupImageArray( _memory, pixelSize, _metadata, flags, _image, nimages ) ) { Release(); return E_FAIL; @@ -309,13 +338,14 @@ HRESULT ScratchImage::Initialize( const TexMetadata& mdata ) return S_OK; } -HRESULT ScratchImage::Initialize1D( DXGI_FORMAT fmt, size_t length, size_t arraySize, size_t mipLevels ) +_Use_decl_annotations_ +HRESULT ScratchImage::Initialize1D( DXGI_FORMAT fmt, size_t length, 
size_t arraySize, size_t mipLevels, DWORD flags ) { - if ( !IsValid(fmt) || IsVideo(fmt) || !length || !arraySize ) + if ( !length || !arraySize ) return E_INVALIDARG; // 1D is a special case of the 2D case - HRESULT hr = Initialize2D( fmt, length, 1, arraySize, mipLevels ); + HRESULT hr = Initialize2D( fmt, length, 1, arraySize, mipLevels, flags ); if ( FAILED(hr) ) return hr; @@ -324,11 +354,15 @@ HRESULT ScratchImage::Initialize1D( DXGI_FORMAT fmt, size_t length, size_t array return S_OK; } -HRESULT ScratchImage::Initialize2D( DXGI_FORMAT fmt, size_t width, size_t height, size_t arraySize, size_t mipLevels ) +_Use_decl_annotations_ +HRESULT ScratchImage::Initialize2D( DXGI_FORMAT fmt, size_t width, size_t height, size_t arraySize, size_t mipLevels, DWORD flags ) { - if ( !IsValid(fmt) || IsVideo(fmt) || !width || !height || !arraySize ) + if ( !IsValid(fmt) || !width || !height || !arraySize ) return E_INVALIDARG; + if ( IsPalettized(fmt) ) + return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); + if ( !_CalculateMipLevels(width,height,mipLevels) ) return E_INVALIDARG; @@ -340,13 +374,14 @@ HRESULT ScratchImage::Initialize2D( DXGI_FORMAT fmt, size_t width, size_t height _metadata.arraySize = arraySize; _metadata.mipLevels = mipLevels; _metadata.miscFlags = 0; + _metadata.miscFlags2 = 0; _metadata.format = fmt; _metadata.dimension = TEX_DIMENSION_TEXTURE2D; size_t pixelSize, nimages; - _DetermineImageArray( _metadata, CP_FLAGS_NONE, nimages, pixelSize ); + _DetermineImageArray( _metadata, flags, nimages, pixelSize ); - _image = new Image[ nimages ]; + _image = new (std::nothrow) Image[ nimages ]; if ( !_image ) return E_OUTOFMEMORY; @@ -360,7 +395,7 @@ HRESULT ScratchImage::Initialize2D( DXGI_FORMAT fmt, size_t width, size_t height return E_OUTOFMEMORY; } _size = pixelSize; - if ( !_SetupImageArray( _memory, pixelSize, _metadata, CP_FLAGS_NONE, _image, nimages ) ) + if ( !_SetupImageArray( _memory, pixelSize, _metadata, flags, _image, nimages ) ) { Release(); return 
E_FAIL; @@ -369,11 +404,15 @@ HRESULT ScratchImage::Initialize2D( DXGI_FORMAT fmt, size_t width, size_t height return S_OK; } -HRESULT ScratchImage::Initialize3D( DXGI_FORMAT fmt, size_t width, size_t height, size_t depth, size_t mipLevels ) +_Use_decl_annotations_ +HRESULT ScratchImage::Initialize3D( DXGI_FORMAT fmt, size_t width, size_t height, size_t depth, size_t mipLevels, DWORD flags ) { - if ( !IsValid(fmt) || IsVideo(fmt) || !width || !height || !depth ) + if ( !IsValid(fmt) || !width || !height || !depth ) return E_INVALIDARG; + if ( IsPalettized(fmt) ) + return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); + if ( !_CalculateMipLevels3D(width,height,depth,mipLevels) ) return E_INVALIDARG; @@ -385,13 +424,14 @@ HRESULT ScratchImage::Initialize3D( DXGI_FORMAT fmt, size_t width, size_t height _metadata.arraySize = 1; // Direct3D 10.x/11 does not support arrays of 3D textures _metadata.mipLevels = mipLevels; _metadata.miscFlags = 0; + _metadata.miscFlags2 = 0; _metadata.format = fmt; _metadata.dimension = TEX_DIMENSION_TEXTURE3D; size_t pixelSize, nimages; - _DetermineImageArray( _metadata, CP_FLAGS_NONE, nimages, pixelSize ); + _DetermineImageArray( _metadata, flags, nimages, pixelSize ); - _image = new Image[ nimages ]; + _image = new (std::nothrow) Image[ nimages ]; if ( !_image ) { Release(); @@ -408,7 +448,7 @@ HRESULT ScratchImage::Initialize3D( DXGI_FORMAT fmt, size_t width, size_t height } _size = pixelSize; - if ( !_SetupImageArray( _memory, pixelSize, _metadata, CP_FLAGS_NONE, _image, nimages ) ) + if ( !_SetupImageArray( _memory, pixelSize, _metadata, flags, _image, nimages ) ) { Release(); return E_FAIL; @@ -417,13 +457,14 @@ HRESULT ScratchImage::Initialize3D( DXGI_FORMAT fmt, size_t width, size_t height return S_OK; } -HRESULT ScratchImage::InitializeCube( DXGI_FORMAT fmt, size_t width, size_t height, size_t nCubes, size_t mipLevels ) +_Use_decl_annotations_ +HRESULT ScratchImage::InitializeCube( DXGI_FORMAT fmt, size_t width, size_t height, size_t 
nCubes, size_t mipLevels, DWORD flags ) { - if ( !IsValid(fmt) || IsVideo(fmt) || !width || !height || !nCubes ) + if ( !width || !height || !nCubes ) return E_INVALIDARG; - + // A DirectX11 cubemap is just a 2D texture array that is a multiple of 6 for each cube - HRESULT hr = Initialize2D( fmt, width, height, nCubes * 6, mipLevels ); + HRESULT hr = Initialize2D( fmt, width, height, nCubes * 6, mipLevels, flags ); if ( FAILED(hr) ) return hr; @@ -432,34 +473,45 @@ HRESULT ScratchImage::InitializeCube( DXGI_FORMAT fmt, size_t width, size_t heig return S_OK; } -HRESULT ScratchImage::InitializeFromImage( const Image& srcImage, bool allow1D ) +_Use_decl_annotations_ +HRESULT ScratchImage::InitializeFromImage( const Image& srcImage, bool allow1D, DWORD flags ) { HRESULT hr = ( srcImage.height > 1 || !allow1D ) - ? Initialize2D( srcImage.format, srcImage.width, srcImage.height, 1, 1 ) - : Initialize1D( srcImage.format, srcImage.width, 1, 1 ); + ? Initialize2D( srcImage.format, srcImage.width, srcImage.height, 1, 1, flags ) + : Initialize1D( srcImage.format, srcImage.width, 1, 1, flags ); if ( FAILED(hr) ) return hr; + size_t rowCount = ComputeScanlines( srcImage.format, srcImage.height ); + if ( !rowCount ) + return E_UNEXPECTED; + const uint8_t* sptr = reinterpret_cast<const uint8_t*>( srcImage.pixels ); if ( !sptr ) return E_POINTER; - uint8_t* dptr = reinterpret_cast<uint8_t*>( _image[0].pixels ); + auto dptr = reinterpret_cast<uint8_t*>( _image[0].pixels ); if ( !dptr ) return E_POINTER; - for( size_t y = 0; y < srcImage.height; ++y ) + size_t spitch = srcImage.rowPitch; + size_t dpitch = _image[0].rowPitch; + + size_t size = std::min<size_t>( dpitch, spitch ); + + for( size_t y = 0; y < rowCount; ++y ) { - _CopyScanline( dptr, _image[0].rowPitch, sptr, srcImage.rowPitch, srcImage.format, TEXP_SCANLINE_NONE ); - sptr += srcImage.rowPitch; - dptr += _image[0].rowPitch; + memcpy_s( dptr, dpitch, sptr, size ); + sptr += spitch; + dptr += dpitch; } return S_OK; } 
-HRESULT ScratchImage::InitializeArrayFromImages( const Image* images, size_t nImages, bool allow1D ) +_Use_decl_annotations_ +HRESULT ScratchImage::InitializeArrayFromImages( const Image* images, size_t nImages, bool allow1D, DWORD flags ) { if ( !images || !nImages ) return E_INVALIDARG; @@ -481,35 +533,45 @@ HRESULT ScratchImage::InitializeArrayFromImages( const Image* images, size_t nIm } HRESULT hr = ( height > 1 || !allow1D ) - ? Initialize2D( format, width, height, nImages, 1 ) - : Initialize1D( format, width, nImages, 1 ); + ? Initialize2D( format, width, height, nImages, 1, flags ) + : Initialize1D( format, width, nImages, 1, flags ); if ( FAILED(hr) ) return hr; + size_t rowCount = ComputeScanlines( format, height ); + if ( !rowCount ) + return E_UNEXPECTED; + for( size_t index=0; index < nImages; ++index ) { - const uint8_t* sptr = reinterpret_cast<const uint8_t*>( images[index].pixels ); + auto sptr = reinterpret_cast<const uint8_t*>( images[index].pixels ); if ( !sptr ) return E_POINTER; assert( index < _nimages ); - uint8_t* dptr = reinterpret_cast<uint8_t*>( _image[index].pixels ); + auto dptr = reinterpret_cast<uint8_t*>( _image[index].pixels ); if ( !dptr ) return E_POINTER; - for( size_t y = 0; y < height; ++y ) + size_t spitch = images[index].rowPitch; + size_t dpitch = _image[index].rowPitch; + + size_t size = std::min<size_t>( dpitch, spitch ); + + for( size_t y = 0; y < rowCount; ++y ) { - _CopyScanline( dptr, _image[index].rowPitch, sptr, images[index].rowPitch, format, TEXP_SCANLINE_NONE ); - sptr += images[index].rowPitch; - dptr += _image[index].rowPitch; + memcpy_s( dptr, dpitch, sptr, size ); + sptr += spitch; + dptr += dpitch; } } return S_OK; } -HRESULT ScratchImage::InitializeCubeFromImages( const Image* images, size_t nImages ) +_Use_decl_annotations_ +HRESULT ScratchImage::InitializeCubeFromImages( const Image* images, size_t nImages, DWORD flags ) { if ( !images || !nImages ) return E_INVALIDARG; @@ -518,7 +580,7 @@ HRESULT 
ScratchImage::InitializeCubeFromImages( const Image* images, size_t nIma if ( ( nImages % 6 ) != 0 ) return E_INVALIDARG; - HRESULT hr = InitializeArrayFromImages( images, nImages, false ); + HRESULT hr = InitializeArrayFromImages( images, nImages, false, flags ); if ( FAILED(hr) ) return hr; @@ -527,7 +589,8 @@ HRESULT ScratchImage::InitializeCubeFromImages( const Image* images, size_t nIma return S_OK; } -HRESULT ScratchImage::Initialize3DFromImages( const Image* images, size_t depth ) +_Use_decl_annotations_ +HRESULT ScratchImage::Initialize3DFromImages( const Image* images, size_t depth, DWORD flags ) { if ( !images || !depth ) return E_INVALIDARG; @@ -548,26 +611,35 @@ HRESULT ScratchImage::Initialize3DFromImages( const Image* images, size_t depth } } - HRESULT hr = Initialize3D( format, width, height, depth, 1 ); + HRESULT hr = Initialize3D( format, width, height, depth, 1, flags ); if ( FAILED(hr) ) return hr; + size_t rowCount = ComputeScanlines( format, height ); + if ( !rowCount ) + return E_UNEXPECTED; + for( size_t slice=0; slice < depth; ++slice ) { - const uint8_t* sptr = reinterpret_cast<const uint8_t*>( images[slice].pixels ); + auto sptr = reinterpret_cast<const uint8_t*>( images[slice].pixels ); if ( !sptr ) return E_POINTER; assert( slice < _nimages ); - uint8_t* dptr = reinterpret_cast<uint8_t*>( _image[slice].pixels ); + auto dptr = reinterpret_cast<uint8_t*>( _image[slice].pixels ); if ( !dptr ) return E_POINTER; - for( size_t y = 0; y < height; ++y ) + size_t spitch = images[slice].rowPitch; + size_t dpitch = _image[slice].rowPitch; + + size_t size = std::min<size_t>( dpitch, spitch ); + + for( size_t y = 0; y < rowCount; ++y ) { - _CopyScanline( dptr, _image[slice].rowPitch, sptr, images[slice].rowPitch, format, TEXP_SCANLINE_NONE ); - sptr += images[slice].rowPitch; - dptr += _image[slice].rowPitch; + memcpy_s( dptr, dpitch, sptr, size ); + sptr += spitch; + dptr += dpitch; } } @@ -594,22 +666,15 @@ void ScratchImage::Release() 
memset(&_metadata, 0, sizeof(_metadata)); } +_Use_decl_annotations_ bool ScratchImage::OverrideFormat( DXGI_FORMAT f ) { if ( !_image ) return false; - if ( !IsValid( f ) || IsVideo( f ) ) + if ( !IsValid( f ) || IsPlanar( f ) || IsPalettized( f ) ) return false; - if ( ( BitsPerPixel( f ) != BitsPerPixel( _metadata.format ) ) - || ( IsCompressed( f ) != IsCompressed( _metadata.format ) ) - || ( IsPacked( f ) != IsPacked( _metadata.format ) ) ) - { - // Can't change the effective pitch of the format this way - return false; - } - for( size_t index = 0; index < _nimages; ++index ) { _image[ index ].format = f; @@ -620,6 +685,7 @@ bool ScratchImage::OverrideFormat( DXGI_FORMAT f ) return true; } +_Use_decl_annotations_ const Image* ScratchImage::GetImage(size_t mip, size_t item, size_t slice) const { if ( mip >= _metadata.mipLevels ) @@ -671,4 +737,58 @@ const Image* ScratchImage::GetImage(size_t mip, size_t item, size_t slice) const return &_image[index]; } +bool ScratchImage::IsAlphaAllOpaque() const +{ + if ( !_image ) + return false; + + if ( !HasAlpha( _metadata.format ) ) + return true; + + if ( IsCompressed( _metadata.format ) ) + { + for( size_t index = 0; index < _nimages; ++index ) + { + if ( !_IsAlphaAllOpaqueBC( _image[ index ] ) ) + return false; + } + } + else + { + ScopedAlignedArrayXMVECTOR scanline( reinterpret_cast<XMVECTOR*>( _aligned_malloc( (sizeof(XMVECTOR)*_metadata.width), 16 ) ) ); + if ( !scanline ) + return false; + + static const XMVECTORF32 threshold = { 0.99f, 0.99f, 0.99f, 0.99f }; + + for( size_t index = 0; index < _nimages; ++index ) + { +#pragma warning( suppress : 6011 ) + const Image& img = _image[ index ]; + + const uint8_t *pPixels = img.pixels; + assert( pPixels ); + + for( size_t h = 0; h < img.height; ++h ) + { + if ( !_LoadScanline( scanline.get(), img.width, pPixels, img.rowPitch, img.format ) ) + return false; + + XMVECTOR* ptr = scanline.get(); + for( size_t w = 0; w < img.width; ++w ) + { + XMVECTOR alpha = 
XMVectorSplatW( *ptr ); + if ( XMVector4Less( alpha, threshold ) ) + return false; + ++ptr; + } + + pPixels += img.rowPitch; + } + } + } + + return true; +} + }; // namespace diff --git a/thirdparty/directxtex/DirectXTex/DirectXTexMipmaps.cpp b/thirdparty/directxtex/DirectXTex/DirectXTexMipmaps.cpp index 1e7e27cd..1599d5c0 100644 --- a/thirdparty/directxtex/DirectXTex/DirectXTexMipmaps.cpp +++ b/thirdparty/directxtex/DirectXTex/DirectXTexMipmaps.cpp @@ -15,12 +15,13 @@ #include "DirectXTexP.h" +#include "filters.h" + +using Microsoft::WRL::ComPtr; + namespace DirectX { -static const XMVECTORF32 s_boxScale = { 0.25f, 0.25f, 0.25f, 0.25f }; -static const XMVECTORF32 s_boxScale3D = { 0.125f, 0.125f, 0.125f, 0.125f }; - //------------------------------------------------------------------------------------- // Mipmap helper functions //------------------------------------------------------------------------------------- @@ -29,7 +30,9 @@ inline static bool ispow2( _In_ size_t x ) return ((x != 0) && !(x & (x - 1))); } -static size_t _CountMips( _In_ size_t width, _In_ size_t height) + +//--- mipmap (1D/2D) levels computation --- +static size_t _CountMips( _In_ size_t width, _In_ size_t height ) { size_t mipLevels = 1; @@ -66,7 +69,9 @@ bool _CalculateMipLevels( _In_ size_t width, _In_ size_t height, _Inout_ size_t& return true; } -static size_t _CountMips3D( _In_ size_t width, _In_ size_t height, _In_ size_t depth) + +//--- volume mipmap (3D) levels computation --- +static size_t _CountMips3D( _In_ size_t width, _In_ size_t height, _In_ size_t depth ) { size_t mipLevels = 1; @@ -91,14 +96,11 @@ bool _CalculateMipLevels3D( _In_ size_t width, _In_ size_t height, _In_ size_t d { if ( mipLevels > 1 ) { - if ( !ispow2(width) || !ispow2(height) || !ispow2(depth) ) - return false; - size_t maxMips = _CountMips3D(width,height,depth); if ( mipLevels > maxMips ) return false; } - else if ( mipLevels == 0 && ispow2(width) && ispow2(height) && ispow2(depth) ) + else if ( mipLevels 
== 0 ) { mipLevels = _CountMips3D(width,height,depth); } @@ -109,8 +111,12 @@ bool _CalculateMipLevels3D( _In_ size_t width, _In_ size_t height, _In_ size_t d return true; } + #ifndef __MINGW32__ +//------------------------------------------------------------------------------------- +// WIC related helper functions +//------------------------------------------------------------------------------------- static HRESULT _EnsureWicBitmapPixelFormat( _In_ IWICImagingFactory* pWIC, _In_ IWICBitmap* src, _In_ DWORD filter, _In_ const WICPixelFormatGUID& desiredPixelFormat, _Deref_out_ IWICBitmap** dest ) @@ -132,8 +138,19 @@ static HRESULT _EnsureWicBitmapPixelFormat( _In_ IWICImagingFactory* pWIC, _In_ } else { - ScopedObject<IWICFormatConverter> converter; - hr = pWIC->CreateFormatConverter( &converter ); + ComPtr<IWICFormatConverter> converter; + hr = pWIC->CreateFormatConverter( converter.GetAddressOf() ); + + if ( SUCCEEDED(hr) ) + { + BOOL canConvert = FALSE; + hr = converter->CanConvert( actualPixelFormat, desiredPixelFormat, &canConvert ); + if ( FAILED(hr) || !canConvert ) + { + return E_UNEXPECTED; + } + } + if ( SUCCEEDED(hr) ) { hr = converter->Initialize( src, desiredPixelFormat, _GetWICDither(filter), 0, 0, WICBitmapPaletteTypeCustom ); @@ -149,6 +166,8 @@ static HRESULT _EnsureWicBitmapPixelFormat( _In_ IWICImagingFactory* pWIC, _In_ return hr; } + +//--- Resizing color and alpha channels separately using WIC --- HRESULT _ResizeSeparateColorAndAlpha( _In_ IWICImagingFactory* pWIC, _In_ IWICBitmap* original, _In_ size_t newWidth, _In_ size_t newHeight, _In_ DWORD filter, _Inout_ const Image* img ) { @@ -168,13 +187,13 @@ HRESULT _ResizeSeparateColorAndAlpha( _In_ IWICImagingFactory* pWIC, _In_ IWICBi if ( SUCCEEDED(hr) ) { - ScopedObject<IWICComponentInfo> componentInfo; - hr = pWIC->CreateComponentInfo( desiredPixelFormat, &componentInfo ); + ComPtr<IWICComponentInfo> componentInfo; + hr = pWIC->CreateComponentInfo( desiredPixelFormat, 
componentInfo.GetAddressOf() ); - ScopedObject<IWICPixelFormatInfo> pixelFormatInfo; + ComPtr<IWICPixelFormatInfo> pixelFormatInfo; if ( SUCCEEDED(hr) ) { - hr = componentInfo->QueryInterface( __uuidof(IWICPixelFormatInfo), (void**)&pixelFormatInfo ); + hr = componentInfo.As( &pixelFormatInfo ); } UINT bitsPerPixel = 0; @@ -195,7 +214,7 @@ HRESULT _ResizeSeparateColorAndAlpha( _In_ IWICImagingFactory* pWIC, _In_ IWICBi } else { -#if(_WIN32_WINNT >= 0x0602 /*_WIN32_WINNT_WIN8*/) || defined(_WIN7_PLATFORM_UPDATE) +#if(_WIN32_WINNT >= _WIN32_WINNT_WIN8) || defined(_WIN7_PLATFORM_UPDATE) if ( _IsWIC2() ) { colorBytesInPixel = colorBytesPerPixel = 12; @@ -216,17 +235,15 @@ HRESULT _ResizeSeparateColorAndAlpha( _In_ IWICImagingFactory* pWIC, _In_ IWICBi } // Resize color only image (no alpha channel) - ScopedObject<IWICBitmap> resizedColor; + ComPtr<IWICBitmap> resizedColor; if ( SUCCEEDED(hr) ) { - ScopedObject<IWICBitmapScaler> colorScaler; - - hr = pWIC->CreateBitmapScaler(&colorScaler); + ComPtr<IWICBitmapScaler> colorScaler; + hr = pWIC->CreateBitmapScaler( colorScaler.GetAddressOf() ); if ( SUCCEEDED(hr) ) { - ScopedObject<IWICBitmap> converted; - - hr = _EnsureWicBitmapPixelFormat( pWIC, original, filter, colorPixelFormat, &converted ); + ComPtr<IWICBitmap> converted; + hr = _EnsureWicBitmapPixelFormat( pWIC, original, filter, colorPixelFormat, converted.GetAddressOf() ); if ( SUCCEEDED(hr) ) { hr = colorScaler->Initialize( converted.Get(), static_cast<UINT>(newWidth), static_cast<UINT>(newHeight), interpolationMode ); @@ -235,28 +252,25 @@ HRESULT _ResizeSeparateColorAndAlpha( _In_ IWICImagingFactory* pWIC, _In_ IWICBi if ( SUCCEEDED(hr) ) { - ScopedObject<IWICBitmap> resized; - - hr = pWIC->CreateBitmapFromSource( colorScaler.Get(), WICBitmapCacheOnDemand, &resized ); + ComPtr<IWICBitmap> resized; + hr = pWIC->CreateBitmapFromSource( colorScaler.Get(), WICBitmapCacheOnDemand, resized.GetAddressOf() ); if ( SUCCEEDED(hr) ) { - hr = _EnsureWicBitmapPixelFormat( 
pWIC, resized.Get(), filter, colorPixelFormat, &resizedColor ); + hr = _EnsureWicBitmapPixelFormat( pWIC, resized.Get(), filter, colorPixelFormat, resizedColor.GetAddressOf() ); } } } // Resize color+alpha image - ScopedObject<IWICBitmap> resizedColorWithAlpha; + ComPtr<IWICBitmap> resizedColorWithAlpha; if ( SUCCEEDED(hr) ) { - ScopedObject<IWICBitmapScaler> colorWithAlphaScaler; - - hr = pWIC->CreateBitmapScaler( &colorWithAlphaScaler ); + ComPtr<IWICBitmapScaler> colorWithAlphaScaler; + hr = pWIC->CreateBitmapScaler( colorWithAlphaScaler.GetAddressOf() ); if ( SUCCEEDED(hr) ) { - ScopedObject<IWICBitmap> converted; - - hr = _EnsureWicBitmapPixelFormat( pWIC, original, filter, colorWithAlphaPixelFormat, &converted ); + ComPtr<IWICBitmap> converted; + hr = _EnsureWicBitmapPixelFormat( pWIC, original, filter, colorWithAlphaPixelFormat, converted.GetAddressOf() ); if ( SUCCEEDED(hr) ) { hr = colorWithAlphaScaler->Initialize( converted.Get(), static_cast<UINT>(newWidth), static_cast<UINT>(newHeight), interpolationMode ); @@ -265,12 +279,11 @@ HRESULT _ResizeSeparateColorAndAlpha( _In_ IWICImagingFactory* pWIC, _In_ IWICBi if ( SUCCEEDED(hr) ) { - ScopedObject<IWICBitmap> resized; - - hr = pWIC->CreateBitmapFromSource( colorWithAlphaScaler.Get(), WICBitmapCacheOnDemand, &resized ); + ComPtr<IWICBitmap> resized; + hr = pWIC->CreateBitmapFromSource( colorWithAlphaScaler.Get(), WICBitmapCacheOnDemand, resized.GetAddressOf() ); if ( SUCCEEDED(hr) ) { - hr = _EnsureWicBitmapPixelFormat( pWIC, resized.Get(), filter, colorWithAlphaPixelFormat, &resizedColorWithAlpha ); + hr = _EnsureWicBitmapPixelFormat( pWIC, resized.Get(), filter, colorWithAlphaPixelFormat, resizedColorWithAlpha.GetAddressOf() ); } } } @@ -278,13 +291,12 @@ HRESULT _ResizeSeparateColorAndAlpha( _In_ IWICImagingFactory* pWIC, _In_ IWICBi // Merge pixels (copying color channels from color only image to color+alpha image) if ( SUCCEEDED(hr) ) { - ScopedObject<IWICBitmapLock> colorLock; - 
ScopedObject<IWICBitmapLock> colorWithAlphaLock; - - hr = resizedColor->Lock( nullptr, WICBitmapLockRead, &colorLock ); + ComPtr<IWICBitmapLock> colorLock; + ComPtr<IWICBitmapLock> colorWithAlphaLock; + hr = resizedColor->Lock( nullptr, WICBitmapLockRead, colorLock.GetAddressOf() ); if ( SUCCEEDED(hr) ) { - hr = resizedColorWithAlpha->Lock( nullptr, WICBitmapLockWrite, &colorWithAlphaLock ); + hr = resizedColorWithAlpha->Lock( nullptr, WICBitmapLockWrite, colorWithAlphaLock.GetAddressOf() ); } if ( SUCCEEDED(hr) ) @@ -339,6 +351,7 @@ HRESULT _ResizeSeparateColorAndAlpha( _In_ IWICImagingFactory* pWIC, _In_ IWICBi } else { +#pragma warning( suppress : 26014 6386 ) // No overflow possible here memcpy_s( colorWithAlphaData + colorWithAlphaIndex, colorWithAlphaBytesPerPixel, colorData + colorIndex, colorBytesInPixel ); } } @@ -348,8 +361,8 @@ HRESULT _ResizeSeparateColorAndAlpha( _In_ IWICImagingFactory* pWIC, _In_ IWICBi if ( SUCCEEDED(hr) ) { - ScopedObject<IWICBitmap> wicBitmap; - hr = _EnsureWicBitmapPixelFormat( pWIC, resizedColorWithAlpha.Get(), filter, desiredPixelFormat, &wicBitmap ); + ComPtr<IWICBitmap> wicBitmap; + hr = _EnsureWicBitmapPixelFormat( pWIC, resizedColorWithAlpha.Get(), filter, desiredPixelFormat, wicBitmap.GetAddressOf() ); if ( SUCCEEDED(hr) ) { hr = wicBitmap->CopyPixels( nullptr, static_cast<UINT>(img->rowPitch), static_cast<UINT>(img->slicePitch), img->pixels ); @@ -360,9 +373,78 @@ HRESULT _ResizeSeparateColorAndAlpha( _In_ IWICImagingFactory* pWIC, _In_ IWICBi } -//------------------------------------------------------------------------------------- -// Generate a (2D) mip-map chain from a base image using WIC's image scaler -//------------------------------------------------------------------------------------- +//--- determine when to use WIC vs. 
non-WIC paths --- +static bool _UseWICFiltering( _In_ DXGI_FORMAT format, _In_ DWORD filter ) +{ + if ( filter & TEX_FILTER_FORCE_NON_WIC ) + { + // Explicit flag indicates use of non-WIC code paths + return false; + } + + if ( filter & TEX_FILTER_FORCE_WIC ) + { + // Explicit flag to use WIC code paths, skips all the case checks below + return true; + } + + if ( IsSRGB(format) || (filter & TEX_FILTER_SRGB) ) + { + // Use non-WIC code paths for sRGB correct filtering + return false; + } + +#if defined(_XBOX_ONE) && defined(_TITLE) + if ( format == DXGI_FORMAT_R16G16B16A16_FLOAT + || format == DXGI_FORMAT_R16_FLOAT ) + { + // Use non-WIC code paths as these conversions are not supported by Xbox One XDK + return false; + } +#endif + + static_assert( TEX_FILTER_POINT == 0x100000, "TEX_FILTER_ flag values don't match TEX_FILTER_MASK" ); + + switch ( filter & TEX_FILTER_MASK ) + { + case TEX_FILTER_LINEAR: + if ( filter & TEX_FILTER_WRAP ) + { + // WIC only supports 'clamp' semantics (MIRROR is equivalent to clamp for linear) + return false; + } + + if ( BitsPerColor(format) > 8 ) + { + // Avoid the WIC bitmap scaler when doing Linear filtering of XR/HDR formats + return false; + } + break; + + case TEX_FILTER_CUBIC: + if ( filter & ( TEX_FILTER_WRAP | TEX_FILTER_MIRROR ) ) + { + // WIC only supports 'clamp' semantics + return false; + } + + if ( BitsPerColor(format) > 8 ) + { + // Avoid the WIC bitmap scaler when doing Cubic filtering of XR/HDR formats + return false; + } + break; + + case TEX_FILTER_TRIANGLE: + // WIC does not implement this filter + return false; + } + + return true; +} + + +//--- mipmap (1D/2D) generation using WIC image scalar --- static HRESULT _GenerateMipMapsUsingWIC( _In_ const Image& baseImage, _In_ DWORD filter, _In_ size_t levels, _In_ const WICPixelFormatGUID& pfGUID, _In_ const ScratchImage& mipChain, _In_ size_t item ) { @@ -378,10 +460,10 @@ static HRESULT _GenerateMipMapsUsingWIC( _In_ const Image& baseImage, _In_ DWORD size_t width = 
baseImage.width; size_t height = baseImage.height; - ScopedObject<IWICBitmap> source; + ComPtr<IWICBitmap> source; HRESULT hr = pWIC->CreateBitmapFromMemory( static_cast<UINT>( width ), static_cast<UINT>( height ), pfGUID, static_cast<UINT>( baseImage.rowPitch ), static_cast<UINT>( baseImage.slicePitch ), - baseImage.pixels, &source ); + baseImage.pixels, source.GetAddressOf() ); if ( FAILED(hr) ) return hr; @@ -403,13 +485,13 @@ static HRESULT _GenerateMipMapsUsingWIC( _In_ const Image& baseImage, _In_ DWORD pDest += img0->rowPitch; } - ScopedObject<IWICComponentInfo> componentInfo; - hr = pWIC->CreateComponentInfo( pfGUID, &componentInfo ); + ComPtr<IWICComponentInfo> componentInfo; + hr = pWIC->CreateComponentInfo( pfGUID, componentInfo.GetAddressOf() ); if ( FAILED(hr) ) return hr; - ScopedObject<IWICPixelFormatInfo2> pixelFormatInfo; - hr = componentInfo->QueryInterface( __uuidof(IWICPixelFormatInfo2), (void**)&pixelFormatInfo ); + ComPtr<IWICPixelFormatInfo2> pixelFormatInfo; + hr = componentInfo.As( &pixelFormatInfo ); if ( FAILED(hr) ) return hr; @@ -441,8 +523,8 @@ static HRESULT _GenerateMipMapsUsingWIC( _In_ const Image& baseImage, _In_ DWORD } else { - ScopedObject<IWICBitmapScaler> scaler; - hr = pWIC->CreateBitmapScaler( &scaler ); + ComPtr<IWICBitmapScaler> scaler; + hr = pWIC->CreateBitmapScaler( scaler.GetAddressOf() ); if ( FAILED(hr) ) return hr; @@ -465,11 +547,18 @@ static HRESULT _GenerateMipMapsUsingWIC( _In_ const Image& baseImage, _In_ DWORD { // The WIC bitmap scaler is free to return a different pixel format than the source image, so here we // convert it back - ScopedObject<IWICFormatConverter> FC; - hr = pWIC->CreateFormatConverter( &FC ); + ComPtr<IWICFormatConverter> FC; + hr = pWIC->CreateFormatConverter( FC.GetAddressOf() ); if ( FAILED(hr) ) return hr; + BOOL canConvert = FALSE; + hr = FC->CanConvert( pfScaler, pfGUID, &canConvert ); + if ( FAILED(hr) || !canConvert ) + { + return E_UNEXPECTED; + } + hr = FC->Initialize( 
scaler.Get(), pfGUID, _GetWICDither( filter ), 0, 0, WICBitmapPaletteTypeCustom ); if ( FAILED(hr) ) return hr; @@ -486,9 +575,750 @@ static HRESULT _GenerateMipMapsUsingWIC( _In_ const Image& baseImage, _In_ DWORD //------------------------------------------------------------------------------------- +// Generate (1D/2D) mip-map helpers (custom filtering) +//------------------------------------------------------------------------------------- +static HRESULT _Setup2DMips( _In_reads_(nimages) const Image* baseImages, _In_ size_t nimages, _In_ const TexMetadata& mdata, + _Out_ ScratchImage& mipChain ) +{ + if ( !baseImages || !nimages ) + return E_INVALIDARG; + + assert( mdata.mipLevels > 1 ); + assert( mdata.arraySize == nimages ); + assert( mdata.depth == 1 && mdata.dimension != TEX_DIMENSION_TEXTURE3D ); + assert( mdata.width == baseImages[0].width ); + assert( mdata.height == baseImages[0].height ); + assert( mdata.format == baseImages[0].format ); + + HRESULT hr = mipChain.Initialize( mdata ); + if ( FAILED(hr) ) + return hr; + + // Copy base image(s) to top of mip chain + for( size_t item=0; item < nimages; ++item ) + { + const Image& src = baseImages[item]; + + const Image *dest = mipChain.GetImage( 0, item, 0 ); + if ( !dest ) + { + mipChain.Release(); + return E_POINTER; + } + + assert( src.format == dest->format ); + + uint8_t* pDest = dest->pixels; + if ( !pDest ) + { + mipChain.Release(); + return E_POINTER; + } + + const uint8_t *pSrc = src.pixels; + size_t rowPitch = src.rowPitch; + for( size_t h=0; h < mdata.height; ++h ) + { + size_t msize = std::min<size_t>( dest->rowPitch, rowPitch ); + memcpy_s( pDest, dest->rowPitch, pSrc, msize ); + pSrc += rowPitch; + pDest += dest->rowPitch; + } + } + + return S_OK; +} + +//--- 2D Point Filter --- +static HRESULT _Generate2DMipsPointFilter( _In_ size_t levels, _In_ const ScratchImage& mipChain, _In_ size_t item ) +{ + if ( !mipChain.GetImages() ) + return E_INVALIDARG; + + // This assumes that the base image 
is already placed into the mipChain at the top level... (see _Setup2DMips) + + assert( levels > 1 ); + + size_t width = mipChain.GetMetadata().width; + size_t height = mipChain.GetMetadata().height; + + // Allocate temporary space (2 scanlines) + ScopedAlignedArrayXMVECTOR scanline( reinterpret_cast<XMVECTOR*>( _aligned_malloc( (sizeof(XMVECTOR)*width*2), 16 ) ) ); + if ( !scanline ) + return E_OUTOFMEMORY; + + XMVECTOR* target = scanline.get(); + + XMVECTOR* row = target + width; + + // Resize base image to each target mip level + for( size_t level=1; level < levels; ++level ) + { +#ifdef _DEBUG + memset( row, 0xCD, sizeof(XMVECTOR)*width ); +#endif + + // 2D point filter + const Image* src = mipChain.GetImage( level-1, item, 0 ); + const Image* dest = mipChain.GetImage( level, item, 0 ); + + if ( !src || !dest ) + return E_POINTER; + + const uint8_t* pSrc = src->pixels; + uint8_t* pDest = dest->pixels; + + size_t rowPitch = src->rowPitch; + + size_t nwidth = (width > 1) ? (width >> 1) : 1; + size_t nheight = (height > 1) ? 
(height >> 1) : 1; + + size_t xinc = ( width << 16 ) / nwidth; + size_t yinc = ( height << 16 ) / nheight; + + size_t lasty = size_t(-1); + + size_t sy = 0; + for( size_t y = 0; y < nheight; ++y ) + { + if ( (lasty ^ sy) >> 16 ) + { + if ( !_LoadScanline( row, width, pSrc + ( rowPitch * (sy >> 16) ), rowPitch, src->format ) ) + return E_FAIL; + lasty = sy; + } + + size_t sx = 0; + for( size_t x = 0; x < nwidth; ++x ) + { + target[ x ] = row[ sx >> 16 ]; + sx += xinc; + } + + if ( !_StoreScanline( pDest, dest->rowPitch, dest->format, target, nwidth ) ) + return E_FAIL; + pDest += dest->rowPitch; + + sy += yinc; + } + + if ( height > 1 ) + height >>= 1; + + if ( width > 1 ) + width >>= 1; + } + + return S_OK; +} + + +//--- 2D Box Filter --- +static HRESULT _Generate2DMipsBoxFilter( _In_ size_t levels, _In_ DWORD filter, _In_ const ScratchImage& mipChain, _In_ size_t item ) +{ + if ( !mipChain.GetImages() ) + return E_INVALIDARG; + + // This assumes that the base image is already placed into the mipChain at the top level... 
(see _Setup2DMips) + + assert( levels > 1 ); + + size_t width = mipChain.GetMetadata().width; + size_t height = mipChain.GetMetadata().height; + + if ( !ispow2(width) || !ispow2(height) ) + return E_FAIL; + + // Allocate temporary space (3 scanlines) + ScopedAlignedArrayXMVECTOR scanline( reinterpret_cast<XMVECTOR*>( _aligned_malloc( (sizeof(XMVECTOR)*width*3), 16 ) ) ); + if ( !scanline ) + return E_OUTOFMEMORY; + + XMVECTOR* target = scanline.get(); + + XMVECTOR* urow0 = target + width; + XMVECTOR* urow1 = target + width*2; + + const XMVECTOR* urow2 = urow0 + 1; + const XMVECTOR* urow3 = urow1 + 1; + + // Resize base image to each target mip level + for( size_t level=1; level < levels; ++level ) + { + if ( height <= 1 ) + { + urow1 = urow0; + } + + if ( width <= 1 ) + { + urow2 = urow0; + urow3 = urow1; + } + + // 2D box filter + const Image* src = mipChain.GetImage( level-1, item, 0 ); + const Image* dest = mipChain.GetImage( level, item, 0 ); + + if ( !src || !dest ) + return E_POINTER; + + const uint8_t* pSrc = src->pixels; + uint8_t* pDest = dest->pixels; + + size_t rowPitch = src->rowPitch; + + size_t nwidth = (width > 1) ? (width >> 1) : 1; + size_t nheight = (height > 1) ? 
(height >> 1) : 1; + + for( size_t y = 0; y < nheight; ++y ) + { + if ( !_LoadScanlineLinear( urow0, width, pSrc, rowPitch, src->format, filter ) ) + return E_FAIL; + pSrc += rowPitch; + + if ( urow0 != urow1 ) + { + if ( !_LoadScanlineLinear( urow1, width, pSrc, rowPitch, src->format, filter ) ) + return E_FAIL; + pSrc += rowPitch; + } + + for( size_t x = 0; x < nwidth; ++x ) + { + size_t x2 = x << 1; + + AVERAGE4( target[ x ], urow0[ x2 ], urow1[ x2 ], urow2[ x2 ], urow3[ x2 ] ); + } + + if ( !_StoreScanlineLinear( pDest, dest->rowPitch, dest->format, target, nwidth, filter ) ) + return E_FAIL; + pDest += dest->rowPitch; + } + + if ( height > 1 ) + height >>= 1; + + if ( width > 1 ) + width >>= 1; + } + + return S_OK; +} + + +//--- 2D Linear Filter --- +static HRESULT _Generate2DMipsLinearFilter( _In_ size_t levels, _In_ DWORD filter, _In_ const ScratchImage& mipChain, _In_ size_t item ) +{ + if ( !mipChain.GetImages() ) + return E_INVALIDARG; + + // This assumes that the base image is already placed into the mipChain at the top level... 
(see _Setup2DMips) + + assert( levels > 1 ); + + size_t width = mipChain.GetMetadata().width; + size_t height = mipChain.GetMetadata().height; + + // Allocate temporary space (3 scanlines, plus X and Y filters) + ScopedAlignedArrayXMVECTOR scanline( reinterpret_cast<XMVECTOR*>( _aligned_malloc( (sizeof(XMVECTOR)*width*3), 16 ) ) ); + if ( !scanline ) + return E_OUTOFMEMORY; + + std::unique_ptr<LinearFilter[]> lf( new (std::nothrow) LinearFilter[ width+height ] ); + if ( !lf ) + return E_OUTOFMEMORY; + + LinearFilter* lfX = lf.get(); + LinearFilter* lfY = lf.get() + width; + + XMVECTOR* target = scanline.get(); + + XMVECTOR* row0 = target + width; + XMVECTOR* row1 = target + width*2; + + // Resize base image to each target mip level + for( size_t level=1; level < levels; ++level ) + { + // 2D linear filter + const Image* src = mipChain.GetImage( level-1, item, 0 ); + const Image* dest = mipChain.GetImage( level, item, 0 ); + + if ( !src || !dest ) + return E_POINTER; + + const uint8_t* pSrc = src->pixels; + uint8_t* pDest = dest->pixels; + + size_t rowPitch = src->rowPitch; + + size_t nwidth = (width > 1) ? (width >> 1) : 1; + _CreateLinearFilter( width, nwidth, (filter & TEX_FILTER_WRAP_U) != 0, lfX ); + + size_t nheight = (height > 1) ? 
(height >> 1) : 1; + _CreateLinearFilter( height, nheight, (filter & TEX_FILTER_WRAP_V) != 0, lfY ); + +#ifdef _DEBUG + memset( row0, 0xCD, sizeof(XMVECTOR)*width ); + memset( row1, 0xDD, sizeof(XMVECTOR)*width ); +#endif + + size_t u0 = size_t(-1); + size_t u1 = size_t(-1); + + for( size_t y = 0; y < nheight; ++y ) + { + auto& toY = lfY[ y ]; + + if ( toY.u0 != u0 ) + { + if ( toY.u0 != u1 ) + { + u0 = toY.u0; + + if ( !_LoadScanlineLinear( row0, width, pSrc + (rowPitch * u0), rowPitch, src->format, filter ) ) + return E_FAIL; + } + else + { + u0 = u1; + u1 = size_t(-1); + + std::swap( row0, row1 ); + } + } + + if ( toY.u1 != u1 ) + { + u1 = toY.u1; + + if ( !_LoadScanlineLinear( row1, width, pSrc + (rowPitch * u1), rowPitch, src->format, filter ) ) + return E_FAIL; + } + + for( size_t x = 0; x < nwidth; ++x ) + { + auto& toX = lfX[ x ]; + + BILINEAR_INTERPOLATE( target[x], toX, toY, row0, row1 ); + } + + if ( !_StoreScanlineLinear( pDest, dest->rowPitch, dest->format, target, nwidth, filter ) ) + return E_FAIL; + pDest += dest->rowPitch; + } + + if ( height > 1 ) + height >>= 1; + + if ( width > 1 ) + width >>= 1; + } + + return S_OK; +} + + +//--- 2D Cubic Filter --- +static HRESULT _Generate2DMipsCubicFilter( _In_ size_t levels, _In_ DWORD filter, _In_ const ScratchImage& mipChain, _In_ size_t item ) +{ + if ( !mipChain.GetImages() ) + return E_INVALIDARG; + + // This assumes that the base image is already placed into the mipChain at the top level... 
(see _Setup2DMips) + + assert( levels > 1 ); + + size_t width = mipChain.GetMetadata().width; + size_t height = mipChain.GetMetadata().height; + + // Allocate temporary space (5 scanlines, plus X and Y filters) + ScopedAlignedArrayXMVECTOR scanline( reinterpret_cast<XMVECTOR*>( _aligned_malloc( (sizeof(XMVECTOR)*width*5), 16 ) ) ); + if ( !scanline ) + return E_OUTOFMEMORY; + + std::unique_ptr<CubicFilter[]> cf( new (std::nothrow) CubicFilter[ width+height ] ); + if ( !cf ) + return E_OUTOFMEMORY; + + CubicFilter* cfX = cf.get(); + CubicFilter* cfY = cf.get() + width; + + XMVECTOR* target = scanline.get(); + + XMVECTOR* row0 = target + width; + XMVECTOR* row1 = target + width*2; + XMVECTOR* row2 = target + width*3; + XMVECTOR* row3 = target + width*4; + + // Resize base image to each target mip level + for( size_t level=1; level < levels; ++level ) + { + // 2D cubic filter + const Image* src = mipChain.GetImage( level-1, item, 0 ); + const Image* dest = mipChain.GetImage( level, item, 0 ); + + if ( !src || !dest ) + return E_POINTER; + + const uint8_t* pSrc = src->pixels; + uint8_t* pDest = dest->pixels; + + size_t rowPitch = src->rowPitch; + + size_t nwidth = (width > 1) ? (width >> 1) : 1; + _CreateCubicFilter( width, nwidth, (filter & TEX_FILTER_WRAP_U) != 0, (filter & TEX_FILTER_MIRROR_U) != 0, cfX ); + + size_t nheight = (height > 1) ? 
(height >> 1) : 1; + _CreateCubicFilter( height, nheight, (filter & TEX_FILTER_WRAP_V) != 0, (filter & TEX_FILTER_MIRROR_V) != 0, cfY ); + +#ifdef _DEBUG + memset( row0, 0xCD, sizeof(XMVECTOR)*width ); + memset( row1, 0xDD, sizeof(XMVECTOR)*width ); + memset( row2, 0xED, sizeof(XMVECTOR)*width ); + memset( row3, 0xFD, sizeof(XMVECTOR)*width ); +#endif + + size_t u0 = size_t(-1); + size_t u1 = size_t(-1); + size_t u2 = size_t(-1); + size_t u3 = size_t(-1); + + for( size_t y = 0; y < nheight; ++y ) + { + auto& toY = cfY[ y ]; + + // Scanline 1 + if ( toY.u0 != u0 ) + { + if ( toY.u0 != u1 && toY.u0 != u2 && toY.u0 != u3 ) + { + u0 = toY.u0; + + if ( !_LoadScanlineLinear( row0, width, pSrc + (rowPitch * u0), rowPitch, src->format, filter ) ) + return E_FAIL; + } + else if ( toY.u0 == u1 ) + { + u0 = u1; + u1 = size_t(-1); + + std::swap( row0, row1 ); + } + else if ( toY.u0 == u2 ) + { + u0 = u2; + u2 = size_t(-1); + + std::swap( row0, row2 ); + } + else if ( toY.u0 == u3 ) + { + u0 = u3; + u3 = size_t(-1); + + std::swap( row0, row3 ); + } + } + + // Scanline 2 + if ( toY.u1 != u1 ) + { + if ( toY.u1 != u2 && toY.u1 != u3 ) + { + u1 = toY.u1; + + if ( !_LoadScanlineLinear( row1, width, pSrc + (rowPitch * u1), rowPitch, src->format, filter ) ) + return E_FAIL; + } + else if ( toY.u1 == u2 ) + { + u1 = u2; + u2 = size_t(-1); + + std::swap( row1, row2 ); + } + else if ( toY.u1 == u3 ) + { + u1 = u3; + u3 = size_t(-1); + + std::swap( row1, row3 ); + } + } + + // Scanline 3 + if ( toY.u2 != u2 ) + { + if ( toY.u2 != u3 ) + { + u2 = toY.u2; + + if ( !_LoadScanlineLinear( row2, width, pSrc + (rowPitch * u2), rowPitch, src->format, filter ) ) + return E_FAIL; + } + else + { + u2 = u3; + u3 = size_t(-1); + + std::swap( row2, row3 ); + } + } + + // Scanline 4 + if ( toY.u3 != u3 ) + { + u3 = toY.u3; + + if ( !_LoadScanlineLinear( row3, width, pSrc + (rowPitch * u3), rowPitch, src->format, filter ) ) + return E_FAIL; + } + + for( size_t x = 0; x < nwidth; ++x ) + { + auto& toX = 
cfX[ x ]; + + XMVECTOR C0, C1, C2, C3; + + CUBIC_INTERPOLATE( C0, toX.x, row0[ toX.u0 ], row0[ toX.u1 ], row0[ toX.u2 ], row0[ toX.u3 ] ); + CUBIC_INTERPOLATE( C1, toX.x, row1[ toX.u0 ], row1[ toX.u1 ], row1[ toX.u2 ], row1[ toX.u3 ] ); + CUBIC_INTERPOLATE( C2, toX.x, row2[ toX.u0 ], row2[ toX.u1 ], row2[ toX.u2 ], row2[ toX.u3 ] ); + CUBIC_INTERPOLATE( C3, toX.x, row3[ toX.u0 ], row3[ toX.u1 ], row3[ toX.u2 ], row3[ toX.u3 ] ); + + CUBIC_INTERPOLATE( target[x], toY.x, C0, C1, C2, C3 ); + } + + if ( !_StoreScanlineLinear( pDest, dest->rowPitch, dest->format, target, nwidth, filter ) ) + return E_FAIL; + pDest += dest->rowPitch; + } + + if ( height > 1 ) + height >>= 1; + + if ( width > 1 ) + width >>= 1; + } + + return S_OK; +} + + +//--- 2D Triangle Filter --- +static HRESULT _Generate2DMipsTriangleFilter( _In_ size_t levels, _In_ DWORD filter, _In_ const ScratchImage& mipChain, _In_ size_t item ) +{ + if ( !mipChain.GetImages() ) + return E_INVALIDARG; + + using namespace TriangleFilter; + + // This assumes that the base image is already placed into the mipChain at the top level... 
(see _Setup2DMips) + + assert( levels > 1 ); + + size_t width = mipChain.GetMetadata().width; + size_t height = mipChain.GetMetadata().height; + + // Allocate initial temporary space (1 scanline, accumulation rows, plus X and Y filters) + ScopedAlignedArrayXMVECTOR scanline( reinterpret_cast<XMVECTOR*>( _aligned_malloc( sizeof(XMVECTOR) * width, 16 ) ) ); + if ( !scanline ) + return E_OUTOFMEMORY; + + std::unique_ptr<TriangleRow[]> rowActive( new (std::nothrow) TriangleRow[ height ] ); + if ( !rowActive ) + return E_OUTOFMEMORY; + + TriangleRow * rowFree = nullptr; + + std::unique_ptr<Filter> tfX, tfY; + + XMVECTOR* row = scanline.get(); + + // Resize base image to each target mip level + for( size_t level=1; level < levels; ++level ) + { + // 2D triangle filter + const Image* src = mipChain.GetImage( level-1, item, 0 ); + const Image* dest = mipChain.GetImage( level, item, 0 ); + + if ( !src || !dest ) + return E_POINTER; + + const uint8_t* pSrc = src->pixels; + size_t rowPitch = src->rowPitch; + const uint8_t* pEndSrc = pSrc + rowPitch * height; + + uint8_t* pDest = dest->pixels; + + size_t nwidth = (width > 1) ? (width >> 1) : 1; + HRESULT hr = _Create( width, nwidth, (filter & TEX_FILTER_WRAP_U) != 0, tfX ); + if ( FAILED(hr) ) + return hr; + + size_t nheight = (height > 1) ? 
(height >> 1) : 1; + hr = _Create( height, nheight, (filter & TEX_FILTER_WRAP_V) != 0, tfY ); + if ( FAILED(hr) ) + return hr; + +#ifdef _DEBUG + memset( row, 0xCD, sizeof(XMVECTOR)*width ); +#endif + + auto xFromEnd = reinterpret_cast<const FilterFrom*>( reinterpret_cast<const uint8_t*>( tfX.get() ) + tfX->sizeInBytes ); + auto yFromEnd = reinterpret_cast<const FilterFrom*>( reinterpret_cast<const uint8_t*>( tfY.get() ) + tfY->sizeInBytes ); + + // Count times rows get written (and clear out any leftover accumulation rows from last miplevel) + for( FilterFrom* yFrom = tfY->from; yFrom < yFromEnd; ) + { + for ( size_t j = 0; j < yFrom->count; ++j ) + { + size_t v = yFrom->to[ j ].u; + assert( v < nheight ); + TriangleRow* rowAcc = &rowActive[ v ]; + + ++rowAcc->remaining; + + if ( rowAcc->scanline ) + { + memset( rowAcc->scanline.get(), 0, sizeof(XMVECTOR) * nwidth ); + } + } + + yFrom = reinterpret_cast<FilterFrom*>( reinterpret_cast<uint8_t*>( yFrom ) + yFrom->sizeInBytes ); + } + + // Filter image + for( FilterFrom* yFrom = tfY->from; yFrom < yFromEnd; ) + { + // Create accumulation rows as needed + for ( size_t j = 0; j < yFrom->count; ++j ) + { + size_t v = yFrom->to[ j ].u; + assert( v < nheight ); + TriangleRow* rowAcc = &rowActive[ v ]; + + if ( !rowAcc->scanline ) + { + if ( rowFree ) + { + // Steal and reuse scanline from 'free row' list + // (it will always be at least as wide as nwidth due to loop decending order) + assert( rowFree->scanline != 0 ); + rowAcc->scanline.reset( rowFree->scanline.release() ); + rowFree = rowFree->next; + } + else + { + rowAcc->scanline.reset( reinterpret_cast<XMVECTOR*>( _aligned_malloc( sizeof(XMVECTOR) * nwidth, 16 ) ) ); + if ( !rowAcc->scanline ) + return E_OUTOFMEMORY; + } + + memset( rowAcc->scanline.get(), 0, sizeof(XMVECTOR) * nwidth ); + } + } + + // Load source scanline + if ( (pSrc + rowPitch) > pEndSrc ) + return E_FAIL; + + if ( !_LoadScanlineLinear( row, width, pSrc, rowPitch, src->format, filter ) ) + return 
E_FAIL; + + pSrc += rowPitch; + + // Process row + size_t x = 0; + for( FilterFrom* xFrom = tfX->from; xFrom < xFromEnd; ++x ) + { + for ( size_t j = 0; j < yFrom->count; ++j ) + { + size_t v = yFrom->to[ j ].u; + assert( v < nheight ); + float yweight = yFrom->to[ j ].weight; + + XMVECTOR* accPtr = rowActive[ v ].scanline.get(); + if ( !accPtr ) + return E_POINTER; + + for ( size_t k = 0; k < xFrom->count; ++k ) + { + size_t u = xFrom->to[ k ].u; + assert( u < nwidth ); + + XMVECTOR weight = XMVectorReplicate( yweight * xFrom->to[ k ].weight ); + + assert( x < width ); + accPtr[ u ] = XMVectorMultiplyAdd( row[ x ], weight, accPtr[ u ] ); + } + } + + xFrom = reinterpret_cast<FilterFrom*>( reinterpret_cast<uint8_t*>( xFrom ) + xFrom->sizeInBytes ); + } + + // Write completed accumulation rows + for ( size_t j = 0; j < yFrom->count; ++j ) + { + size_t v = yFrom->to[ j ].u; + assert( v < nheight ); + TriangleRow* rowAcc = &rowActive[ v ]; + + assert( rowAcc->remaining > 0 ); + --rowAcc->remaining; + + if ( !rowAcc->remaining ) + { + XMVECTOR* pAccSrc = rowAcc->scanline.get(); + if ( !pAccSrc ) + return E_POINTER; + + switch( dest->format ) + { + case DXGI_FORMAT_R10G10B10A2_UNORM: + case DXGI_FORMAT_R10G10B10A2_UINT: + { + // Need to slightly bias results for floating-point error accumulation which can + // be visible with harshly quantized values + static const XMVECTORF32 Bias = { 0.f, 0.f, 0.f, 0.1f }; + + XMVECTOR* ptr = pAccSrc; + for( size_t i=0; i < dest->width; ++i, ++ptr ) + { + *ptr = XMVectorAdd( *ptr, Bias ); + } + } + break; + } + + // This performs any required clamping + if ( !_StoreScanlineLinear( pDest + (dest->rowPitch * v), dest->rowPitch, dest->format, pAccSrc, dest->width, filter ) ) + return E_FAIL; + + // Put row on freelist to reuse it's allocated scanline + rowAcc->next = rowFree; + rowFree = rowAcc; + } + } + + yFrom = reinterpret_cast<FilterFrom*>( reinterpret_cast<uint8_t*>( yFrom ) + yFrom->sizeInBytes ); + } + + if ( height > 1 ) + height 
>>= 1; + + if ( width > 1 ) + width >>= 1; + } + + return S_OK; +} + + +//------------------------------------------------------------------------------------- // Generate volume mip-map helpers //------------------------------------------------------------------------------------- -static HRESULT _Setup3DMips( _In_count_(depth) const Image* baseImages, _In_ size_t depth, size_t levels, +static HRESULT _Setup3DMips( _In_reads_(depth) const Image* baseImages, _In_ size_t depth, size_t levels, _Out_ ScratchImage& mipChain ) { if ( !baseImages || !depth ) @@ -538,6 +1368,8 @@ static HRESULT _Setup3DMips( _In_count_(depth) const Image* baseImages, _In_ siz return S_OK; } + +//--- 3D Point Filter --- static HRESULT _Generate3DMipsPointFilter( _In_ size_t depth, _In_ size_t levels, _In_ const ScratchImage& mipChain ) { if ( !depth || !mipChain.GetImages() ) @@ -550,8 +1382,6 @@ static HRESULT _Generate3DMipsPointFilter( _In_ size_t depth, _In_ size_t levels size_t width = mipChain.GetMetadata().width; size_t height = mipChain.GetMetadata().height; - assert( ispow2(width) && ispow2(height) && ispow2(depth) ); - // Allocate temporary space (2 scanlines) ScopedAlignedArrayXMVECTOR scanline( reinterpret_cast<XMVECTOR*>( _aligned_malloc( (sizeof(XMVECTOR)*width*2), 16 ) ) ); if ( !scanline ) @@ -564,13 +1394,22 @@ static HRESULT _Generate3DMipsPointFilter( _In_ size_t depth, _In_ size_t levels // Resize base image to each target mip level for( size_t level=1; level < levels; ++level ) { +#ifdef _DEBUG + memset( row, 0xCD, sizeof(XMVECTOR)*width ); +#endif + if ( depth > 1 ) { // 3D point filter - for( size_t slice=0; slice < depth; slice += 2 ) + size_t ndepth = depth >> 1; + + size_t zinc = ( depth << 16 ) / ndepth; + + size_t sz = 0; + for( size_t slice=0; slice < ndepth; ++slice ) { - const Image* src = mipChain.GetImage( level-1, 0, slice ); - const Image* dest = mipChain.GetImage( level, 0, slice >> 1 ); + const Image* src = mipChain.GetImage( level-1, 0, (sz >> 16) ); + 
const Image* dest = mipChain.GetImage( level, 0, slice ); if ( !src || !dest ) return E_POINTER; @@ -580,25 +1419,39 @@ static HRESULT _Generate3DMipsPointFilter( _In_ size_t depth, _In_ size_t levels size_t rowPitch = src->rowPitch; - size_t nheight = height >> 1; + size_t nwidth = (width > 1) ? (width >> 1) : 1; + size_t nheight = (height > 1) ? (height >> 1) : 1; + size_t xinc = ( width << 16 ) / nwidth; + size_t yinc = ( height << 16 ) / nheight; + + size_t lasty = size_t(-1); + + size_t sy = 0; for( size_t y = 0; y < nheight; ++y ) { - if ( !_LoadScanline( row, width, pSrc, rowPitch, src->format ) ) - return E_FAIL; - pSrc += rowPitch*2; - - size_t nwidth = width >> 1; + if ( (lasty ^ sy) >> 16 ) + { + if ( !_LoadScanline( row, width, pSrc + ( rowPitch * (sy >> 16) ), rowPitch, src->format ) ) + return E_FAIL; + lasty = sy; + } + size_t sx = 0; for( size_t x = 0; x < nwidth; ++x ) { - target[ x ] = row[ x*2 ]; + target[ x ] = row[ sx >> 16 ]; + sx += xinc; } if ( !_StoreScanline( pDest, dest->rowPitch, dest->format, target, nwidth ) ) return E_FAIL; pDest += dest->rowPitch; + + sy += yinc; } + + sz += zinc; } } else @@ -615,24 +1468,36 @@ static HRESULT _Generate3DMipsPointFilter( _In_ size_t depth, _In_ size_t levels size_t rowPitch = src->rowPitch; - size_t nheight = height >> 1; + size_t nwidth = (width > 1) ? (width >> 1) : 1; + size_t nheight = (height > 1) ? 
(height >> 1) : 1; + size_t xinc = ( width << 16 ) / nwidth; + size_t yinc = ( height << 16 ) / nheight; + + size_t lasty = size_t(-1); + + size_t sy = 0; for( size_t y = 0; y < nheight; ++y ) { - if ( !_LoadScanline( row, width, pSrc, rowPitch, src->format ) ) - return E_FAIL; - pSrc += rowPitch*2; - - size_t nwidth = width >> 1; + if ( (lasty ^ sy) >> 16 ) + { + if ( !_LoadScanline( row, width, pSrc + ( rowPitch * (sy >> 16) ), rowPitch, src->format ) ) + return E_FAIL; + lasty = sy; + } + size_t sx = 0; for( size_t x = 0; x < nwidth; ++x ) { - target[ x ] = row[ x*2 ]; + target[ x ] = row[ sx >> 16 ]; + sx += xinc; } if ( !_StoreScanline( pDest, dest->rowPitch, dest->format, target, nwidth ) ) return E_FAIL; pDest += dest->rowPitch; + + sy += yinc; } } @@ -646,12 +1511,12 @@ static HRESULT _Generate3DMipsPointFilter( _In_ size_t depth, _In_ size_t levels depth >>= 1; } - assert( height == 1 && width == 1 && depth == 1 ); - return S_OK; } -static HRESULT _Generate3DMipsBoxFilter( _In_ size_t depth, _In_ size_t levels, _In_ const ScratchImage& mipChain ) + +//--- 3D Box Filter --- +static HRESULT _Generate3DMipsBoxFilter( _In_ size_t depth, _In_ size_t levels, _In_ DWORD filter, _In_ const ScratchImage& mipChain ) { if ( !depth || !mipChain.GetImages() ) return E_INVALIDARG; @@ -663,7 +1528,8 @@ static HRESULT _Generate3DMipsBoxFilter( _In_ size_t depth, _In_ size_t levels, size_t width = mipChain.GetMetadata().width; size_t height = mipChain.GetMetadata().height; - assert( ispow2(width) && ispow2(height) && ispow2(depth) ); + if ( !ispow2(width) || !ispow2(height) || !ispow2(depth) ) + return E_FAIL; // Allocate temporary space (5 scanlines) ScopedAlignedArrayXMVECTOR scanline( reinterpret_cast<XMVECTOR*>( _aligned_malloc( (sizeof(XMVECTOR)*width*5), 16 ) ) ); @@ -685,13 +1551,13 @@ static HRESULT _Generate3DMipsBoxFilter( _In_ size_t depth, _In_ size_t levels, // Resize base image to each target mip level for( size_t level=1; level < levels; ++level ) { - if ( 
height == 1) + if ( height <= 1 ) { - urow0 = vrow0; - urow1 = vrow1; + urow1 = urow0; + vrow1 = vrow0; } - if ( width == 1 ) + if ( width <= 1 ) { urow2 = urow0; urow3 = urow1; @@ -702,11 +1568,16 @@ static HRESULT _Generate3DMipsBoxFilter( _In_ size_t depth, _In_ size_t levels, if ( depth > 1 ) { // 3D box filter - for( size_t slice=0; slice < depth; slice += 2 ) + size_t ndepth = depth >> 1; + + for( size_t slice=0; slice < ndepth; ++slice ) { - const Image* srca = mipChain.GetImage( level-1, 0, slice ); - const Image* srcb = mipChain.GetImage( level-1, 0, slice+1 ); - const Image* dest = mipChain.GetImage( level, 0, slice >> 1 ); + size_t slicea = std::min<size_t>( slice * 2, depth-1 ); + size_t sliceb = std::min<size_t>( slicea + 1, depth-1 ); + + const Image* srca = mipChain.GetImage( level-1, 0, slicea ); + const Image* srcb = mipChain.GetImage( level-1, 0, sliceb ); + const Image* dest = mipChain.GetImage( level, 0, slice ); if ( !srca || !srcb || !dest ) return E_POINTER; @@ -718,54 +1589,42 @@ static HRESULT _Generate3DMipsBoxFilter( _In_ size_t depth, _In_ size_t levels, size_t aRowPitch = srca->rowPitch; size_t bRowPitch = srcb->rowPitch; - size_t nheight = height >> 1; + size_t nwidth = (width > 1) ? (width >> 1) : 1; + size_t nheight = (height > 1) ? 
(height >> 1) : 1; for( size_t y = 0; y < nheight; ++y ) { - if ( !_LoadScanline( urow0, width, pSrc1, aRowPitch, srca->format ) ) + if ( !_LoadScanlineLinear( urow0, width, pSrc1, aRowPitch, srca->format, filter ) ) return E_FAIL; pSrc1 += aRowPitch; if ( urow0 != urow1 ) { - if ( !_LoadScanline( urow1, width, pSrc1, aRowPitch, srca->format ) ) + if ( !_LoadScanlineLinear( urow1, width, pSrc1, aRowPitch, srca->format, filter ) ) return E_FAIL; pSrc1 += aRowPitch; } - if ( urow0 != vrow0 ) - { - if ( !_LoadScanline( vrow0, width, pSrc2, bRowPitch, srcb->format ) ) - return E_FAIL; - pSrc2 += bRowPitch; - } + if ( !_LoadScanlineLinear( vrow0, width, pSrc2, bRowPitch, srcb->format, filter ) ) + return E_FAIL; + pSrc2 += bRowPitch; - if ( urow0 != vrow1 && vrow0 != vrow1 ) + if ( vrow0 != vrow1 ) { - if ( !_LoadScanline( vrow1, width, pSrc2, bRowPitch, srcb->format ) ) + if ( !_LoadScanlineLinear( vrow1, width, pSrc2, bRowPitch, srcb->format, filter ) ) return E_FAIL; pSrc2 += bRowPitch; } - size_t nwidth = width >> 1; - for( size_t x = 0; x < nwidth; ++x ) { - size_t x2 = x*2; - - // Box filter: Average 2x2x2 pixels - XMVECTOR v = XMVectorAdd( urow0[ x2 ], urow1[ x2 ] ); - v = XMVectorAdd( v, urow2[ x2 ] ); - v = XMVectorAdd( v, urow3[ x2 ] ); - v = XMVectorAdd( v, vrow0[ x2 ] ); - v = XMVectorAdd( v, vrow1[ x2 ] ); - v = XMVectorAdd( v, vrow2[ x2 ] ); - v = XMVectorAdd( v, vrow3[ x2 ] ); - - target[ x ] = XMVectorMultiply( v, s_boxScale3D ); + size_t x2 = x << 1; + + AVERAGE8( target[x], urow0[ x2 ], urow1[ x2 ], urow2[ x2 ], urow3[ x2 ], + vrow0[ x2 ], vrow1[ x2 ], vrow2[ x2 ], vrow3[ x2 ] ); } - if ( !_StoreScanline( pDest, dest->rowPitch, dest->format, target, nwidth ) ) + if ( !_StoreScanlineLinear( pDest, dest->rowPitch, dest->format, target, nwidth, filter ) ) return E_FAIL; pDest += dest->rowPitch; } @@ -785,36 +1644,602 @@ static HRESULT _Generate3DMipsBoxFilter( _In_ size_t depth, _In_ size_t levels, size_t rowPitch = src->rowPitch; - size_t nheight = 
height >> 1; + size_t nwidth = (width > 1) ? (width >> 1) : 1; + size_t nheight = (height > 1) ? (height >> 1) : 1; for( size_t y = 0; y < nheight; ++y ) { - if ( !_LoadScanline( urow0, width, pSrc, rowPitch, src->format ) ) + if ( !_LoadScanlineLinear( urow0, width, pSrc, rowPitch, src->format, filter ) ) return E_FAIL; pSrc += rowPitch; if ( urow0 != urow1 ) { - if ( !_LoadScanline( urow1, width, pSrc, rowPitch, src->format ) ) + if ( !_LoadScanlineLinear( urow1, width, pSrc, rowPitch, src->format, filter ) ) return E_FAIL; pSrc += rowPitch; } - size_t nwidth = width >> 1; + for( size_t x = 0; x < nwidth; ++x ) + { + size_t x2 = x << 1; + + AVERAGE4( target[ x ], urow0[ x2 ], urow1[ x2 ], urow2[ x2 ], urow3[ x2 ] ); + } + + if ( !_StoreScanlineLinear( pDest, dest->rowPitch, dest->format, target, nwidth, filter ) ) + return E_FAIL; + pDest += dest->rowPitch; + } + } + + if ( height > 1 ) + height >>= 1; + + if ( width > 1 ) + width >>= 1; + + if ( depth > 1 ) + depth >>= 1; + } + + return S_OK; +} + + +//--- 3D Linear Filter --- +static HRESULT _Generate3DMipsLinearFilter( _In_ size_t depth, _In_ size_t levels, _In_ DWORD filter, _In_ const ScratchImage& mipChain ) +{ + if ( !depth || !mipChain.GetImages() ) + return E_INVALIDARG; + + // This assumes that the base images are already placed into the mipChain at the top level... 
(see _Setup3DMips) + + assert( levels > 1 ); + + size_t width = mipChain.GetMetadata().width; + size_t height = mipChain.GetMetadata().height; + + // Allocate temporary space (5 scanlines, plus X/Y/Z filters) + ScopedAlignedArrayXMVECTOR scanline( reinterpret_cast<XMVECTOR*>( _aligned_malloc( (sizeof(XMVECTOR)*width*5), 16 ) ) ); + if ( !scanline ) + return E_OUTOFMEMORY; + + std::unique_ptr<LinearFilter[]> lf( new (std::nothrow) LinearFilter[ width+height+depth ] ); + if ( !lf ) + return E_OUTOFMEMORY; + + LinearFilter* lfX = lf.get(); + LinearFilter* lfY = lf.get() + width; + LinearFilter* lfZ = lf.get() + width + height; + + XMVECTOR* target = scanline.get(); + + XMVECTOR* urow0 = target + width; + XMVECTOR* urow1 = target + width*2; + XMVECTOR* vrow0 = target + width*3; + XMVECTOR* vrow1 = target + width*4; + + // Resize base image to each target mip level + for( size_t level=1; level < levels; ++level ) + { + size_t nwidth = (width > 1) ? (width >> 1) : 1; + _CreateLinearFilter( width, nwidth, (filter & TEX_FILTER_WRAP_U) != 0, lfX ); + + size_t nheight = (height > 1) ? 
(height >> 1) : 1; + _CreateLinearFilter( height, nheight, (filter & TEX_FILTER_WRAP_V) != 0, lfY ); + +#ifdef _DEBUG + memset( urow0, 0xCD, sizeof(XMVECTOR)*width ); + memset( urow1, 0xDD, sizeof(XMVECTOR)*width ); + memset( vrow0, 0xED, sizeof(XMVECTOR)*width ); + memset( vrow1, 0xFD, sizeof(XMVECTOR)*width ); +#endif + + if ( depth > 1 ) + { + // 3D linear filter + size_t ndepth = depth >> 1; + _CreateLinearFilter( depth, ndepth, (filter & TEX_FILTER_WRAP_W) != 0, lfZ ); + + for( size_t slice=0; slice < ndepth; ++slice ) + { + auto& toZ = lfZ[ slice ]; + + const Image* srca = mipChain.GetImage( level-1, 0, toZ.u0 ); + const Image* srcb = mipChain.GetImage( level-1, 0, toZ.u1 ); + if ( !srca || !srcb ) + return E_POINTER; + + size_t u0 = size_t(-1); + size_t u1 = size_t(-1); + + const Image* dest = mipChain.GetImage( level, 0, slice ); + if ( !dest ) + return E_POINTER; + + uint8_t* pDest = dest->pixels; + + for( size_t y = 0; y < nheight; ++y ) + { + auto& toY = lfY[ y ]; + + if ( toY.u0 != u0 ) + { + if ( toY.u0 != u1 ) + { + u0 = toY.u0; + + if ( !_LoadScanlineLinear( urow0, width, srca->pixels + (srca->rowPitch * u0), srca->rowPitch, srca->format, filter ) + || !_LoadScanlineLinear( vrow0, width, srcb->pixels + (srcb->rowPitch * u0), srcb->rowPitch, srcb->format, filter ) ) + return E_FAIL; + } + else + { + u0 = u1; + u1 = size_t(-1); + + std::swap( urow0, urow1 ); + std::swap( vrow0, vrow1 ); + } + } + + if ( toY.u1 != u1 ) + { + u1 = toY.u1; + + if ( !_LoadScanlineLinear( urow1, width, srca->pixels + (srca->rowPitch * u1), srca->rowPitch, srca->format, filter ) + || !_LoadScanlineLinear( vrow1, width, srcb->pixels + (srcb->rowPitch * u1), srcb->rowPitch, srcb->format, filter ) ) + return E_FAIL; + } + + for( size_t x = 0; x < nwidth; ++x ) + { + auto& toX = lfX[ x ]; + + TRILINEAR_INTERPOLATE( target[x], toX, toY, toZ, urow0, urow1, vrow0, vrow1 ); + } + + if ( !_StoreScanlineLinear( pDest, dest->rowPitch, dest->format, target, nwidth, filter ) ) + return 
E_FAIL; + pDest += dest->rowPitch; + } + } + } + else + { + // 2D linear filter + const Image* src = mipChain.GetImage( level-1, 0, 0 ); + const Image* dest = mipChain.GetImage( level, 0, 0 ); + + if ( !src || !dest ) + return E_POINTER; + + const uint8_t* pSrc = src->pixels; + uint8_t* pDest = dest->pixels; + + size_t rowPitch = src->rowPitch; + + size_t u0 = size_t(-1); + size_t u1 = size_t(-1); + + for( size_t y = 0; y < nheight; ++y ) + { + auto& toY = lfY[ y ]; + + if ( toY.u0 != u0 ) + { + if ( toY.u0 != u1 ) + { + u0 = toY.u0; + + if ( !_LoadScanlineLinear( urow0, width, pSrc + (rowPitch * u0), rowPitch, src->format, filter ) ) + return E_FAIL; + } + else + { + u0 = u1; + u1 = size_t(-1); + + std::swap( urow0, urow1 ); + } + } + + if ( toY.u1 != u1 ) + { + u1 = toY.u1; + + if ( !_LoadScanlineLinear( urow1, width, pSrc + (rowPitch * u1), rowPitch, src->format, filter ) ) + return E_FAIL; + } + + for( size_t x = 0; x < nwidth; ++x ) + { + auto& toX = lfX[ x ]; + + BILINEAR_INTERPOLATE( target[x], toX, toY, urow0, urow1 ); + } + + if ( !_StoreScanlineLinear( pDest, dest->rowPitch, dest->format, target, nwidth, filter ) ) + return E_FAIL; + pDest += dest->rowPitch; + } + } + + if ( height > 1 ) + height >>= 1; + + if ( width > 1 ) + width >>= 1; + + if ( depth > 1 ) + depth >>= 1; + } + + return S_OK; +} + + +//--- 3D Cubic Filter --- +static HRESULT _Generate3DMipsCubicFilter( _In_ size_t depth, _In_ size_t levels, _In_ DWORD filter, _In_ const ScratchImage& mipChain ) +{ + if ( !depth || !mipChain.GetImages() ) + return E_INVALIDARG; + + // This assumes that the base images are already placed into the mipChain at the top level... 
(see _Setup3DMips) + + assert( levels > 1 ); + + size_t width = mipChain.GetMetadata().width; + size_t height = mipChain.GetMetadata().height; + + // Allocate temporary space (17 scanlines, plus X/Y/Z filters) + ScopedAlignedArrayXMVECTOR scanline( reinterpret_cast<XMVECTOR*>( _aligned_malloc( (sizeof(XMVECTOR)*width*17), 16 ) ) ); + if ( !scanline ) + return E_OUTOFMEMORY; + + std::unique_ptr<CubicFilter[]> cf( new (std::nothrow) CubicFilter[ width+height+depth ] ); + if ( !cf ) + return E_OUTOFMEMORY; + + CubicFilter* cfX = cf.get(); + CubicFilter* cfY = cf.get() + width; + CubicFilter* cfZ = cf.get() + width + height; + + XMVECTOR* target = scanline.get(); + + XMVECTOR* urow[4]; + XMVECTOR* vrow[4]; + XMVECTOR* srow[4]; + XMVECTOR* trow[4]; + + XMVECTOR *ptr = scanline.get() + width; + for( size_t j = 0; j < 4; ++j ) + { + urow[j] = ptr; ptr += width; + vrow[j] = ptr; ptr += width; + srow[j] = ptr; ptr += width; + trow[j] = ptr; ptr += width; + } + + // Resize base image to each target mip level + for( size_t level=1; level < levels; ++level ) + { + size_t nwidth = (width > 1) ? (width >> 1) : 1; + _CreateCubicFilter( width, nwidth, (filter & TEX_FILTER_WRAP_U) != 0, (filter & TEX_FILTER_MIRROR_U) != 0, cfX ); + + size_t nheight = (height > 1) ? 
(height >> 1) : 1; + _CreateCubicFilter( height, nheight, (filter & TEX_FILTER_WRAP_V) != 0, (filter & TEX_FILTER_MIRROR_V) != 0, cfY ); + +#ifdef _DEBUG + for( size_t j = 0; j < 4; ++j ) + { + memset( urow[j], 0xCD, sizeof(XMVECTOR)*width ); + memset( vrow[j], 0xDD, sizeof(XMVECTOR)*width ); + memset( srow[j], 0xED, sizeof(XMVECTOR)*width ); + memset( trow[j], 0xFD, sizeof(XMVECTOR)*width ); + } +#endif + + if ( depth > 1 ) + { + // 3D cubic filter + size_t ndepth = depth >> 1; + _CreateCubicFilter( depth, ndepth, (filter & TEX_FILTER_WRAP_W) != 0, (filter & TEX_FILTER_MIRROR_W) != 0, cfZ ); + + for( size_t slice=0; slice < ndepth; ++slice ) + { + auto& toZ = cfZ[ slice ]; + + const Image* srca = mipChain.GetImage( level-1, 0, toZ.u0 ); + const Image* srcb = mipChain.GetImage( level-1, 0, toZ.u1 ); + const Image* srcc = mipChain.GetImage( level-1, 0, toZ.u2 ); + const Image* srcd = mipChain.GetImage( level-1, 0, toZ.u3 ); + if ( !srca || !srcb || !srcc || !srcd ) + return E_POINTER; + + size_t u0 = size_t(-1); + size_t u1 = size_t(-1); + size_t u2 = size_t(-1); + size_t u3 = size_t(-1); + + const Image* dest = mipChain.GetImage( level, 0, slice ); + if ( !dest ) + return E_POINTER; + + uint8_t* pDest = dest->pixels; + + for( size_t y = 0; y < nheight; ++y ) + { + auto& toY = cfY[ y ]; + + // Scanline 1 + if ( toY.u0 != u0 ) + { + if ( toY.u0 != u1 && toY.u0 != u2 && toY.u0 != u3 ) + { + u0 = toY.u0; + + if ( !_LoadScanlineLinear( urow[0], width, srca->pixels + (srca->rowPitch * u0), srca->rowPitch, srca->format, filter ) + || !_LoadScanlineLinear( urow[1], width, srcb->pixels + (srcb->rowPitch * u0), srcb->rowPitch, srcb->format, filter ) + || !_LoadScanlineLinear( urow[2], width, srcc->pixels + (srcc->rowPitch * u0), srcc->rowPitch, srcc->format, filter ) + || !_LoadScanlineLinear( urow[3], width, srcd->pixels + (srcd->rowPitch * u0), srcd->rowPitch, srcd->format, filter ) ) + return E_FAIL; + } + else if ( toY.u0 == u1 ) + { + u0 = u1; + u1 = size_t(-1); + + 
std::swap( urow[0], vrow[0] ); + std::swap( urow[1], vrow[1] ); + std::swap( urow[2], vrow[2] ); + std::swap( urow[3], vrow[3] ); + } + else if ( toY.u0 == u2 ) + { + u0 = u2; + u2 = size_t(-1); + + std::swap( urow[0], srow[0] ); + std::swap( urow[1], srow[1] ); + std::swap( urow[2], srow[2] ); + std::swap( urow[3], srow[3] ); + } + else if ( toY.u0 == u3 ) + { + u0 = u3; + u3 = size_t(-1); + + std::swap( urow[0], trow[0] ); + std::swap( urow[1], trow[1] ); + std::swap( urow[2], trow[2] ); + std::swap( urow[3], trow[3] ); + } + } + + // Scanline 2 + if ( toY.u1 != u1 ) + { + if ( toY.u1 != u2 && toY.u1 != u3 ) + { + u1 = toY.u1; + + if ( !_LoadScanlineLinear( vrow[0], width, srca->pixels + (srca->rowPitch * u1), srca->rowPitch, srca->format, filter ) + || !_LoadScanlineLinear( vrow[1], width, srcb->pixels + (srcb->rowPitch * u1), srcb->rowPitch, srcb->format, filter ) + || !_LoadScanlineLinear( vrow[2], width, srcc->pixels + (srcc->rowPitch * u1), srcc->rowPitch, srcc->format, filter ) + || !_LoadScanlineLinear( vrow[3], width, srcd->pixels + (srcd->rowPitch * u1), srcd->rowPitch, srcd->format, filter ) ) + return E_FAIL; + } + else if ( toY.u1 == u2 ) + { + u1 = u2; + u2 = size_t(-1); + + std::swap( vrow[0], srow[0] ); + std::swap( vrow[1], srow[1] ); + std::swap( vrow[2], srow[2] ); + std::swap( vrow[3], srow[3] ); + } + else if ( toY.u1 == u3 ) + { + u1 = u3; + u3 = size_t(-1); + + std::swap( vrow[0], trow[0] ); + std::swap( vrow[1], trow[1] ); + std::swap( vrow[2], trow[2] ); + std::swap( vrow[3], trow[3] ); + } + } + + // Scanline 3 + if ( toY.u2 != u2 ) + { + if ( toY.u2 != u3 ) + { + u2 = toY.u2; + + if ( !_LoadScanlineLinear( srow[0], width, srca->pixels + (srca->rowPitch * u2), srca->rowPitch, srca->format, filter ) + || !_LoadScanlineLinear( srow[1], width, srcb->pixels + (srcb->rowPitch * u2), srcb->rowPitch, srcb->format, filter ) + || !_LoadScanlineLinear( srow[2], width, srcc->pixels + (srcc->rowPitch * u2), srcc->rowPitch, srcc->format, filter ) + || 
!_LoadScanlineLinear( srow[3], width, srcd->pixels + (srcd->rowPitch * u2), srcd->rowPitch, srcd->format, filter ) ) + return E_FAIL; + } + else + { + u2 = u3; + u3 = size_t(-1); + + std::swap( srow[0], trow[0] ); + std::swap( srow[1], trow[1] ); + std::swap( srow[2], trow[2] ); + std::swap( srow[3], trow[3] ); + } + } + + // Scanline 4 + if ( toY.u3 != u3 ) + { + u3 = toY.u3; + + if ( !_LoadScanlineLinear( trow[0], width, srca->pixels + (srca->rowPitch * u3), srca->rowPitch, srca->format, filter ) + || !_LoadScanlineLinear( trow[1], width, srcb->pixels + (srcb->rowPitch * u3), srcb->rowPitch, srcb->format, filter ) + || !_LoadScanlineLinear( trow[2], width, srcc->pixels + (srcc->rowPitch * u3), srcc->rowPitch, srcc->format, filter ) + || !_LoadScanlineLinear( trow[3], width, srcd->pixels + (srcd->rowPitch * u3), srcd->rowPitch, srcd->format, filter ) ) + return E_FAIL; + } + + for( size_t x = 0; x < nwidth; ++x ) + { + auto& toX = cfX[ x ]; + + XMVECTOR D[4]; + + for( size_t j=0; j < 4; ++j ) + { + XMVECTOR C0, C1, C2, C3; + CUBIC_INTERPOLATE( C0, toX.x, urow[j][ toX.u0 ], urow[j][ toX.u1 ], urow[j][ toX.u2 ], urow[j][ toX.u3 ] ); + CUBIC_INTERPOLATE( C1, toX.x, vrow[j][ toX.u0 ], vrow[j][ toX.u1 ], vrow[j][ toX.u2 ], vrow[j][ toX.u3 ] ); + CUBIC_INTERPOLATE( C2, toX.x, srow[j][ toX.u0 ], srow[j][ toX.u1 ], srow[j][ toX.u2 ], srow[j][ toX.u3 ] ); + CUBIC_INTERPOLATE( C3, toX.x, trow[j][ toX.u0 ], trow[j][ toX.u1 ], trow[j][ toX.u2 ], trow[j][ toX.u3 ] ); + + CUBIC_INTERPOLATE( D[j], toY.x, C0, C1, C2, C3 ); + } + + CUBIC_INTERPOLATE( target[x], toZ.x, D[0], D[1], D[2], D[3] ); + } + + if ( !_StoreScanlineLinear( pDest, dest->rowPitch, dest->format, target, nwidth, filter ) ) + return E_FAIL; + pDest += dest->rowPitch; + } + } + } + else + { + // 2D cubic filter + const Image* src = mipChain.GetImage( level-1, 0, 0 ); + const Image* dest = mipChain.GetImage( level, 0, 0 ); + + if ( !src || !dest ) + return E_POINTER; + + const uint8_t* pSrc = src->pixels; + 
uint8_t* pDest = dest->pixels; + + size_t rowPitch = src->rowPitch; + + size_t u0 = size_t(-1); + size_t u1 = size_t(-1); + size_t u2 = size_t(-1); + size_t u3 = size_t(-1); + + for( size_t y = 0; y < nheight; ++y ) + { + auto& toY = cfY[ y ]; + + // Scanline 1 + if ( toY.u0 != u0 ) + { + if ( toY.u0 != u1 && toY.u0 != u2 && toY.u0 != u3 ) + { + u0 = toY.u0; + + if ( !_LoadScanlineLinear( urow[0], width, pSrc + (rowPitch * u0), rowPitch, src->format, filter ) ) + return E_FAIL; + } + else if ( toY.u0 == u1 ) + { + u0 = u1; + u1 = size_t(-1); + + std::swap( urow[0], vrow[0] ); + } + else if ( toY.u0 == u2 ) + { + u0 = u2; + u2 = size_t(-1); + + std::swap( urow[0], srow[0] ); + } + else if ( toY.u0 == u3 ) + { + u0 = u3; + u3 = size_t(-1); + + std::swap( urow[0], trow[0] ); + } + } + + // Scanline 2 + if ( toY.u1 != u1 ) + { + if ( toY.u1 != u2 && toY.u1 != u3 ) + { + u1 = toY.u1; + + if ( !_LoadScanlineLinear( vrow[0], width, pSrc + (rowPitch * u1), rowPitch, src->format, filter ) ) + return E_FAIL; + } + else if ( toY.u1 == u2 ) + { + u1 = u2; + u2 = size_t(-1); + + std::swap( vrow[0], srow[0] ); + } + else if ( toY.u1 == u3 ) + { + u1 = u3; + u3 = size_t(-1); + + std::swap( vrow[0], trow[0] ); + } + } + + // Scanline 3 + if ( toY.u2 != u2 ) + { + if ( toY.u2 != u3 ) + { + u2 = toY.u2; + + if ( !_LoadScanlineLinear( srow[0], width, pSrc + (rowPitch * u2), rowPitch, src->format, filter ) ) + return E_FAIL; + } + else + { + u2 = u3; + u3 = size_t(-1); + + std::swap( srow[0], trow[0] ); + } + } + + // Scanline 4 + if ( toY.u3 != u3 ) + { + u3 = toY.u3; + + if ( !_LoadScanlineLinear( trow[0], width, pSrc + (rowPitch * u3), rowPitch, src->format, filter ) ) + return E_FAIL; + } for( size_t x = 0; x < nwidth; ++x ) { - size_t x2 = x*2; + auto& toX = cfX[ x ]; - // Box filter: Average 2x2 pixels - XMVECTOR v = XMVectorAdd( urow0[ x2 ], urow1[ x2 ] ); - v = XMVectorAdd( v, urow2[ x2 ] ); - v = XMVectorAdd( v, urow3[ x2 ] ); + XMVECTOR C0, C1, C2, C3; + CUBIC_INTERPOLATE( 
C0, toX.x, urow[0][ toX.u0 ], urow[0][ toX.u1 ], urow[0][ toX.u2 ], urow[0][ toX.u3 ] ); + CUBIC_INTERPOLATE( C1, toX.x, vrow[0][ toX.u0 ], vrow[0][ toX.u1 ], vrow[0][ toX.u2 ], vrow[0][ toX.u3 ] ); + CUBIC_INTERPOLATE( C2, toX.x, srow[0][ toX.u0 ], srow[0][ toX.u1 ], srow[0][ toX.u2 ], srow[0][ toX.u3 ] ); + CUBIC_INTERPOLATE( C3, toX.x, trow[0][ toX.u0 ], trow[0][ toX.u1 ], trow[0][ toX.u2 ], trow[0][ toX.u3 ] ); - target[ x ] = XMVectorMultiply( v, s_boxScale ); + CUBIC_INTERPOLATE( target[x], toY.x, C0, C1, C2, C3 ); } - if ( !_StoreScanline( pDest, dest->rowPitch, dest->format, target, nwidth ) ) + if ( !_StoreScanlineLinear( pDest, dest->rowPitch, dest->format, target, nwidth, filter ) ) return E_FAIL; pDest += dest->rowPitch; } @@ -830,7 +2255,245 @@ static HRESULT _Generate3DMipsBoxFilter( _In_ size_t depth, _In_ size_t levels, depth >>= 1; } - assert( height == 1 && width == 1 && depth == 1 ); + return S_OK; +} + + +//--- 3D Triangle Filter --- +static HRESULT _Generate3DMipsTriangleFilter( _In_ size_t depth, _In_ size_t levels, _In_ DWORD filter, _In_ const ScratchImage& mipChain ) +{ + if ( !depth || !mipChain.GetImages() ) + return E_INVALIDARG; + + using namespace TriangleFilter; + + // This assumes that the base images are already placed into the mipChain at the top level... 
(see _Setup3DMips) + + assert( levels > 1 ); + + size_t width = mipChain.GetMetadata().width; + size_t height = mipChain.GetMetadata().height; + + // Allocate initial temporary space (1 scanline, accumulation rows, plus X/Y/Z filters) + ScopedAlignedArrayXMVECTOR scanline( reinterpret_cast<XMVECTOR*>( _aligned_malloc( sizeof(XMVECTOR) * width, 16 ) ) ); + if ( !scanline ) + return E_OUTOFMEMORY; + + std::unique_ptr<TriangleRow[]> sliceActive( new (std::nothrow) TriangleRow[ depth ] ); + if ( !sliceActive ) + return E_OUTOFMEMORY; + + TriangleRow * sliceFree = nullptr; + + std::unique_ptr<Filter> tfX, tfY, tfZ; + + XMVECTOR* row = scanline.get(); + + // Resize base image to each target mip level + for( size_t level=1; level < levels; ++level ) + { + size_t nwidth = (width > 1) ? (width >> 1) : 1; + HRESULT hr = _Create( width, nwidth, (filter & TEX_FILTER_WRAP_U) != 0, tfX ); + if ( FAILED(hr) ) + return hr; + + size_t nheight = (height > 1) ? (height >> 1) : 1; + hr = _Create( height, nheight, (filter & TEX_FILTER_WRAP_V) != 0, tfY ); + if ( FAILED(hr) ) + return hr; + + size_t ndepth = (depth > 1 ) ? 
(depth >> 1) : 1; + hr = _Create( depth, ndepth, (filter & TEX_FILTER_WRAP_W) != 0, tfZ ); + if ( FAILED(hr) ) + return hr; + +#ifdef _DEBUG + memset( row, 0xCD, sizeof(XMVECTOR)*width ); +#endif + + auto xFromEnd = reinterpret_cast<const FilterFrom*>( reinterpret_cast<const uint8_t*>( tfX.get() ) + tfX->sizeInBytes ); + auto yFromEnd = reinterpret_cast<const FilterFrom*>( reinterpret_cast<const uint8_t*>( tfY.get() ) + tfY->sizeInBytes ); + auto zFromEnd = reinterpret_cast<const FilterFrom*>( reinterpret_cast<const uint8_t*>( tfZ.get() ) + tfZ->sizeInBytes ); + + // Count times slices get written (and clear out any leftover accumulation slices from last miplevel) + for( FilterFrom* zFrom = tfZ->from; zFrom < zFromEnd; ) + { + for ( size_t j = 0; j < zFrom->count; ++j ) + { + size_t w = zFrom->to[ j ].u; + assert( w < ndepth ); + TriangleRow* sliceAcc = &sliceActive[ w ]; + + ++sliceAcc->remaining; + + if ( sliceAcc->scanline ) + { + memset( sliceAcc->scanline.get(), 0, sizeof(XMVECTOR) * nwidth * nheight ); + } + } + + zFrom = reinterpret_cast<FilterFrom*>( reinterpret_cast<uint8_t*>( zFrom ) + zFrom->sizeInBytes ); + } + + // Filter image + size_t z = 0; + for( FilterFrom* zFrom = tfZ->from; zFrom < zFromEnd; ++z ) + { + // Create accumulation slices as needed + for ( size_t j = 0; j < zFrom->count; ++j ) + { + size_t w = zFrom->to[ j ].u; + assert( w < ndepth ); + TriangleRow* sliceAcc = &sliceActive[ w ]; + + if ( !sliceAcc->scanline ) + { + if ( sliceFree ) + { + // Steal and reuse scanline from 'free slice' list + // (it will always be at least as large as nwidth*nheight due to loop decending order) + assert( sliceFree->scanline != 0 ); + sliceAcc->scanline.reset( sliceFree->scanline.release() ); + sliceFree = sliceFree->next; + } + else + { + size_t bytes = sizeof(XMVECTOR) * nwidth * nheight; + sliceAcc->scanline.reset( reinterpret_cast<XMVECTOR*>( _aligned_malloc( bytes, 16 ) ) ); + if ( !sliceAcc->scanline ) + return E_OUTOFMEMORY; + } + + memset( 
sliceAcc->scanline.get(), 0, sizeof(XMVECTOR) * nwidth * nheight ); + } + } + + assert( z < depth ); + const Image* src = mipChain.GetImage( level-1, 0, z ); + if ( !src ) + return E_POINTER; + + const uint8_t* pSrc = src->pixels; + size_t rowPitch = src->rowPitch; + const uint8_t* pEndSrc = pSrc + rowPitch * height; + + for( FilterFrom* yFrom = tfY->from; yFrom < yFromEnd; ) + { + // Load source scanline + if ( (pSrc + rowPitch) > pEndSrc ) + return E_FAIL; + + if ( !_LoadScanlineLinear( row, width, pSrc, rowPitch, src->format, filter ) ) + return E_FAIL; + + pSrc += rowPitch; + + // Process row + size_t x = 0; + for( FilterFrom* xFrom = tfX->from; xFrom < xFromEnd; ++x ) + { + for ( size_t j = 0; j < zFrom->count; ++j ) + { + size_t w = zFrom->to[ j ].u; + assert( w < ndepth ); + float zweight = zFrom->to[ j ].weight; + + XMVECTOR* accSlice = sliceActive[ w ].scanline.get(); + if ( !accSlice ) + return E_POINTER; + + for ( size_t k = 0; k < yFrom->count; ++k ) + { + size_t v = yFrom->to[ k ].u; + assert( v < nheight ); + float yweight = yFrom->to[ k ].weight; + + XMVECTOR * accPtr = accSlice + v * nwidth; + + for ( size_t l = 0; l < xFrom->count; ++l ) + { + size_t u = xFrom->to[ l ].u; + assert( u < nwidth ); + + XMVECTOR weight = XMVectorReplicate( zweight * yweight * xFrom->to[ l ].weight ); + + assert( x < width ); + accPtr[ u ] = XMVectorMultiplyAdd( row[ x ], weight, accPtr[ u ] ); + } + } + } + + xFrom = reinterpret_cast<FilterFrom*>( reinterpret_cast<uint8_t*>( xFrom ) + xFrom->sizeInBytes ); + } + + yFrom = reinterpret_cast<FilterFrom*>( reinterpret_cast<uint8_t*>( yFrom ) + yFrom->sizeInBytes ); + } + + // Write completed accumulation slices + for ( size_t j = 0; j < zFrom->count; ++j ) + { + size_t w = zFrom->to[ j ].u; + assert( w < ndepth ); + TriangleRow* sliceAcc = &sliceActive[ w ]; + + assert( sliceAcc->remaining > 0 ); + --sliceAcc->remaining; + + if ( !sliceAcc->remaining ) + { + const Image* dest = mipChain.GetImage( level, 0, w ); + XMVECTOR* 
pAccSrc = sliceAcc->scanline.get(); + if ( !dest || !pAccSrc ) + return E_POINTER; + + uint8_t* pDest = dest->pixels; + + for( size_t h = 0; h < nheight; ++h ) + { + switch( dest->format ) + { + case DXGI_FORMAT_R10G10B10A2_UNORM: + case DXGI_FORMAT_R10G10B10A2_UINT: + { + // Need to slightly bias results for floating-point error accumulation which can + // be visible with harshly quantized values + static const XMVECTORF32 Bias = { 0.f, 0.f, 0.f, 0.1f }; + + XMVECTOR* ptr = pAccSrc; + for( size_t i=0; i < dest->width; ++i, ++ptr ) + { + *ptr = XMVectorAdd( *ptr, Bias ); + } + } + break; + } + + // This performs any required clamping + if ( !_StoreScanlineLinear( pDest, dest->rowPitch, dest->format, pAccSrc, dest->width, filter ) ) + return E_FAIL; + + pDest += dest->rowPitch; + pAccSrc += nwidth; + } + + // Put slice on freelist to reuse it's allocated scanline + sliceAcc->next = sliceFree; + sliceFree = sliceAcc; + } + } + + zFrom = reinterpret_cast<FilterFrom*>( reinterpret_cast<uint8_t*>( zFrom ) + zFrom->sizeInBytes ); + } + + if ( height > 1 ) + height >>= 1; + + if ( width > 1 ) + width >>= 1; + + if ( depth > 1 ) + depth >>= 1; + } return S_OK; } @@ -843,6 +2506,7 @@ static HRESULT _Generate3DMipsBoxFilter( _In_ size_t depth, _In_ size_t levels, //------------------------------------------------------------------------------------- // Generate mipmap chain //------------------------------------------------------------------------------------- +_Use_decl_annotations_ HRESULT GenerateMipMaps( const Image& baseImage, DWORD filter, size_t levels, ScratchImage& mipChain, bool allow1D ) { if ( !IsValid( baseImage.format ) ) @@ -854,157 +2518,363 @@ HRESULT GenerateMipMaps( const Image& baseImage, DWORD filter, size_t levels, Sc if ( !_CalculateMipLevels(baseImage.width, baseImage.height, levels) ) return E_INVALIDARG; - if ( IsCompressed( baseImage.format ) || IsVideo( baseImage.format ) ) + if ( levels <= 1 ) + return E_INVALIDARG; + + if ( 
IsCompressed(baseImage.format) || IsTypeless(baseImage.format) || IsPlanar(baseImage.format) || IsPalettized(baseImage.format) ) { return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); } + HRESULT hr; + static_assert( TEX_FILTER_POINT == 0x100000, "TEX_FILTER_ flag values don't match TEX_FILTER_MASK" ); - switch(filter & TEX_FILTER_MASK) + + if ( _UseWICFiltering( baseImage.format, filter ) ) { - case 0: - case TEX_FILTER_POINT: - case TEX_FILTER_FANT: // Equivalent to Box filter - case TEX_FILTER_LINEAR: - case TEX_FILTER_CUBIC: + //--- Use WIC filtering to generate mipmaps ----------------------------------- + switch(filter & TEX_FILTER_MASK) { - WICPixelFormatGUID pfGUID; - if ( _DXGIToWIC( baseImage.format, pfGUID ) ) - { - // Case 1: Base image format is supported by Windows Imaging Component - HRESULT hr = (baseImage.height > 1 || !allow1D) + case 0: + case TEX_FILTER_POINT: + case TEX_FILTER_FANT: // Equivalent to Box filter + case TEX_FILTER_LINEAR: + case TEX_FILTER_CUBIC: + { + static_assert( TEX_FILTER_FANT == TEX_FILTER_BOX, "TEX_FILTER_ flag alias mismatch" ); + + WICPixelFormatGUID pfGUID; + if ( _DXGIToWIC( baseImage.format, pfGUID, true ) ) + { + // Case 1: Base image format is supported by Windows Imaging Component + hr = (baseImage.height > 1 || !allow1D) ? 
mipChain.Initialize2D( baseImage.format, baseImage.width, baseImage.height, 1, levels ) : mipChain.Initialize1D( baseImage.format, baseImage.width, 1, levels ); + if ( FAILED(hr) ) + return hr; + + return _GenerateMipMapsUsingWIC( baseImage, filter, levels, pfGUID, mipChain, 0 ); + } + else + { + // Case 2: Base image format is not supported by WIC, so we have to convert, generate, and convert back + assert( baseImage.format != DXGI_FORMAT_R32G32B32A32_FLOAT ); + ScratchImage temp; + hr = _ConvertToR32G32B32A32( baseImage, temp ); + if ( FAILED(hr) ) + return hr; + + const Image *timg = temp.GetImage( 0, 0, 0 ); + if ( !timg ) + return E_POINTER; + + ScratchImage tMipChain; + hr = (baseImage.height > 1 || !allow1D) + ? tMipChain.Initialize2D( DXGI_FORMAT_R32G32B32A32_FLOAT, baseImage.width, baseImage.height, 1, levels ) + : tMipChain.Initialize1D( DXGI_FORMAT_R32G32B32A32_FLOAT, baseImage.width, 1, levels ); + if ( FAILED(hr) ) + return hr; + + hr = _GenerateMipMapsUsingWIC( *timg, filter, levels, GUID_WICPixelFormat128bppRGBAFloat, tMipChain, 0 ); + if ( FAILED(hr) ) + return hr; + + temp.Release(); + + return _ConvertFromR32G32B32A32( tMipChain.GetImages(), tMipChain.GetImageCount(), tMipChain.GetMetadata(), baseImage.format, mipChain ); + } + } + break; + + default: + return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); + } + } + else + { + //--- Use custom filters to generate mipmaps ---------------------------------- + TexMetadata mdata; + memset( &mdata, 0, sizeof(mdata) ); + mdata.width = baseImage.width; + if ( baseImage.height > 1 || !allow1D ) + { + mdata.height = baseImage.height; + mdata.dimension = TEX_DIMENSION_TEXTURE2D; + } + else + { + mdata.height = 1; + mdata.dimension= TEX_DIMENSION_TEXTURE1D; + } + mdata.depth = mdata.arraySize = 1; + mdata.mipLevels = levels; + mdata.format = baseImage.format; + + DWORD filter_select = ( filter & TEX_FILTER_MASK ); + if ( !filter_select ) + { + // Default filter choice + filter_select = ( ispow2(baseImage.width) 
&& ispow2(baseImage.height) ) ? TEX_FILTER_BOX : TEX_FILTER_LINEAR; + } + + switch( filter_select ) + { + case TEX_FILTER_BOX: + hr = _Setup2DMips( &baseImage, 1, mdata, mipChain ); if ( FAILED(hr) ) return hr; - return _GenerateMipMapsUsingWIC( baseImage, filter, levels, pfGUID, mipChain, 0 ); - } - else - { - // Case 2: Base image format is not supported by WIC, so we have to convert, generate, and convert back - assert( baseImage.format != DXGI_FORMAT_R32G32B32A32_FLOAT ); - ScratchImage temp; - HRESULT hr = _ConvertToR32G32B32A32( baseImage, temp ); + hr = _Generate2DMipsBoxFilter( levels, filter, mipChain, 0 ); + if ( FAILED(hr) ) + mipChain.Release(); + return hr; + + case TEX_FILTER_POINT: + hr = _Setup2DMips( &baseImage, 1, mdata, mipChain ); if ( FAILED(hr) ) return hr; - const Image *timg = temp.GetImage( 0, 0, 0 ); - if ( !timg ) - return E_POINTER; + hr = _Generate2DMipsPointFilter( levels, mipChain, 0 ); + if ( FAILED(hr) ) + mipChain.Release(); + return hr; - ScratchImage tMipChain; - hr = _GenerateMipMapsUsingWIC( *timg, filter, levels, GUID_WICPixelFormat128bppRGBAFloat, tMipChain, 0 ); + case TEX_FILTER_LINEAR: + hr = _Setup2DMips( &baseImage, 1, mdata, mipChain ); if ( FAILED(hr) ) return hr; - temp.Release(); + hr = _Generate2DMipsLinearFilter( levels, filter, mipChain, 0 ); + if ( FAILED(hr) ) + mipChain.Release(); + return hr; - return _ConvertFromR32G32B32A32( tMipChain.GetImages(), tMipChain.GetImageCount(), tMipChain.GetMetadata(), baseImage.format, mipChain ); - } - } - break; + case TEX_FILTER_CUBIC: + hr = _Setup2DMips( &baseImage, 1, mdata, mipChain ); + if ( FAILED(hr) ) + return hr; - default: - return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); + hr = _Generate2DMipsCubicFilter( levels, filter, mipChain, 0 ); + if ( FAILED(hr) ) + mipChain.Release(); + return hr; + + case TEX_FILTER_TRIANGLE: + hr = _Setup2DMips( &baseImage, 1, mdata, mipChain ); + if ( FAILED(hr) ) + return hr; + + hr = _Generate2DMipsTriangleFilter( levels, filter, 
mipChain, 0 ); + if ( FAILED(hr) ) + mipChain.Release(); + return hr; + + default: + return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); + } } } +_Use_decl_annotations_ HRESULT GenerateMipMaps( const Image* srcImages, size_t nimages, const TexMetadata& metadata, DWORD filter, size_t levels, ScratchImage& mipChain ) { if ( !srcImages || !nimages || !IsValid(metadata.format) ) return E_INVALIDARG; - if ( metadata.dimension == TEX_DIMENSION_TEXTURE3D - || IsCompressed( metadata.format ) || IsVideo( metadata.format ) ) + if ( metadata.IsVolumemap() + || IsCompressed(metadata.format) || IsTypeless(metadata.format) || IsPlanar(metadata.format) || IsPalettized(metadata.format) ) return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); if ( !_CalculateMipLevels(metadata.width, metadata.height, levels) ) return E_INVALIDARG; + if ( levels <= 1 ) + return E_INVALIDARG; + + std::vector<Image> baseImages; + baseImages.reserve( metadata.arraySize ); + for( size_t item=0; item < metadata.arraySize; ++item ) + { + size_t index = metadata.ComputeIndex( 0, item, 0); + if ( index >= nimages ) + return E_FAIL; + + const Image& src = srcImages[ index ]; + if ( !src.pixels ) + return E_POINTER; + + if ( src.format != metadata.format || src.width != metadata.width || src.height != metadata.height ) + { + // All base images must be the same format, width, and height + return E_FAIL; + } + + baseImages.push_back( src ); + } + + assert( baseImages.size() == metadata.arraySize ); + + HRESULT hr; + static_assert( TEX_FILTER_POINT == 0x100000, "TEX_FILTER_ flag values don't match TEX_FILTER_MASK" ); - switch(filter & TEX_FILTER_MASK) + + if ( _UseWICFiltering( metadata.format, filter ) ) { - case 0: - case TEX_FILTER_POINT: - case TEX_FILTER_FANT: // Equivalent to Box filter - case TEX_FILTER_LINEAR: - case TEX_FILTER_CUBIC: + //--- Use WIC filtering to generate mipmaps ----------------------------------- + switch(filter & TEX_FILTER_MASK) { - WICPixelFormatGUID pfGUID; - if ( _DXGIToWIC( 
metadata.format, pfGUID ) ) + case 0: + case TEX_FILTER_POINT: + case TEX_FILTER_FANT: // Equivalent to Box filter + case TEX_FILTER_LINEAR: + case TEX_FILTER_CUBIC: { - // Case 1: Base image format is supported by Windows Imaging Component - TexMetadata mdata2 = metadata; - mdata2.mipLevels = levels; - HRESULT hr = mipChain.Initialize( mdata2 ); + static_assert( TEX_FILTER_FANT == TEX_FILTER_BOX, "TEX_FILTER_ flag alias mismatch" ); + + WICPixelFormatGUID pfGUID; + if ( _DXGIToWIC( metadata.format, pfGUID, true ) ) + { + // Case 1: Base image format is supported by Windows Imaging Component + TexMetadata mdata2 = metadata; + mdata2.mipLevels = levels; + hr = mipChain.Initialize( mdata2 ); + if ( FAILED(hr) ) + return hr; + + for( size_t item = 0; item < metadata.arraySize; ++item ) + { + hr = _GenerateMipMapsUsingWIC( baseImages[item], filter, levels, pfGUID, mipChain, item ); + if ( FAILED(hr) ) + { + mipChain.Release(); + return hr; + } + } + + return S_OK; + } + else + { + // Case 2: Base image format is not supported by WIC, so we have to convert, generate, and convert back + assert( metadata.format != DXGI_FORMAT_R32G32B32A32_FLOAT ); + + TexMetadata mdata2 = metadata; + mdata2.mipLevels = levels; + mdata2.format = DXGI_FORMAT_R32G32B32A32_FLOAT; + ScratchImage tMipChain; + hr = tMipChain.Initialize( mdata2 ); + if ( FAILED(hr) ) + return hr; + + for( size_t item = 0; item < metadata.arraySize; ++item ) + { + ScratchImage temp; + hr = _ConvertToR32G32B32A32( baseImages[item], temp ); + if ( FAILED(hr) ) + return hr; + + const Image *timg = temp.GetImage( 0, 0, 0 ); + if ( !timg ) + return E_POINTER; + + hr = _GenerateMipMapsUsingWIC( *timg, filter, levels, GUID_WICPixelFormat128bppRGBAFloat, tMipChain, item ); + if ( FAILED(hr) ) + return hr; + } + + return _ConvertFromR32G32B32A32( tMipChain.GetImages(), tMipChain.GetImageCount(), tMipChain.GetMetadata(), metadata.format, mipChain ); + } + } + break; + + default: + return HRESULT_FROM_WIN32( 
ERROR_NOT_SUPPORTED ); + } + } + else + { + //--- Use custom filters to generate mipmaps ---------------------------------- + TexMetadata mdata2 = metadata; + mdata2.mipLevels = levels; + + DWORD filter_select = ( filter & TEX_FILTER_MASK ); + if ( !filter_select ) + { + // Default filter choice + filter_select = ( ispow2(metadata.width) && ispow2(metadata.height) ) ? TEX_FILTER_BOX : TEX_FILTER_LINEAR; + } + + switch( filter_select ) + { + case TEX_FILTER_BOX: + hr = _Setup2DMips( &baseImages[0], metadata.arraySize, mdata2, mipChain ); if ( FAILED(hr) ) return hr; for( size_t item = 0; item < metadata.arraySize; ++item ) { - size_t index = metadata.ComputeIndex( 0, item, 0 ); - if ( index >= nimages ) - { + hr = _Generate2DMipsBoxFilter( levels, filter, mipChain, item ); + if ( FAILED(hr) ) mipChain.Release(); - return E_FAIL; - } + } + return hr; - const Image& baseImage = srcImages[ index ]; + case TEX_FILTER_POINT: + hr = _Setup2DMips( &baseImages[0], metadata.arraySize, mdata2, mipChain ); + if ( FAILED(hr) ) + return hr; - hr = _GenerateMipMapsUsingWIC( baseImage, filter, levels, pfGUID, mipChain, item ); + for( size_t item = 0; item < metadata.arraySize; ++item ) + { + hr = _Generate2DMipsPointFilter( levels, mipChain, item ); if ( FAILED(hr) ) - { mipChain.Release(); - return hr; - } } + return hr; - return S_OK; - } - else - { - // Case 2: Base image format is not supported by WIC, so we have to convert, generate, and convert back - assert( metadata.format != DXGI_FORMAT_R32G32B32A32_FLOAT ); - - TexMetadata mdata2 = metadata; - mdata2.mipLevels = levels; - mdata2.format = DXGI_FORMAT_R32G32B32A32_FLOAT; - ScratchImage tMipChain; - HRESULT hr = tMipChain.Initialize( mdata2 ); + case TEX_FILTER_LINEAR: + hr = _Setup2DMips( &baseImages[0], metadata.arraySize, mdata2, mipChain ); if ( FAILED(hr) ) return hr; for( size_t item = 0; item < metadata.arraySize; ++item ) { - size_t index = metadata.ComputeIndex( 0, item, 0 ); - if ( index >= nimages ) - return 
E_FAIL; + hr = _Generate2DMipsLinearFilter( levels, filter, mipChain, item ); + if ( FAILED(hr) ) + mipChain.Release(); + } + return hr; - const Image& baseImage = srcImages[ index ]; + case TEX_FILTER_CUBIC: + hr = _Setup2DMips( &baseImages[0], metadata.arraySize, mdata2, mipChain ); + if ( FAILED(hr) ) + return hr; - ScratchImage temp; - hr = _ConvertToR32G32B32A32( baseImage, temp ); + for( size_t item = 0; item < metadata.arraySize; ++item ) + { + hr = _Generate2DMipsCubicFilter( levels, filter, mipChain, item ); if ( FAILED(hr) ) - return hr; + mipChain.Release(); + } + return hr; - const Image *timg = temp.GetImage( 0, 0, 0 ); - if ( !timg ) - return E_POINTER; + case TEX_FILTER_TRIANGLE: + hr = _Setup2DMips( &baseImages[0], metadata.arraySize, mdata2, mipChain ); + if ( FAILED(hr) ) + return hr; - hr = _GenerateMipMapsUsingWIC( *timg, filter, levels, GUID_WICPixelFormat128bppRGBAFloat, tMipChain, item ); + for( size_t item = 0; item < metadata.arraySize; ++item ) + { + hr = _Generate2DMipsTriangleFilter( levels, filter, mipChain, item ); if ( FAILED(hr) ) - return hr; + mipChain.Release(); } + return hr; - return _ConvertFromR32G32B32A32( tMipChain.GetImages(), tMipChain.GetImageCount(), tMipChain.GetMetadata(), metadata.format, mipChain ); - } + default: + return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); } - break; - - default: - return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED );; } } @@ -1012,19 +2882,23 @@ HRESULT GenerateMipMaps( const Image* srcImages, size_t nimages, const TexMetada //------------------------------------------------------------------------------------- // Generate mipmap chain for volume texture //------------------------------------------------------------------------------------- +_Use_decl_annotations_ HRESULT GenerateMipMaps3D( const Image* baseImages, size_t depth, DWORD filter, size_t levels, ScratchImage& mipChain ) { if ( !baseImages || !depth ) return E_INVALIDARG; + if ( filter & TEX_FILTER_FORCE_WIC ) + return 
HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); + DXGI_FORMAT format = baseImages[0].format; size_t width = baseImages[0].width; size_t height = baseImages[0].height; - if ( !ispow2(width) || !ispow2(height) || !ispow2(depth) ) + if ( !_CalculateMipLevels3D(width, height, depth, levels) ) return E_INVALIDARG; - if ( !_CalculateMipLevels3D(width, height, depth, levels) ) + if ( levels <= 1 ) return E_INVALIDARG; for( size_t slice=0; slice < depth; ++slice ) @@ -1039,30 +2913,33 @@ HRESULT GenerateMipMaps3D( const Image* baseImages, size_t depth, DWORD filter, } } - if ( IsCompressed( format ) ) - { - // We don't support generating mipmaps from compressed images, as those should be generated before compression + if ( IsCompressed(format) || IsTypeless(format) || IsPlanar(format) || IsPalettized(format) ) return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); - } + + static_assert( TEX_FILTER_POINT == 0x100000, "TEX_FILTER_ flag values don't match TEX_FILTER_MASK" ); HRESULT hr; - static_assert( TEX_FILTER_POINT == 0x100000, "TEX_FILTER_ flag values don't match TEX_FILTER_MASK" ); - switch( filter & TEX_FILTER_MASK ) + DWORD filter_select = ( filter & TEX_FILTER_MASK ); + if ( !filter_select ) + { + // Default filter choice + filter_select = ( ispow2(width) && ispow2(height) && ispow2(depth) ) ? 
TEX_FILTER_BOX : TEX_FILTER_TRIANGLE; + } + + switch( filter_select ) { - case 0: - case TEX_FILTER_FANT: + case TEX_FILTER_BOX: hr = _Setup3DMips( baseImages, depth, levels, mipChain ); if ( FAILED(hr) ) return hr; - // For decimation, Fant is equivalent to a Box filter - hr = _Generate3DMipsBoxFilter( depth, levels, mipChain ); + hr = _Generate3DMipsBoxFilter( depth, levels, filter, mipChain ); if ( FAILED(hr) ) mipChain.Release(); return hr; - case WIC_FLAGS_FILTER_POINT: + case TEX_FILTER_POINT: hr = _Setup3DMips( baseImages, depth, levels, mipChain ); if ( FAILED(hr) ) return hr; @@ -1072,34 +2949,62 @@ HRESULT GenerateMipMaps3D( const Image* baseImages, size_t depth, DWORD filter, mipChain.Release(); return hr; - case WIC_FLAGS_FILTER_LINEAR: - // Need to implement a 3D bi-linear filter (2x2x2) - return E_NOTIMPL; + case TEX_FILTER_LINEAR: + hr = _Setup3DMips( baseImages, depth, levels, mipChain ); + if ( FAILED(hr) ) + return hr; + + hr = _Generate3DMipsLinearFilter( depth, levels, filter, mipChain ); + if ( FAILED(hr) ) + mipChain.Release(); + return hr; + + case TEX_FILTER_CUBIC: + hr = _Setup3DMips( baseImages, depth, levels, mipChain ); + if ( FAILED(hr) ) + return hr; + + hr = _Generate3DMipsCubicFilter( depth, levels, filter, mipChain ); + if ( FAILED(hr) ) + mipChain.Release(); + return hr; + + case TEX_FILTER_TRIANGLE: + hr = _Setup3DMips( baseImages, depth, levels, mipChain ); + if ( FAILED(hr) ) + return hr; - case WIC_FLAGS_FILTER_CUBIC: - // Need to implement a 3D bi-cubic filter (3x3x3) - return E_NOTIMPL; + hr = _Generate3DMipsTriangleFilter( depth, levels, filter, mipChain ); + if ( FAILED(hr) ) + mipChain.Release(); + return hr; default: - return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED );; + return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); } } +_Use_decl_annotations_ HRESULT GenerateMipMaps3D( const Image* srcImages, size_t nimages, const TexMetadata& metadata, DWORD filter, size_t levels, ScratchImage& mipChain ) { - if ( !srcImages || 
!nimages || !IsValid(metadata.format) - || !ispow2(metadata.width) || !ispow2(metadata.height) || !ispow2(metadata.depth) ) + if ( !srcImages || !nimages || !IsValid(metadata.format) ) return E_INVALIDARG; - if ( metadata.dimension != TEX_DIMENSION_TEXTURE3D - || IsCompressed( metadata.format ) || IsVideo( metadata.format ) ) + if ( filter & TEX_FILTER_FORCE_WIC ) + return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); + + if ( !metadata.IsVolumemap() + || IsCompressed(metadata.format) || IsTypeless(metadata.format) || IsPlanar(metadata.format) || IsPalettized(metadata.format) ) return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); if ( !_CalculateMipLevels3D(metadata.width, metadata.height, metadata.depth, levels) ) return E_INVALIDARG; - - std::vector<const Image> baseImages; + + if ( levels <= 1 ) + return E_INVALIDARG; + + std::vector<Image> baseImages; baseImages.reserve( metadata.depth ); for( size_t slice=0; slice < metadata.depth; ++slice ) { @@ -1125,21 +3030,27 @@ HRESULT GenerateMipMaps3D( const Image* srcImages, size_t nimages, const TexMeta HRESULT hr; static_assert( TEX_FILTER_POINT == 0x100000, "TEX_FILTER_ flag values don't match TEX_FILTER_MASK" ); - switch( filter & TEX_FILTER_MASK ) + + DWORD filter_select = ( filter & TEX_FILTER_MASK ); + if ( !filter_select ) + { + // Default filter choice + filter_select = ( ispow2(metadata.width) && ispow2(metadata.height) && ispow2(metadata.depth) ) ? 
TEX_FILTER_BOX : TEX_FILTER_TRIANGLE; + } + + switch( filter_select ) { - case 0: - case TEX_FILTER_FANT: + case TEX_FILTER_BOX: hr = _Setup3DMips( &baseImages[0], metadata.depth, levels, mipChain ); if ( FAILED(hr) ) return hr; - // For decimation, Fant is equivalent to a Box filter - hr = _Generate3DMipsBoxFilter( metadata.depth, levels, mipChain ); + hr = _Generate3DMipsBoxFilter( metadata.depth, levels, filter, mipChain ); if ( FAILED(hr) ) mipChain.Release(); return hr; - case WIC_FLAGS_FILTER_POINT: + case TEX_FILTER_POINT: hr = _Setup3DMips( &baseImages[0], metadata.depth, levels, mipChain ); if ( FAILED(hr) ) return hr; @@ -1149,16 +3060,38 @@ HRESULT GenerateMipMaps3D( const Image* srcImages, size_t nimages, const TexMeta mipChain.Release(); return hr; - case WIC_FLAGS_FILTER_LINEAR: - // Need to implement a 3D bi-linear filter (2x2x2) - return E_NOTIMPL; + case TEX_FILTER_LINEAR: + hr = _Setup3DMips( &baseImages[0], metadata.depth, levels, mipChain ); + if ( FAILED(hr) ) + return hr; + + hr = _Generate3DMipsLinearFilter( metadata.depth, levels, filter, mipChain ); + if ( FAILED(hr) ) + mipChain.Release(); + return hr; + + case TEX_FILTER_CUBIC: + hr = _Setup3DMips( &baseImages[0], metadata.depth, levels, mipChain ); + if ( FAILED(hr) ) + return hr; + + hr = _Generate3DMipsCubicFilter( metadata.depth, levels, filter, mipChain ); + if ( FAILED(hr) ) + mipChain.Release(); + return hr; + + case TEX_FILTER_TRIANGLE: + hr = _Setup3DMips( &baseImages[0], metadata.depth, levels, mipChain ); + if ( FAILED(hr) ) + return hr; - case WIC_FLAGS_FILTER_CUBIC: - // Need to implement a 3D bi-cubic filter (3x3x3) - return E_NOTIMPL; + hr = _Generate3DMipsTriangleFilter( metadata.depth, levels, filter, mipChain ); + if ( FAILED(hr) ) + mipChain.Release(); + return hr; default: - return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED );; + return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); } } diff --git a/thirdparty/directxtex/DirectXTex/DirectXTexMisc.cpp 
b/thirdparty/directxtex/DirectXTex/DirectXTexMisc.cpp index 988fb1fb..3285ba5d 100644 --- a/thirdparty/directxtex/DirectXTex/DirectXTexMisc.cpp +++ b/thirdparty/directxtex/DirectXTex/DirectXTexMisc.cpp @@ -17,10 +17,12 @@ namespace DirectX { +static const XMVECTORF32 g_Gamma22 = { 2.2f, 2.2f, 2.2f, 1.f }; //------------------------------------------------------------------------------------- static HRESULT _ComputeMSE( _In_ const Image& image1, _In_ const Image& image2, - _Out_ float& mse, _Out_opt_cap_c_(4) float* mseV ) + _Out_ float& mse, _Out_writes_opt_(4) float* mseV, + _In_ DWORD flags ) { if ( !image1.pixels || !image2.pixels ) return E_POINTER; @@ -34,13 +36,55 @@ static HRESULT _ComputeMSE( _In_ const Image& image1, _In_ const Image& image2, if ( !scanline ) return E_OUTOFMEMORY; + // Flags implied from image formats + switch( image1.format ) + { + case DXGI_FORMAT_B8G8R8X8_UNORM: + flags |= CMSE_IGNORE_ALPHA; + break; + + case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB: + flags |= CMSE_IMAGE1_SRGB | CMSE_IGNORE_ALPHA; + break; + + case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: + case DXGI_FORMAT_BC1_UNORM_SRGB: + case DXGI_FORMAT_BC2_UNORM_SRGB: + case DXGI_FORMAT_BC3_UNORM_SRGB: + case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB: + case DXGI_FORMAT_BC7_UNORM_SRGB: + flags |= CMSE_IMAGE1_SRGB; + break; + } + + switch( image2.format ) + { + case DXGI_FORMAT_B8G8R8X8_UNORM: + flags |= CMSE_IGNORE_ALPHA; + break; + + case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB: + flags |= CMSE_IMAGE2_SRGB | CMSE_IGNORE_ALPHA; + break; + + case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: + case DXGI_FORMAT_BC1_UNORM_SRGB: + case DXGI_FORMAT_BC2_UNORM_SRGB: + case DXGI_FORMAT_BC3_UNORM_SRGB: + case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB: + case DXGI_FORMAT_BC7_UNORM_SRGB: + flags |= CMSE_IMAGE2_SRGB; + break; + } + const uint8_t *pSrc1 = image1.pixels; const size_t rowPitch1 = image1.rowPitch; const uint8_t *pSrc2 = image2.pixels; const size_t rowPitch2 = image2.rowPitch; - XMVECTOR acc = XMVectorZero(); + XMVECTOR acc = g_XMZero; 
+ static XMVECTORF32 two = { 2.0f, 2.0f, 2.0f, 2.0f }; for( size_t h = 0; h < image1.height; ++h ) { @@ -52,10 +96,47 @@ static HRESULT _ComputeMSE( _In_ const Image& image1, _In_ const Image& image2, if ( !_LoadScanline( ptr2, width, pSrc2, rowPitch2, image2.format ) ) return E_FAIL; - for( size_t i = 0; i < width; ++i, ++ptr1, ++ptr2 ) + for( size_t i = 0; i < width; ++i ) { + XMVECTOR v1 = *(ptr1++); + if ( flags & CMSE_IMAGE1_SRGB ) + { + v1 = XMVectorPow( v1, g_Gamma22 ); + } + if ( flags & CMSE_IMAGE1_X2_BIAS ) + { + v1 = XMVectorMultiplyAdd( v1, two, g_XMNegativeOne ); + } + + XMVECTOR v2 = *(ptr2++); + if ( flags & CMSE_IMAGE2_SRGB ) + { + v2 = XMVectorPow( v2, g_Gamma22 ); + } + if ( flags & CMSE_IMAGE2_X2_BIAS ) + { + v1 = XMVectorMultiplyAdd( v2, two, g_XMNegativeOne ); + } + // sum[ (I1 - I2)^2 ] - XMVECTOR v = XMVectorSubtract( *ptr1, *ptr2 ); + XMVECTOR v = XMVectorSubtract( v1, v2 ); + if ( flags & CMSE_IGNORE_RED ) + { + v = XMVectorSelect( v, g_XMZero, g_XMMaskX ); + } + if ( flags & CMSE_IGNORE_GREEN ) + { + v = XMVectorSelect( v, g_XMZero, g_XMMaskY ); + } + if ( flags & CMSE_IGNORE_BLUE ) + { + v = XMVectorSelect( v, g_XMZero, g_XMMaskZ ); + } + if ( flags & CMSE_IGNORE_ALPHA ) + { + v = XMVectorSelect( v, g_XMZero, g_XMMaskW ); + } + acc = XMVectorMultiplyAdd( v, v, acc ); } @@ -89,12 +170,15 @@ static HRESULT _ComputeMSE( _In_ const Image& image1, _In_ const Image& image2, //------------------------------------------------------------------------------------- // Copies a rectangle from one image into another //------------------------------------------------------------------------------------- +_Use_decl_annotations_ HRESULT CopyRectangle( const Image& srcImage, const Rect& srcRect, const Image& dstImage, DWORD filter, size_t xOffset, size_t yOffset ) { if ( !srcImage.pixels || !dstImage.pixels ) return E_POINTER; - if ( IsCompressed( srcImage.format ) || IsCompressed( dstImage.format ) ) + if ( IsCompressed( srcImage.format ) || 
IsCompressed( dstImage.format ) + || IsPlanar( srcImage.format ) || IsPlanar( dstImage.format ) + || IsPalettized( srcImage.format ) || IsPalettized( dstImage.format ) ) return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); // Validate rectangle/offset @@ -193,7 +277,8 @@ HRESULT CopyRectangle( const Image& srcImage, const Rect& srcRect, const Image& //------------------------------------------------------------------------------------- // Computes the Mean-Squared-Error (MSE) between two images //------------------------------------------------------------------------------------- -HRESULT ComputeMSE( const Image& image1, const Image& image2, float& mse, float* mseV ) +_Use_decl_annotations_ +HRESULT ComputeMSE( const Image& image1, const Image& image2, float& mse, float* mseV, DWORD flags ) { if ( !image1.pixels || !image2.pixels ) return E_POINTER; @@ -201,6 +286,10 @@ HRESULT ComputeMSE( const Image& image1, const Image& image2, float& mse, float* if ( image1.width != image2.width || image1.height != image2.height ) return E_INVALIDARG; + if ( IsPlanar( image1.format ) || IsPlanar( image2.format ) + || IsPalettized( image1.format ) || IsPalettized( image2.format ) ) + return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); + if ( IsCompressed(image1.format) ) { if ( IsCompressed(image2.format) ) @@ -221,7 +310,7 @@ HRESULT ComputeMSE( const Image& image1, const Image& image2, float& mse, float* if ( !img1 || !img2 ) return E_POINTER; - return _ComputeMSE( *img1, *img2, mse, mseV ); + return _ComputeMSE( *img1, *img2, mse, mseV, flags ); } else { @@ -235,7 +324,7 @@ HRESULT ComputeMSE( const Image& image1, const Image& image2, float& mse, float* if ( !img ) return E_POINTER; - return _ComputeMSE( *img, image2, mse, mseV ); + return _ComputeMSE( *img, image2, mse, mseV, flags ); } } else @@ -252,12 +341,12 @@ HRESULT ComputeMSE( const Image& image1, const Image& image2, float& mse, float* if ( !img ) return E_POINTER; - return _ComputeMSE( image1, *img, mse, mseV ); + 
return _ComputeMSE( image1, *img, mse, mseV, flags ); } else { // Case 4: neither image is compressed - return _ComputeMSE( image1, image2, mse, mseV ); + return _ComputeMSE( image1, image2, mse, mseV, flags ); } } } diff --git a/thirdparty/directxtex/DirectXTex/DirectXTexNormalMaps.cpp b/thirdparty/directxtex/DirectXTex/DirectXTexNormalMaps.cpp index ad594933..3e07c7e0 100644 --- a/thirdparty/directxtex/DirectXTex/DirectXTexNormalMaps.cpp +++ b/thirdparty/directxtex/DirectXTex/DirectXTexNormalMaps.cpp @@ -48,7 +48,7 @@ static inline float _EvaluateColor( _In_ FXMVECTOR val, _In_ DWORD flags ) } } -static void _EvaluateRow( _In_count_(width) const XMVECTOR* pSource, _Out_cap_(width+2) float* pDest, +static void _EvaluateRow( _In_reads_(width) const XMVECTOR* pSource, _Out_writes_(width+2) float* pDest, _In_ size_t width, _In_ DWORD flags ) { assert( pSource && pDest ); @@ -79,14 +79,12 @@ static HRESULT _ComputeNMap( _In_ const Image& srcImage, _In_ DWORD flags, _In_ if ( !srcImage.pixels || !normalMap.pixels ) return E_INVALIDARG; - assert( !IsCompressed(format) && !IsTypeless( format ) ); - const DWORD convFlags = _GetConvertFlags( format ); if ( !convFlags ) return E_FAIL; if ( !( convFlags & (CONVF_UNORM | CONVF_SNORM | CONVF_FLOAT) ) ) - HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); + return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); const size_t width = srcImage.width; const size_t height = srcImage.height; @@ -253,10 +251,11 @@ static HRESULT _ComputeNMap( _In_ const Image& srcImage, _In_ DWORD flags, _In_ //------------------------------------------------------------------------------------- // Generates a normal map from a height-map //------------------------------------------------------------------------------------- +_Use_decl_annotations_ HRESULT ComputeNormalMap( const Image& srcImage, DWORD flags, float amplitude, DXGI_FORMAT format, ScratchImage& normalMap ) { - if ( !srcImage.pixels || !IsValid(format) || IsCompressed( format ) || IsTypeless( format 
) ) + if ( !srcImage.pixels || !IsValid(format) ) return E_INVALIDARG; static_assert( CNMAP_CHANNEL_RED == 0x1, "CNMAP_CHANNEL_ flag values don't match mask" ); @@ -274,7 +273,10 @@ HRESULT ComputeNormalMap( const Image& srcImage, DWORD flags, float amplitude, return E_INVALIDARG; } - if ( IsCompressed( srcImage.format ) || IsTypeless( srcImage.format ) ) + if ( IsCompressed(format) || IsCompressed(srcImage.format) + || IsTypeless(format) || IsTypeless(srcImage.format) + || IsPlanar(format) || IsPlanar(srcImage.format) + || IsPalettized(format) || IsPalettized(srcImage.format) ) return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); // Setup target image @@ -301,14 +303,18 @@ HRESULT ComputeNormalMap( const Image& srcImage, DWORD flags, float amplitude, return S_OK; } +_Use_decl_annotations_ HRESULT ComputeNormalMap( const Image* srcImages, size_t nimages, const TexMetadata& metadata, DWORD flags, float amplitude, DXGI_FORMAT format, ScratchImage& normalMaps ) { - if ( !srcImages || !nimages ) + if ( !srcImages || !nimages || !IsValid(format) ) return E_INVALIDARG; - if ( !IsValid(format) || IsCompressed(format) || IsTypeless(format) ) - return E_INVALIDARG; + if ( IsCompressed(format) || IsCompressed(metadata.format) + || IsTypeless(format) || IsTypeless(metadata.format) + || IsPlanar(format) || IsPlanar(metadata.format) + || IsPalettized(format) || IsPalettized(metadata.format) ) + return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); static_assert( CNMAP_CHANNEL_RED == 0x1, "CNMAP_CHANNEL_ flag values don't match mask" ); switch( flags & 0xf ) diff --git a/thirdparty/directxtex/DirectXTex/DirectXTexP.h b/thirdparty/directxtex/DirectXTex/DirectXTexP.h index 4659b9bb..d435becf 100644 --- a/thirdparty/directxtex/DirectXTex/DirectXTexP.h +++ b/thirdparty/directxtex/DirectXTex/DirectXTexP.h @@ -13,9 +13,7 @@ // http://go.microsoft.com/fwlink/?LinkId=248926 //------------------------------------------------------------------------------------- -#if defined(_MSC_VER) && 
(_MSC_VER > 1000) #pragma once -#endif #ifndef NOMINMAX #define NOMINMAX @@ -41,20 +39,21 @@ #include <ole2.h> -#if (_WIN32_WINNT >= 0x0602 /*_WIN32_WINNT_WIN8*/) || defined(_WIN7_PLATFORM_UPDATE) -#include <d2d1.h> -#endif +#include "DirectXTex.h" +#ifndef __MINGW32__ +// VS 2010's stdint.h conflicts with intsafe.h #pragma warning(push) #pragma warning(disable : 4005) #include <wincodec.h> +#include <intsafe.h> #pragma warning(pop) - -#if (_WIN32_WINNT >= 0x0602 /*_WIN32_WINNT_WIN8*/) && !defined(DXGI_1_2_FORMATS) -#define DXGI_1_2_FORMATS +#else +#include <wincodec.h> +#include <stdint.h> #endif -#include "DirectXTex.h" +#include <wrl/client.h> #include "scoped.h" @@ -66,14 +65,16 @@ namespace DirectX { //--------------------------------------------------------------------------------- // WIC helper functions - DXGI_FORMAT _WICToDXGI( _In_ const GUID& guid ); - bool _DXGIToWIC( _In_ DXGI_FORMAT format, _Out_ GUID& guid ); + DXGI_FORMAT __cdecl _WICToDXGI( _In_ const GUID& guid ); + bool __cdecl _DXGIToWIC( _In_ DXGI_FORMAT format, _Out_ GUID& guid, _In_ bool ignoreRGBvsBGR = false ); + + DWORD __cdecl _CheckWICColorSpace( _In_ const GUID& sourceGUID, _In_ const GUID& targetGUID ); - IWICImagingFactory* _GetWIC(); + IWICImagingFactory* __cdecl _GetWIC(); - bool _IsWIC2(); + bool __cdecl _IsWIC2(); - inline WICBitmapDitherType _GetWICDither( _In_ DWORD flags ) + inline WICBitmapDitherType __cdecl _GetWICDither( _In_ DWORD flags ) { static_assert( TEX_FILTER_DITHER == 0x10000, "TEX_FILTER_DITHER* flag values don't match mask" ); @@ -93,7 +94,7 @@ namespace DirectX } } - inline WICBitmapInterpolationMode _GetWICInterp( _In_ DWORD flags ) + inline WICBitmapInterpolationMode __cdecl _GetWICInterp( _In_ DWORD flags ) { static_assert( TEX_FILTER_POINT == 0x100000, "TEX_FILTER_ flag values don't match TEX_FILTER_MASK" ); @@ -121,12 +122,13 @@ namespace DirectX //--------------------------------------------------------------------------------- // Image helper functions - 
void _DetermineImageArray( _In_ const TexMetadata& metadata, _In_ DWORD cpFlags, - _Out_ size_t& nImages, _Out_ size_t& pixelSize ); + void __cdecl _DetermineImageArray( _In_ const TexMetadata& metadata, _In_ DWORD cpFlags, + _Out_ size_t& nImages, _Out_ size_t& pixelSize ); - bool _SetupImageArray( _In_bytecount_(pixelSize) uint8_t *pMemory, _In_ size_t pixelSize, - _In_ const TexMetadata& metadata, _In_ DWORD cpFlags, - _Out_cap_(nImages) Image* images, _In_ size_t nImages ); + _Success_(return != false) + bool __cdecl _SetupImageArray( _In_reads_bytes_(pixelSize) uint8_t *pMemory, _In_ size_t pixelSize, + _In_ const TexMetadata& metadata, _In_ DWORD cpFlags, + _Out_writes_(nImages) Image* images, _In_ size_t nImages ); //--------------------------------------------------------------------------------- // Conversion helper functions @@ -149,9 +151,10 @@ namespace DirectX CONVF_STENCIL = 0x40, CONVF_SHAREDEXP = 0x80, CONVF_BGR = 0x100, - CONVF_X2 = 0x200, + CONVF_XR = 0x200, CONVF_PACKED = 0x400, CONVF_BC = 0x800, + CONVF_YUV = 0x1000, CONVF_R = 0x10000, CONVF_G = 0x20000, CONVF_B = 0x40000, @@ -160,40 +163,60 @@ namespace DirectX CONVF_RGBA_MASK = 0xF0000, }; - DWORD _GetConvertFlags( _In_ DXGI_FORMAT format ); - - void _CopyScanline( _Out_bytecap_(outSize) LPVOID pDestination, _In_ size_t outSize, - _In_bytecount_(inSize) LPCVOID pSource, _In_ size_t inSize, - _In_ DXGI_FORMAT format, _In_ DWORD flags ); - - void _SwizzleScanline( _Out_bytecap_(outSize) LPVOID pDestination, _In_ size_t outSize, - _In_bytecount_(inSize) LPCVOID pSource, _In_ size_t inSize, - _In_ DXGI_FORMAT format, _In_ DWORD flags ); - - bool _ExpandScanline( _Out_bytecap_(outSize) LPVOID pDestination, _In_ size_t outSize, - _In_ DXGI_FORMAT outFormat, - _In_bytecount_(inSize) LPCVOID pSource, _In_ size_t inSize, - _In_ DXGI_FORMAT inFormat, _In_ DWORD flags ); - - bool _LoadScanline( _Out_cap_(count) XMVECTOR* pDestination, _In_ size_t count, - _In_bytecount_(size) LPCVOID pSource, _In_ size_t 
size, _In_ DXGI_FORMAT format ); - - bool _StoreScanline( _Out_bytecap_(size) LPVOID pDestination, _In_ size_t size, _In_ DXGI_FORMAT format, - _In_count_(count) const XMVECTOR* pSource, _In_ size_t count ); - - HRESULT _ConvertToR32G32B32A32( _In_ const Image& srcImage, _Inout_ ScratchImage& image ); - - HRESULT _ConvertFromR32G32B32A32( _In_ const Image& srcImage, _In_ const Image& destImage ); - HRESULT _ConvertFromR32G32B32A32( _In_ const Image& srcImage, _In_ DXGI_FORMAT format, _Inout_ ScratchImage& image ); - HRESULT _ConvertFromR32G32B32A32( _In_count_(nimages) const Image* srcImages, _In_ size_t nimages, _In_ const TexMetadata& metadata, - _In_ DXGI_FORMAT format, _Out_ ScratchImage& result ); - - void _ConvertScanline( _Inout_count_(count) XMVECTOR* pBuffer, _In_ size_t count, - _In_ DXGI_FORMAT outFormat, _In_ DXGI_FORMAT inFormat, _In_ DWORD flags ); + DWORD __cdecl _GetConvertFlags( _In_ DXGI_FORMAT format ); + + void __cdecl _CopyScanline( _When_(pDestination == pSource, _Inout_updates_bytes_(outSize)) + _When_(pDestination != pSource, _Out_writes_bytes_(outSize)) + LPVOID pDestination, _In_ size_t outSize, + _In_reads_bytes_(inSize) LPCVOID pSource, _In_ size_t inSize, + _In_ DXGI_FORMAT format, _In_ DWORD flags ); + + void __cdecl _SwizzleScanline( _When_(pDestination == pSource, _In_) + _When_(pDestination != pSource, _Out_writes_bytes_(outSize)) + LPVOID pDestination, _In_ size_t outSize, + _In_reads_bytes_(inSize) LPCVOID pSource, _In_ size_t inSize, + _In_ DXGI_FORMAT format, _In_ DWORD flags ); + + _Success_(return != false) + bool __cdecl _ExpandScanline( _Out_writes_bytes_(outSize) LPVOID pDestination, _In_ size_t outSize, + _In_ DXGI_FORMAT outFormat, + _In_reads_bytes_(inSize) LPCVOID pSource, _In_ size_t inSize, + _In_ DXGI_FORMAT inFormat, _In_ DWORD flags ); + + _Success_(return != false) + bool __cdecl _LoadScanline( _Out_writes_(count) XMVECTOR* pDestination, _In_ size_t count, + _In_reads_bytes_(size) LPCVOID pSource, _In_ size_t 
size, _In_ DXGI_FORMAT format ); + + _Success_(return != false) + bool __cdecl _LoadScanlineLinear( _Out_writes_(count) XMVECTOR* pDestination, _In_ size_t count, + _In_reads_bytes_(size) LPCVOID pSource, _In_ size_t size, _In_ DXGI_FORMAT format, _In_ DWORD flags ); + + _Success_(return != false) + bool __cdecl _StoreScanline( LPVOID pDestination, _In_ size_t size, _In_ DXGI_FORMAT format, + _In_reads_(count) const XMVECTOR* pSource, _In_ size_t count, _In_ float threshold = 0 ); + + _Success_(return != false) + bool __cdecl _StoreScanlineLinear( LPVOID pDestination, _In_ size_t size, _In_ DXGI_FORMAT format, + _Inout_updates_all_(count) XMVECTOR* pSource, _In_ size_t count, _In_ DWORD flags, _In_ float threshold = 0 ); + + _Success_(return != false) + bool __cdecl _StoreScanlineDither( LPVOID pDestination, _In_ size_t size, _In_ DXGI_FORMAT format, + _Inout_updates_all_(count) XMVECTOR* pSource, _In_ size_t count, _In_ float threshold, size_t y, size_t z, + _Inout_updates_all_opt_(count+2) XMVECTOR* pDiffusionErrors ); + + HRESULT __cdecl _ConvertToR32G32B32A32( _In_ const Image& srcImage, _Inout_ ScratchImage& image ); + + HRESULT __cdecl _ConvertFromR32G32B32A32( _In_ const Image& srcImage, _In_ const Image& destImage ); + HRESULT __cdecl _ConvertFromR32G32B32A32( _In_ const Image& srcImage, _In_ DXGI_FORMAT format, _Inout_ ScratchImage& image ); + HRESULT __cdecl _ConvertFromR32G32B32A32( _In_reads_(nimages) const Image* srcImages, _In_ size_t nimages, _In_ const TexMetadata& metadata, + _In_ DXGI_FORMAT format, _Out_ ScratchImage& result ); + + void __cdecl _ConvertScanline( _Inout_updates_all_(count) XMVECTOR* pBuffer, _In_ size_t count, + _In_ DXGI_FORMAT outFormat, _In_ DXGI_FORMAT inFormat, _In_ DWORD flags ); //--------------------------------------------------------------------------------- // DDS helper functions - HRESULT _EncodeDDSHeader( _In_ const TexMetadata& metadata, DWORD flags, - _Out_opt_cap_x_(maxsize) LPVOID pDestination, _In_ size_t 
maxsize, _Out_ size_t& required ); + HRESULT __cdecl _EncodeDDSHeader( _In_ const TexMetadata& metadata, DWORD flags, + _Out_writes_bytes_to_opt_(maxsize, required) LPVOID pDestination, _In_ size_t maxsize, _Out_ size_t& required ); }; // namespace diff --git a/thirdparty/directxtex/DirectXTex/DirectXTexPMAlpha.cpp b/thirdparty/directxtex/DirectXTex/DirectXTexPMAlpha.cpp new file mode 100644 index 00000000..d5da2069 --- /dev/null +++ b/thirdparty/directxtex/DirectXTex/DirectXTexPMAlpha.cpp @@ -0,0 +1,229 @@ +//------------------------------------------------------------------------------------- +// DirectXTexPMAlpha.cpp +// +// DirectX Texture Library - Premultiplied alpha operations +// +// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF +// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A +// PARTICULAR PURPOSE. +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// +// http://go.microsoft.com/fwlink/?LinkId=248926 +//------------------------------------------------------------------------------------- + +#include "directxtexp.h" + +namespace DirectX +{ + +static HRESULT _PremultiplyAlpha( _In_ const Image& srcImage, _In_ const Image& destImage ) +{ + assert( srcImage.width == destImage.width ); + assert( srcImage.height == destImage.height ); + + ScopedAlignedArrayXMVECTOR scanline( reinterpret_cast<XMVECTOR*>( _aligned_malloc( (sizeof(XMVECTOR)*srcImage.width), 16 ) ) ); + if ( !scanline ) + return E_OUTOFMEMORY; + + const uint8_t *pSrc = srcImage.pixels; + uint8_t *pDest = destImage.pixels; + if ( !pSrc || !pDest ) + return E_POINTER; + + for( size_t h = 0; h < srcImage.height; ++h ) + { + if ( !_LoadScanline( scanline.get(), srcImage.width, pSrc, srcImage.rowPitch, srcImage.format ) ) + return E_FAIL; + + XMVECTOR* ptr = scanline.get(); + for( size_t w = 0; w < srcImage.width; ++w ) + { + XMVECTOR v = *ptr; + XMVECTOR alpha = XMVectorSplatW( 
*ptr ); + alpha = XMVectorMultiply( v, alpha ); + *(ptr++) = XMVectorSelect( v, alpha, g_XMSelect1110 ); + } + + if ( !_StoreScanline( pDest, destImage.rowPitch, destImage.format, scanline.get(), srcImage.width ) ) + return E_FAIL; + + pSrc += srcImage.rowPitch; + pDest += destImage.rowPitch; + } + + return S_OK; +} + +static HRESULT _PremultiplyAlphaLinear( _In_ const Image& srcImage, _In_ DWORD flags, _In_ const Image& destImage ) +{ + assert( srcImage.width == destImage.width ); + assert( srcImage.height == destImage.height ); + + static_assert( TEX_PMALPHA_SRGB_IN == TEX_FILTER_SRGB_IN, "TEX_PMALHPA_SRGB* should match TEX_FILTER_SRGB*" ); + static_assert( TEX_PMALPHA_SRGB_OUT == TEX_FILTER_SRGB_OUT, "TEX_PMALHPA_SRGB* should match TEX_FILTER_SRGB*" ); + static_assert( TEX_PMALPHA_SRGB == TEX_FILTER_SRGB, "TEX_PMALHPA_SRGB* should match TEX_FILTER_SRGB*" ); + flags &= TEX_PMALPHA_SRGB; + + ScopedAlignedArrayXMVECTOR scanline( reinterpret_cast<XMVECTOR*>( _aligned_malloc( (sizeof(XMVECTOR)*srcImage.width), 16 ) ) ); + if ( !scanline ) + return E_OUTOFMEMORY; + + const uint8_t *pSrc = srcImage.pixels; + uint8_t *pDest = destImage.pixels; + if ( !pSrc || !pDest ) + return E_POINTER; + + for( size_t h = 0; h < srcImage.height; ++h ) + { + if ( !_LoadScanlineLinear( scanline.get(), srcImage.width, pSrc, srcImage.rowPitch, srcImage.format, flags ) ) + return E_FAIL; + + XMVECTOR* ptr = scanline.get(); + for( size_t w = 0; w < srcImage.width; ++w ) + { + XMVECTOR v = *ptr; + XMVECTOR alpha = XMVectorSplatW( *ptr ); + alpha = XMVectorMultiply( v, alpha ); + *(ptr++) = XMVectorSelect( v, alpha, g_XMSelect1110 ); + } + + if ( !_StoreScanlineLinear( pDest, destImage.rowPitch, destImage.format, scanline.get(), srcImage.width, flags ) ) + return E_FAIL; + + pSrc += srcImage.rowPitch; + pDest += destImage.rowPitch; + } + + return S_OK; +} + + +//===================================================================================== +// Entry-points 
+//===================================================================================== + +//------------------------------------------------------------------------------------- +// Converts to a premultiplied alpha version of the texture +//------------------------------------------------------------------------------------- +_Use_decl_annotations_ +HRESULT PremultiplyAlpha( const Image& srcImage, DWORD flags, ScratchImage& image ) +{ + if ( !srcImage.pixels ) + return E_POINTER; + + if ( IsCompressed(srcImage.format) + || IsPlanar(srcImage.format) + || IsPalettized(srcImage.format) + || IsTypeless(srcImage.format) + || !HasAlpha(srcImage.format) ) + return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); + +#ifdef _M_X64 + if ( (srcImage.width > 0xFFFFFFFF) || (srcImage.height > 0xFFFFFFFF) ) + return E_INVALIDARG; +#endif + + HRESULT hr = image.Initialize2D( srcImage.format, srcImage.width, srcImage.height, 1, 1 ); + if ( FAILED(hr) ) + return hr; + + const Image *rimage = image.GetImage( 0, 0, 0 ); + if ( !rimage ) + { + image.Release(); + return E_POINTER; + } + + hr = ( flags & TEX_PMALPHA_IGNORE_SRGB ) ? 
_PremultiplyAlpha( srcImage, *rimage ) : _PremultiplyAlphaLinear( srcImage, flags, *rimage ); + if ( FAILED(hr) ) + { + image.Release(); + return hr; + } + + return S_OK; +} + + +//------------------------------------------------------------------------------------- +// Converts to a premultiplied alpha version of the texture (complex) +//------------------------------------------------------------------------------------- +_Use_decl_annotations_ +HRESULT PremultiplyAlpha( const Image* srcImages, size_t nimages, const TexMetadata& metadata, DWORD flags, ScratchImage& result ) +{ + if ( !srcImages || !nimages ) + return E_INVALIDARG; + + if ( IsCompressed(metadata.format) + || IsPlanar(metadata.format) + || IsPalettized(metadata.format) + || IsTypeless(metadata.format) + || !HasAlpha(metadata.format) ) + return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); + +#ifdef _M_X64 + if ( (metadata.width > 0xFFFFFFFF) || (metadata.height > 0xFFFFFFFF) ) + return E_INVALIDARG; +#endif + + if ( metadata.IsPMAlpha() ) + { + // Already premultiplied + return E_FAIL; + } + + TexMetadata mdata2 = metadata; + mdata2.SetAlphaMode(TEX_ALPHA_MODE_PREMULTIPLIED); + HRESULT hr = result.Initialize( mdata2 ); + if ( FAILED(hr) ) + return hr; + + if ( nimages != result.GetImageCount() ) + { + result.Release(); + return E_FAIL; + } + + const Image* dest = result.GetImages(); + if ( !dest ) + { + result.Release(); + return E_POINTER; + } + + for( size_t index=0; index < nimages; ++index ) + { + const Image& src = srcImages[ index ]; + if ( src.format != metadata.format ) + { + result.Release(); + return E_FAIL; + } + +#ifdef _M_X64 + if ( (src.width > 0xFFFFFFFF) || (src.height > 0xFFFFFFFF) ) + return E_FAIL; +#endif + const Image& dst = dest[ index ]; + assert( dst.format == metadata.format ); + + if ( src.width != dst.width || src.height != dst.height ) + { + result.Release(); + return E_FAIL; + } + + hr = ( flags & TEX_PMALPHA_IGNORE_SRGB ) ? 
_PremultiplyAlpha( src, dst ) : _PremultiplyAlphaLinear( src, flags, dst ); + if ( FAILED(hr) ) + { + result.Release(); + return hr; + } + } + + return S_OK; +} + +}; // namespace diff --git a/thirdparty/directxtex/DirectXTex/DirectXTexResize.cpp b/thirdparty/directxtex/DirectXTex/DirectXTexResize.cpp index 9f70c929..81b65beb 100644 --- a/thirdparty/directxtex/DirectXTex/DirectXTexResize.cpp +++ b/thirdparty/directxtex/DirectXTex/DirectXTexResize.cpp @@ -15,15 +15,21 @@ #include "DirectXTexP.h" +#include "filters.h" + +using Microsoft::WRL::ComPtr; + namespace DirectX { +//------------------------------------------------------------------------------------- +// WIC related helper functions +//------------------------------------------------------------------------------------- + extern HRESULT _ResizeSeparateColorAndAlpha( _In_ IWICImagingFactory* pWIC, _In_ IWICBitmap* original, _In_ size_t newWidth, _In_ size_t newHeight, _In_ DWORD filter, _Inout_ const Image* img ); -//------------------------------------------------------------------------------------- -// Do image resize using WIC -//------------------------------------------------------------------------------------- +//--- Do image resize using WIC --- static HRESULT _PerformResizeUsingWIC( _In_ const Image& srcImage, _In_ DWORD filter, _In_ const WICPixelFormatGUID& pfGUID, _In_ const Image& destImage ) { @@ -36,13 +42,13 @@ static HRESULT _PerformResizeUsingWIC( _In_ const Image& srcImage, _In_ DWORD fi if ( !pWIC ) return E_NOINTERFACE; - ScopedObject<IWICComponentInfo> componentInfo; - HRESULT hr = pWIC->CreateComponentInfo( pfGUID, &componentInfo ); + ComPtr<IWICComponentInfo> componentInfo; + HRESULT hr = pWIC->CreateComponentInfo( pfGUID, componentInfo.GetAddressOf() ); if ( FAILED(hr) ) return hr; - ScopedObject<IWICPixelFormatInfo2> pixelFormatInfo; - hr = componentInfo->QueryInterface( __uuidof(IWICPixelFormatInfo2), (void**)&pixelFormatInfo ); + ComPtr<IWICPixelFormatInfo2> pixelFormatInfo; + hr 
= componentInfo.As( &pixelFormatInfo ); if ( FAILED(hr) ) return hr; @@ -51,10 +57,10 @@ static HRESULT _PerformResizeUsingWIC( _In_ const Image& srcImage, _In_ DWORD fi if ( FAILED(hr) ) return hr; - ScopedObject<IWICBitmap> source; + ComPtr<IWICBitmap> source; hr = pWIC->CreateBitmapFromMemory( static_cast<UINT>( srcImage.width ), static_cast<UINT>( srcImage.height ), pfGUID, static_cast<UINT>( srcImage.rowPitch ), static_cast<UINT>( srcImage.slicePitch ), - srcImage.pixels, &source ); + srcImage.pixels, source.GetAddressOf() ); if ( FAILED(hr) ) return hr; @@ -66,8 +72,8 @@ static HRESULT _PerformResizeUsingWIC( _In_ const Image& srcImage, _In_ DWORD fi } else { - ScopedObject<IWICBitmapScaler> scaler; - hr = pWIC->CreateBitmapScaler( &scaler ); + ComPtr<IWICBitmapScaler> scaler; + hr = pWIC->CreateBitmapScaler( scaler.GetAddressOf() ); if ( FAILED(hr) ) return hr; @@ -90,11 +96,18 @@ static HRESULT _PerformResizeUsingWIC( _In_ const Image& srcImage, _In_ DWORD fi { // The WIC bitmap scaler is free to return a different pixel format than the source image, so here we // convert it back - ScopedObject<IWICFormatConverter> FC; - hr = pWIC->CreateFormatConverter( &FC ); + ComPtr<IWICFormatConverter> FC; + hr = pWIC->CreateFormatConverter( FC.GetAddressOf() ); if ( FAILED(hr) ) return hr; + BOOL canConvert = FALSE; + hr = FC->CanConvert( pfScaler, pfGUID, &canConvert ); + if ( FAILED(hr) || !canConvert ) + { + return E_UNEXPECTED; + } + hr = FC->Initialize( scaler.Get(), pfGUID, _GetWICDither( filter ), 0, 0, WICBitmapPaletteTypeCustom ); if ( FAILED(hr) ) return hr; @@ -109,9 +122,7 @@ static HRESULT _PerformResizeUsingWIC( _In_ const Image& srcImage, _In_ DWORD fi } -//------------------------------------------------------------------------------------- -// Do conversion, resize using WIC, conversion cycle -//------------------------------------------------------------------------------------- +//--- Do conversion, resize using WIC, conversion cycle --- static 
HRESULT _PerformResizeViaF32( _In_ const Image& srcImage, _In_ DWORD filter, _In_ const Image& destImage ) { if ( !srcImage.pixels || !destImage.pixels ) @@ -152,6 +163,662 @@ static HRESULT _PerformResizeViaF32( _In_ const Image& srcImage, _In_ DWORD filt } +//--- determine when to use WIC vs. non-WIC paths --- +static bool _UseWICFiltering( _In_ DXGI_FORMAT format, _In_ DWORD filter ) +{ + if ( filter & TEX_FILTER_FORCE_NON_WIC ) + { + // Explicit flag indicates use of non-WIC code paths + return false; + } + + if ( filter & TEX_FILTER_FORCE_WIC ) + { + // Explicit flag to use WIC code paths, skips all the case checks below + return true; + } + + if ( IsSRGB(format) || (filter & TEX_FILTER_SRGB) ) + { + // Use non-WIC code paths for sRGB correct filtering + return false; + } + +#if defined(_XBOX_ONE) && defined(_TITLE) + if ( format == DXGI_FORMAT_R16G16B16A16_FLOAT + || format == DXGI_FORMAT_R16_FLOAT ) + { + // Use non-WIC code paths as these conversions are not supported by Xbox One XDK + return false; + } +#endif + + static_assert( TEX_FILTER_POINT == 0x100000, "TEX_FILTER_ flag values don't match TEX_FILTER_MASK" ); + + switch ( filter & TEX_FILTER_MASK ) + { + case TEX_FILTER_LINEAR: + if ( filter & TEX_FILTER_WRAP ) + { + // WIC only supports 'clamp' semantics (MIRROR is equivalent to clamp for linear) + return false; + } + + if ( BitsPerColor(format) > 8 ) + { + // Avoid the WIC bitmap scaler when doing Linear filtering of XR/HDR formats + return false; + } + break; + + case TEX_FILTER_CUBIC: + if ( filter & ( TEX_FILTER_WRAP | TEX_FILTER_MIRROR ) ) + { + // WIC only supports 'clamp' semantics + return false; + } + + if ( BitsPerColor(format) > 8 ) + { + // Avoid the WIC bitmap scaler when doing Cubic filtering of XR/HDR formats + return false; + } + break; + + case TEX_FILTER_TRIANGLE: + // WIC does not implement this filter + return false; + } + + return true; +} + + 
+//------------------------------------------------------------------------------------- +// Resize custom filters +//------------------------------------------------------------------------------------- + +//--- Point Filter --- +static HRESULT _ResizePointFilter( _In_ const Image& srcImage, _In_ const Image& destImage ) +{ + assert( srcImage.pixels && destImage.pixels ); + assert( srcImage.format == destImage.format ); + + // Allocate temporary space (2 scanlines) + ScopedAlignedArrayXMVECTOR scanline( reinterpret_cast<XMVECTOR*>( _aligned_malloc( + ( sizeof(XMVECTOR) * (srcImage.width + destImage.width ) ), 16 ) ) ); + if ( !scanline ) + return E_OUTOFMEMORY; + + XMVECTOR* target = scanline.get(); + + XMVECTOR* row = target + destImage.width; + +#ifdef _DEBUG + memset( row, 0xCD, sizeof(XMVECTOR)*srcImage.width ); +#endif + + const uint8_t* pSrc = srcImage.pixels; + uint8_t* pDest = destImage.pixels; + + size_t rowPitch = srcImage.rowPitch; + + size_t xinc = ( srcImage.width << 16 ) / destImage.width; + size_t yinc = ( srcImage.height << 16 ) / destImage.height; + + size_t lasty = size_t(-1); + + size_t sy = 0; + for( size_t y = 0; y < destImage.height; ++y ) + { + if ( (lasty ^ sy) >> 16 ) + { + if ( !_LoadScanline( row, srcImage.width, pSrc + ( rowPitch * (sy >> 16) ), rowPitch, srcImage.format ) ) + return E_FAIL; + lasty = sy; + } + + size_t sx = 0; + for( size_t x = 0; x < destImage.width; ++x ) + { + target[ x ] = row[ sx >> 16 ]; + sx += xinc; + } + + if ( !_StoreScanline( pDest, destImage.rowPitch, destImage.format, target, destImage.width ) ) + return E_FAIL; + pDest += destImage.rowPitch; + + sy += yinc; + } + + return S_OK; +} + + +//--- Box Filter --- +static HRESULT _ResizeBoxFilter( _In_ const Image& srcImage, _In_ DWORD filter, _In_ const Image& destImage ) +{ + assert( srcImage.pixels && destImage.pixels ); + assert( srcImage.format == destImage.format ); + + if ( ( (destImage.width << 1) != srcImage.width ) || ( (destImage.height << 1) != 
srcImage.height ) ) + return E_FAIL; + + // Allocate temporary space (3 scanlines) + ScopedAlignedArrayXMVECTOR scanline( reinterpret_cast<XMVECTOR*>( _aligned_malloc( + ( sizeof(XMVECTOR) * ( srcImage.width*2 + destImage.width ) ), 16 ) ) ); + if ( !scanline ) + return E_OUTOFMEMORY; + + XMVECTOR* target = scanline.get(); + + XMVECTOR* urow0 = target + destImage.width; + XMVECTOR* urow1 = urow0 + srcImage.width; + +#ifdef _DEBUG + memset( urow0, 0xCD, sizeof(XMVECTOR)*srcImage.width ); + memset( urow1, 0xDD, sizeof(XMVECTOR)*srcImage.width ); +#endif + + const XMVECTOR* urow2 = urow0 + 1; + const XMVECTOR* urow3 = urow1 + 1; + + const uint8_t* pSrc = srcImage.pixels; + uint8_t* pDest = destImage.pixels; + + size_t rowPitch = srcImage.rowPitch; + + for( size_t y = 0; y < destImage.height; ++y ) + { + if ( !_LoadScanlineLinear( urow0, srcImage.width, pSrc, rowPitch, srcImage.format, filter ) ) + return E_FAIL; + pSrc += rowPitch; + + if ( urow0 != urow1 ) + { + if ( !_LoadScanlineLinear( urow1, srcImage.width, pSrc, rowPitch, srcImage.format, filter ) ) + return E_FAIL; + pSrc += rowPitch; + } + + for( size_t x = 0; x < destImage.width; ++x ) + { + size_t x2 = x << 1; + + AVERAGE4( target[ x ], urow0[ x2 ], urow1[ x2 ], urow2[ x2 ], urow3[ x2 ] ); + } + + if ( !_StoreScanlineLinear( pDest, destImage.rowPitch, destImage.format, target, destImage.width, filter ) ) + return E_FAIL; + pDest += destImage.rowPitch; + } + + return S_OK; +} + + +//--- Linear Filter --- +static HRESULT _ResizeLinearFilter( _In_ const Image& srcImage, _In_ DWORD filter, _In_ const Image& destImage ) +{ + assert( srcImage.pixels && destImage.pixels ); + assert( srcImage.format == destImage.format ); + + // Allocate temporary space (3 scanlines, plus X and Y filters) + ScopedAlignedArrayXMVECTOR scanline( reinterpret_cast<XMVECTOR*>( _aligned_malloc( + ( sizeof(XMVECTOR) * ( srcImage.width*2 + destImage.width ) ), 16 ) ) ); + if ( !scanline ) + return E_OUTOFMEMORY; + + 
std::unique_ptr<LinearFilter[]> lf( new (std::nothrow) LinearFilter[ destImage.width + destImage.height ] ); + if ( !lf ) + return E_OUTOFMEMORY; + + LinearFilter* lfX = lf.get(); + LinearFilter* lfY = lf.get() + destImage.width; + + _CreateLinearFilter( srcImage.width, destImage.width, (filter & TEX_FILTER_WRAP_U) != 0, lfX ); + _CreateLinearFilter( srcImage.height, destImage.height, (filter & TEX_FILTER_WRAP_V) != 0, lfY ); + + XMVECTOR* target = scanline.get(); + + XMVECTOR* row0 = target + destImage.width; + XMVECTOR* row1 = row0 + srcImage.width; + +#ifdef _DEBUG + memset( row0, 0xCD, sizeof(XMVECTOR)*srcImage.width ); + memset( row1, 0xDD, sizeof(XMVECTOR)*srcImage.width ); +#endif + + const uint8_t* pSrc = srcImage.pixels; + uint8_t* pDest = destImage.pixels; + + size_t rowPitch = srcImage.rowPitch; + + size_t u0 = size_t(-1); + size_t u1 = size_t(-1); + + for( size_t y = 0; y < destImage.height; ++y ) + { + auto& toY = lfY[ y ]; + + if ( toY.u0 != u0 ) + { + if ( toY.u0 != u1 ) + { + u0 = toY.u0; + + if ( !_LoadScanlineLinear( row0, srcImage.width, pSrc + (rowPitch * u0), rowPitch, srcImage.format, filter ) ) + return E_FAIL; + } + else + { + u0 = u1; + u1 = size_t(-1); + + std::swap( row0, row1 ); + } + } + + if ( toY.u1 != u1 ) + { + u1 = toY.u1; + + if ( !_LoadScanlineLinear( row1, srcImage.width, pSrc + (rowPitch * u1), rowPitch, srcImage.format, filter ) ) + return E_FAIL; + } + + for( size_t x = 0; x < destImage.width; ++x ) + { + auto& toX = lfX[ x ]; + + BILINEAR_INTERPOLATE( target[x], toX, toY, row0, row1 ); + } + + if ( !_StoreScanlineLinear( pDest, destImage.rowPitch, destImage.format, target, destImage.width, filter ) ) + return E_FAIL; + pDest += destImage.rowPitch; + } + + return S_OK; +} + + +//--- Cubic Filter --- +static HRESULT _ResizeCubicFilter( _In_ const Image& srcImage, _In_ DWORD filter, _In_ const Image& destImage ) +{ + assert( srcImage.pixels && destImage.pixels ); + assert( srcImage.format == destImage.format ); + + // Allocate 
temporary space (5 scanlines, plus X and Y filters) + ScopedAlignedArrayXMVECTOR scanline( reinterpret_cast<XMVECTOR*>( _aligned_malloc( + ( sizeof(XMVECTOR) * ( srcImage.width*4 + destImage.width ) ), 16 ) ) ); + if ( !scanline ) + return E_OUTOFMEMORY; + + std::unique_ptr<CubicFilter[]> cf( new (std::nothrow) CubicFilter[ destImage.width + destImage.height ] ); + if ( !cf ) + return E_OUTOFMEMORY; + + CubicFilter* cfX = cf.get(); + CubicFilter* cfY = cf.get() + destImage.width; + + _CreateCubicFilter( srcImage.width, destImage.width, (filter & TEX_FILTER_WRAP_U) != 0, (filter & TEX_FILTER_MIRROR_U) != 0, cfX ); + _CreateCubicFilter( srcImage.height, destImage.height, (filter & TEX_FILTER_WRAP_V) != 0, (filter & TEX_FILTER_MIRROR_V) != 0, cfY ); + + XMVECTOR* target = scanline.get(); + + XMVECTOR* row0 = target + destImage.width; + XMVECTOR* row1 = row0 + srcImage.width; + XMVECTOR* row2 = row0 + srcImage.width*2; + XMVECTOR* row3 = row0 + srcImage.width*3; + +#ifdef _DEBUG + memset( row0, 0xCD, sizeof(XMVECTOR)*srcImage.width ); + memset( row1, 0xDD, sizeof(XMVECTOR)*srcImage.width ); + memset( row2, 0xED, sizeof(XMVECTOR)*srcImage.width ); + memset( row3, 0xFD, sizeof(XMVECTOR)*srcImage.width ); +#endif + + const uint8_t* pSrc = srcImage.pixels; + uint8_t* pDest = destImage.pixels; + + size_t rowPitch = srcImage.rowPitch; + + size_t u0 = size_t(-1); + size_t u1 = size_t(-1); + size_t u2 = size_t(-1); + size_t u3 = size_t(-1); + + for( size_t y = 0; y < destImage.height; ++y ) + { + auto& toY = cfY[ y ]; + + // Scanline 1 + if ( toY.u0 != u0 ) + { + if ( toY.u0 != u1 && toY.u0 != u2 && toY.u0 != u3 ) + { + u0 = toY.u0; + + if ( !_LoadScanlineLinear( row0, srcImage.width, pSrc + (rowPitch * u0), rowPitch, srcImage.format, filter ) ) + return E_FAIL; + } + else if ( toY.u0 == u1 ) + { + u0 = u1; + u1 = size_t(-1); + + std::swap( row0, row1 ); + } + else if ( toY.u0 == u2 ) + { + u0 = u2; + u2 = size_t(-1); + + std::swap( row0, row2 ); + } + else if ( toY.u0 == u3 ) 
+ { + u0 = u3; + u3 = size_t(-1); + + std::swap( row0, row3 ); + } + } + + // Scanline 2 + if ( toY.u1 != u1 ) + { + if ( toY.u1 != u2 && toY.u1 != u3 ) + { + u1 = toY.u1; + + if ( !_LoadScanlineLinear( row1, srcImage.width, pSrc + (rowPitch * u1), rowPitch, srcImage.format, filter ) ) + return E_FAIL; + } + else if ( toY.u1 == u2 ) + { + u1 = u2; + u2 = size_t(-1); + + std::swap( row1, row2 ); + } + else if ( toY.u1 == u3 ) + { + u1 = u3; + u3 = size_t(-1); + + std::swap( row1, row3 ); + } + } + + // Scanline 3 + if ( toY.u2 != u2 ) + { + if ( toY.u2 != u3 ) + { + u2 = toY.u2; + + if ( !_LoadScanlineLinear( row2, srcImage.width, pSrc + (rowPitch * u2), rowPitch, srcImage.format, filter ) ) + return E_FAIL; + } + else + { + u2 = u3; + u3 = size_t(-1); + + std::swap( row2, row3 ); + } + } + + // Scanline 4 + if ( toY.u3 != u3 ) + { + u3 = toY.u3; + + if ( !_LoadScanlineLinear( row3, srcImage.width, pSrc + (rowPitch * u3), rowPitch, srcImage.format, filter ) ) + return E_FAIL; + } + + for( size_t x = 0; x < destImage.width; ++x ) + { + auto& toX = cfX[ x ]; + + XMVECTOR C0, C1, C2, C3; + + CUBIC_INTERPOLATE( C0, toX.x, row0[ toX.u0 ], row0[ toX.u1 ], row0[ toX.u2 ], row0[ toX.u3 ] ); + CUBIC_INTERPOLATE( C1, toX.x, row1[ toX.u0 ], row1[ toX.u1 ], row1[ toX.u2 ], row1[ toX.u3 ] ); + CUBIC_INTERPOLATE( C2, toX.x, row2[ toX.u0 ], row2[ toX.u1 ], row2[ toX.u2 ], row2[ toX.u3 ] ); + CUBIC_INTERPOLATE( C3, toX.x, row3[ toX.u0 ], row3[ toX.u1 ], row3[ toX.u2 ], row3[ toX.u3 ] ); + + CUBIC_INTERPOLATE( target[x], toY.x, C0, C1, C2, C3 ); + } + + if ( !_StoreScanlineLinear( pDest, destImage.rowPitch, destImage.format, target, destImage.width, filter ) ) + return E_FAIL; + pDest += destImage.rowPitch; + } + + return S_OK; +} + + +//--- Triangle Filter --- +static HRESULT _ResizeTriangleFilter( _In_ const Image& srcImage, _In_ DWORD filter, _In_ const Image& destImage ) +{ + assert( srcImage.pixels && destImage.pixels ); + assert( srcImage.format == destImage.format ); + + 
using namespace TriangleFilter; + + // Allocate initial temporary space (1 scanline, accumulation rows, plus X and Y filters) + ScopedAlignedArrayXMVECTOR scanline( reinterpret_cast<XMVECTOR*>( _aligned_malloc( sizeof(XMVECTOR) * srcImage.width, 16 ) ) ); + if ( !scanline ) + return E_OUTOFMEMORY; + + std::unique_ptr<TriangleRow[]> rowActive( new (std::nothrow) TriangleRow[ destImage.height ] ); + if ( !rowActive ) + return E_OUTOFMEMORY; + + TriangleRow * rowFree = nullptr; + + std::unique_ptr<Filter> tfX; + HRESULT hr = _Create( srcImage.width, destImage.width, (filter & TEX_FILTER_WRAP_U) != 0, tfX ); + if ( FAILED(hr) ) + return hr; + + std::unique_ptr<Filter> tfY; + hr = _Create( srcImage.height, destImage.height, (filter & TEX_FILTER_WRAP_V) != 0, tfY ); + if ( FAILED(hr) ) + return hr; + + XMVECTOR* row = scanline.get(); + +#ifdef _DEBUG + memset( row, 0xCD, sizeof(XMVECTOR)*srcImage.width ); +#endif + + auto xFromEnd = reinterpret_cast<const FilterFrom*>( reinterpret_cast<const uint8_t*>( tfX.get() ) + tfX->sizeInBytes ); + auto yFromEnd = reinterpret_cast<const FilterFrom*>( reinterpret_cast<const uint8_t*>( tfY.get() ) + tfY->sizeInBytes ); + + // Count times rows get written + for( FilterFrom* yFrom = tfY->from; yFrom < yFromEnd; ) + { + for ( size_t j = 0; j < yFrom->count; ++j ) + { + size_t v = yFrom->to[ j ].u; + assert( v < destImage.height ); + ++rowActive[ v ].remaining; + } + + yFrom = reinterpret_cast<FilterFrom*>( reinterpret_cast<uint8_t*>( yFrom ) + yFrom->sizeInBytes ); + } + + // Filter image + const uint8_t* pSrc = srcImage.pixels; + size_t rowPitch = srcImage.rowPitch; + const uint8_t* pEndSrc = pSrc + rowPitch * srcImage.height; + + uint8_t* pDest = destImage.pixels; + + for( FilterFrom* yFrom = tfY->from; yFrom < yFromEnd; ) + { + // Create accumulation rows as needed + for ( size_t j = 0; j < yFrom->count; ++j ) + { + size_t v = yFrom->to[ j ].u; + assert( v < destImage.height ); + TriangleRow* rowAcc = &rowActive[ v ]; + + if ( 
!rowAcc->scanline ) + { + if ( rowFree ) + { + // Steal and reuse scanline from 'free row' list + assert( rowFree->scanline != 0 ); + rowAcc->scanline.reset( rowFree->scanline.release() ); + rowFree = rowFree->next; + } + else + { + rowAcc->scanline.reset( reinterpret_cast<XMVECTOR*>( _aligned_malloc( sizeof(XMVECTOR) * destImage.width, 16 ) ) ); + if ( !rowAcc->scanline ) + return E_OUTOFMEMORY; + } + + memset( rowAcc->scanline.get(), 0, sizeof(XMVECTOR) * destImage.width ); + } + } + + // Load source scanline + if ( (pSrc + rowPitch) > pEndSrc ) + return E_FAIL; + + if ( !_LoadScanlineLinear( row, srcImage.width, pSrc, rowPitch, srcImage.format, filter ) ) + return E_FAIL; + + pSrc += rowPitch; + + // Process row + size_t x = 0; + for( FilterFrom* xFrom = tfX->from; xFrom < xFromEnd; ++x ) + { + for ( size_t j = 0; j < yFrom->count; ++j ) + { + size_t v = yFrom->to[ j ].u; + assert( v < destImage.height ); + float yweight = yFrom->to[ j ].weight; + + XMVECTOR* accPtr = rowActive[ v ].scanline.get(); + if ( !accPtr ) + return E_POINTER; + + for ( size_t k = 0; k < xFrom->count; ++k ) + { + size_t u = xFrom->to[ k ].u; + assert( u < destImage.width ); + + XMVECTOR weight = XMVectorReplicate( yweight * xFrom->to[ k ].weight ); + + assert( x < srcImage.width ); + accPtr[ u ] = XMVectorMultiplyAdd( row[ x ], weight, accPtr[ u ] ); + } + } + + xFrom = reinterpret_cast<FilterFrom*>( reinterpret_cast<uint8_t*>( xFrom ) + xFrom->sizeInBytes ); + } + + // Write completed accumulation rows + for ( size_t j = 0; j < yFrom->count; ++j ) + { + size_t v = yFrom->to[ j ].u; + assert( v < destImage.height ); + TriangleRow* rowAcc = &rowActive[ v ]; + + assert( rowAcc->remaining > 0 ); + --rowAcc->remaining; + + if ( !rowAcc->remaining ) + { + XMVECTOR* pAccSrc = rowAcc->scanline.get(); + if ( !pAccSrc ) + return E_POINTER; + + switch( destImage.format ) + { + case DXGI_FORMAT_R10G10B10A2_UNORM: + case DXGI_FORMAT_R10G10B10A2_UINT: + { + // Need to slightly bias results for 
floating-point error accumulation which can + // be visible with harshly quantized values + static const XMVECTORF32 Bias = { 0.f, 0.f, 0.f, 0.1f }; + + XMVECTOR* ptr = pAccSrc; + for( size_t i=0; i < destImage.width; ++i, ++ptr ) + { + *ptr = XMVectorAdd( *ptr, Bias ); + } + } + break; + } + + // This performs any required clamping + if ( !_StoreScanlineLinear( pDest + (destImage.rowPitch * v), destImage.rowPitch, destImage.format, pAccSrc, destImage.width, filter ) ) + return E_FAIL; + + // Put row on freelist to reuse it's allocated scanline + rowAcc->next = rowFree; + rowFree = rowAcc; + } + } + + yFrom = reinterpret_cast<FilterFrom*>( reinterpret_cast<uint8_t*>( yFrom ) + yFrom->sizeInBytes ); + } + + return S_OK; +} + + +//--- Custom filter resize --- +static HRESULT _PerformResizeUsingCustomFilters( _In_ const Image& srcImage, _In_ DWORD filter, _In_ const Image& destImage ) +{ + if ( !srcImage.pixels || !destImage.pixels ) + return E_POINTER; + + static_assert( TEX_FILTER_POINT == 0x100000, "TEX_FILTER_ flag values don't match TEX_FILTER_MASK" ); + + DWORD filter_select = ( filter & TEX_FILTER_MASK ); + if ( !filter_select ) + { + // Default filter choice + filter_select = ( ( (destImage.width << 1) == srcImage.width ) && ( (destImage.height << 1) == srcImage.height ) ) + ? 
TEX_FILTER_BOX : TEX_FILTER_LINEAR; + } + + switch( filter_select ) + { + case TEX_FILTER_POINT: + return _ResizePointFilter( srcImage, destImage ); + + case TEX_FILTER_BOX: + return _ResizeBoxFilter( srcImage, filter, destImage ); + + case TEX_FILTER_LINEAR: + return _ResizeLinearFilter( srcImage, filter, destImage ); + + case TEX_FILTER_CUBIC: + return _ResizeCubicFilter( srcImage, filter, destImage ); + + case TEX_FILTER_TRIANGLE: + return _ResizeTriangleFilter( srcImage, filter, destImage ); + + default: + return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); + } +} + + //===================================================================================== // Entry-points //===================================================================================== @@ -159,12 +826,13 @@ static HRESULT _PerformResizeViaF32( _In_ const Image& srcImage, _In_ DWORD filt //------------------------------------------------------------------------------------- // Resize image //------------------------------------------------------------------------------------- +_Use_decl_annotations_ HRESULT Resize( const Image& srcImage, size_t width, size_t height, DWORD filter, ScratchImage& image ) { if ( width == 0 || height == 0 ) return E_INVALIDARG; -#ifdef _AMD64_ +#ifdef _M_X64 if ( (srcImage.width > 0xFFFFFFFF) || (srcImage.height > 0xFFFFFFFF) ) return E_INVALIDARG; @@ -189,18 +857,23 @@ HRESULT Resize( const Image& srcImage, size_t width, size_t height, DWORD filter if ( !rimage ) return E_POINTER; - // WIC only supports CLAMP - - WICPixelFormatGUID pfGUID; - if ( _DXGIToWIC( srcImage.format, pfGUID ) ) + if ( _UseWICFiltering( srcImage.format, filter ) ) { - // Case 1: Source format is supported by Windows Imaging Component - hr = _PerformResizeUsingWIC( srcImage, filter, pfGUID, *rimage ); + WICPixelFormatGUID pfGUID; + if ( _DXGIToWIC( srcImage.format, pfGUID, true ) ) + { + // Case 1: Source format is supported by Windows Imaging Component + hr = _PerformResizeUsingWIC( 
srcImage, filter, pfGUID, *rimage ); + } + else + { + // Case 2: Source format is not supported by WIC, so we have to convert, resize, and convert back + hr = _PerformResizeViaF32( srcImage, filter, *rimage ); + } } else { - // Case 2: Source format is not supported by WIC, so we have to convert, resize, and convert back - hr = _PerformResizeViaF32( srcImage, filter, *rimage ); + hr = _PerformResizeUsingCustomFilters( srcImage, filter, *rimage ); } if ( FAILED(hr) ) @@ -216,13 +889,14 @@ HRESULT Resize( const Image& srcImage, size_t width, size_t height, DWORD filter //------------------------------------------------------------------------------------- // Resize image (complex) //------------------------------------------------------------------------------------- +_Use_decl_annotations_ HRESULT Resize( const Image* srcImages, size_t nimages, const TexMetadata& metadata, size_t width, size_t height, DWORD filter, ScratchImage& result ) { if ( !srcImages || !nimages || width == 0 || height == 0 ) return E_INVALIDARG; -#ifdef _AMD64_ +#ifdef _M_X64 if ( (width > 0xFFFFFFFF) || (height > 0xFFFFFFFF) ) return E_INVALIDARG; #endif @@ -235,8 +909,10 @@ HRESULT Resize( const Image* srcImages, size_t nimages, const TexMetadata& metad if ( FAILED(hr) ) return hr; - WICPixelFormatGUID pfGUID; - bool wicpf = _DXGIToWIC( metadata.format, pfGUID ); + bool usewic = _UseWICFiltering( metadata.format, filter ); + + WICPixelFormatGUID pfGUID = {0}; + bool wicpf = ( usewic ) ? 
_DXGIToWIC( metadata.format, pfGUID, true ) : false; switch ( metadata.dimension ) { @@ -267,7 +943,7 @@ HRESULT Resize( const Image* srcImages, size_t nimages, const TexMetadata& metad return E_FAIL; } -#ifdef _AMD64_ +#ifdef _M_X64 if ( (srcimg->width > 0xFFFFFFFF) || (srcimg->height > 0xFFFFFFFF) ) { result.Release(); @@ -275,15 +951,23 @@ HRESULT Resize( const Image* srcImages, size_t nimages, const TexMetadata& metad } #endif - if ( wicpf ) + if ( usewic ) { - // Case 1: Source format is supported by Windows Imaging Component - hr = _PerformResizeUsingWIC( *srcimg, filter, pfGUID, *destimg ); + if ( wicpf ) + { + // Case 1: Source format is supported by Windows Imaging Component + hr = _PerformResizeUsingWIC( *srcimg, filter, pfGUID, *destimg ); + } + else + { + // Case 2: Source format is not supported by WIC, so we have to convert, resize, and convert back + hr = _PerformResizeViaF32( *srcimg, filter, *destimg ); + } } else { - // Case 2: Source format is not supported by WIC, so we have to convert, resize, and convert back - hr = _PerformResizeViaF32( *srcimg, filter, *destimg ); + // Case 3: not using WIC resizing + hr = _PerformResizeUsingCustomFilters( *srcimg, filter, *destimg ); } if ( FAILED(hr) ) @@ -320,7 +1004,7 @@ HRESULT Resize( const Image* srcImages, size_t nimages, const TexMetadata& metad return E_FAIL; } -#ifdef _AMD64_ +#ifdef _M_X64 if ( (srcimg->width > 0xFFFFFFFF) || (srcimg->height > 0xFFFFFFFF) ) { result.Release(); @@ -328,15 +1012,23 @@ HRESULT Resize( const Image* srcImages, size_t nimages, const TexMetadata& metad } #endif - if ( wicpf ) + if ( usewic ) { - // Case 1: Source format is supported by Windows Imaging Component - hr = _PerformResizeUsingWIC( *srcimg, filter, pfGUID, *destimg ); + if ( wicpf ) + { + // Case 1: Source format is supported by Windows Imaging Component + hr = _PerformResizeUsingWIC( *srcimg, filter, pfGUID, *destimg ); + } + else + { + // Case 2: Source format is not supported by WIC, so we have to convert, 
resize, and convert back + hr = _PerformResizeViaF32( *srcimg, filter, *destimg ); + } } else { - // Case 2: Source format is not supported by WIC, so we have to convert, resize, and convert back - hr = _PerformResizeViaF32( *srcimg, filter, *destimg ); + // Case 3: not using WIC resizing + hr = _PerformResizeUsingCustomFilters( *srcimg, filter, *destimg ); } if ( FAILED(hr) ) diff --git a/thirdparty/directxtex/DirectXTex/DirectXTexTGA.cpp b/thirdparty/directxtex/DirectXTex/DirectXTexTGA.cpp index e71575a5..3a775bf7 100644 --- a/thirdparty/directxtex/DirectXTex/DirectXTexTGA.cpp +++ b/thirdparty/directxtex/DirectXTex/DirectXTexTGA.cpp @@ -19,7 +19,7 @@ // The implementation here has the following limitations: // * Does not support files that contain color maps (these are rare in practice) // * Interleaved files are not supported (deprecated aspect of TGA format) -// * Only supports 8-bit greyscale; 16-, 24-, and 32-bit truecolor images +// * Only supports 8-bit grayscale; 16-, 24-, and 32-bit truecolor images // * Always writes uncompressed files (i.e. 
can read RLE compression, but does not write it) // @@ -116,7 +116,7 @@ namespace DirectX //------------------------------------------------------------------------------------- // Decodes TGA header //------------------------------------------------------------------------------------- -static HRESULT _DecodeTGAHeader( _In_bytecount_(size) LPCVOID pSource, size_t size, _Out_ TexMetadata& metadata, size_t& offset, +static HRESULT _DecodeTGAHeader( _In_reads_bytes_(size) LPCVOID pSource, size_t size, _Out_ TexMetadata& metadata, size_t& offset, _Inout_opt_ DWORD* convFlags ) { if ( !pSource ) @@ -129,8 +129,7 @@ static HRESULT _DecodeTGAHeader( _In_bytecount_(size) LPCVOID pSource, size_t si return HRESULT_FROM_WIN32( ERROR_INVALID_DATA ); } - const TGA_HEADER* pHeader = reinterpret_cast<const TGA_HEADER*>( pSource ); - assert( pHeader ); + auto pHeader = reinterpret_cast<const TGA_HEADER*>( pSource ); if ( pHeader->bColorMapType != 0 || pHeader->wColorMapLength != 0 ) @@ -236,7 +235,7 @@ static HRESULT _SetAlphaChannelToOpaque( _In_ const Image* image ) { assert( image ); - uint8_t* pPixels = reinterpret_cast<uint8_t*>( image->pixels ); + auto pPixels = reinterpret_cast<uint8_t*>( image->pixels ); if ( !pPixels ) return E_POINTER; @@ -253,7 +252,7 @@ static HRESULT _SetAlphaChannelToOpaque( _In_ const Image* image ) //------------------------------------------------------------------------------------- // Uncompress pixel data from a TGA into the target image //------------------------------------------------------------------------------------- -static HRESULT _UncompressPixels( _In_bytecount_(size) LPCVOID pSource, size_t size, _In_ const Image* image, DWORD convFlags ) +static HRESULT _UncompressPixels( _In_reads_bytes_(size) LPCVOID pSource, size_t size, _In_ const Image* image, _In_ DWORD convFlags ) { assert( pSource && size > 0 ); @@ -272,7 +271,7 @@ static HRESULT _UncompressPixels( _In_bytecount_(size) LPCVOID pSource, size_t s ComputePitch( image->format, 
image->width, image->height, rowPitch, slicePitch, CP_FLAGS_NONE ); } - const uint8_t* sPtr = reinterpret_cast<const uint8_t*>( pSource ); + auto sPtr = reinterpret_cast<const uint8_t*>( pSource ); const uint8_t* endPtr = sPtr + size; switch( image->format ) @@ -573,7 +572,7 @@ static HRESULT _UncompressPixels( _In_bytecount_(size) LPCVOID pSource, size_t s //------------------------------------------------------------------------------------- // Copies pixel data from a TGA into the target image //------------------------------------------------------------------------------------- -static HRESULT _CopyPixels( _In_bytecount_(size) LPCVOID pSource, size_t size, _In_ const Image* image, DWORD convFlags ) +static HRESULT _CopyPixels( _In_reads_bytes_(size) LPCVOID pSource, size_t size, _In_ const Image* image, _In_ DWORD convFlags ) { assert( pSource && size > 0 ); @@ -737,10 +736,8 @@ static HRESULT _CopyPixels( _In_bytecount_(size) LPCVOID pSource, size_t size, _ //------------------------------------------------------------------------------------- // Encodes TGA file header //------------------------------------------------------------------------------------- -static HRESULT _EncodeTGAHeader( _In_ const Image& image, _Out_ TGA_HEADER& header, DWORD& convFlags ) +static HRESULT _EncodeTGAHeader( _In_ const Image& image, _Out_ TGA_HEADER& header, _Inout_ DWORD& convFlags ) { - assert( IsValid( image.format ) && !IsVideo( image.format ) ); - memset( &header, 0, sizeof(TGA_HEADER) ); if ( (image.width > 0xFFFF) @@ -802,8 +799,8 @@ static HRESULT _EncodeTGAHeader( _In_ const Image& image, _Out_ TGA_HEADER& head // Copies BGRX data to form BGR 24bpp data //------------------------------------------------------------------------------------- #pragma warning(suppress: 6001 6101) // In the case where outSize is insufficient we do not write to pDestination -static void _Copy24bppScanline( _Out_bytecap_(outSize) LPVOID pDestination, _In_ size_t outSize, - 
_In_bytecount_(inSize) LPCVOID pSource, _In_ size_t inSize ) +static void _Copy24bppScanline( _Out_writes_bytes_(outSize) LPVOID pDestination, _In_ size_t outSize, + _In_reads_bytes_(inSize) LPCVOID pSource, _In_ size_t inSize ) { assert( pDestination && outSize > 0 ); assert( pSource && inSize > 0 ); @@ -813,18 +810,21 @@ static void _Copy24bppScanline( _Out_bytecap_(outSize) LPVOID pDestination, _In_ const uint32_t * __restrict sPtr = reinterpret_cast<const uint32_t*>(pSource); uint8_t * __restrict dPtr = reinterpret_cast<uint8_t*>(pDestination); - const uint8_t* endPtr = dPtr + outSize; - - for( size_t count = 0; count < inSize; count += 4 ) + if ( inSize >= 4 && outSize >= 3 ) { - uint32_t t = *(sPtr++); + const uint8_t* endPtr = dPtr + outSize; + + for( size_t count = 0; count < ( inSize - 3 ); count += 4 ) + { + uint32_t t = *(sPtr++); - if ( dPtr+2 > endPtr ) - return; + if ( dPtr+3 > endPtr ) + return; - *(dPtr++) = uint8_t(t & 0xFF); // Blue - *(dPtr++) = uint8_t((t & 0xFF00) >> 8); // Green - *(dPtr++) = uint8_t((t & 0xFF0000) >> 16); // Red + *(dPtr++) = uint8_t(t & 0xFF); // Blue + *(dPtr++) = uint8_t((t & 0xFF00) >> 8); // Green + *(dPtr++) = uint8_t((t & 0xFF0000) >> 16); // Red + } } } @@ -836,6 +836,7 @@ static void _Copy24bppScanline( _Out_bytecap_(outSize) LPVOID pDestination, _In_ //------------------------------------------------------------------------------------- // Obtain metadata from TGA file in memory/on disk //------------------------------------------------------------------------------------- +_Use_decl_annotations_ HRESULT GetMetadataFromTGAMemory( LPCVOID pSource, size_t size, TexMetadata& metadata ) { if ( !pSource || size == 0 ) @@ -845,12 +846,13 @@ HRESULT GetMetadataFromTGAMemory( LPCVOID pSource, size_t size, TexMetadata& met return _DecodeTGAHeader( pSource, size, metadata, offset, 0 ); } +_Use_decl_annotations_ HRESULT GetMetadataFromTGAFile( LPCWSTR szFile, TexMetadata& metadata ) { if ( !szFile ) return E_INVALIDARG; -#if 
(_WIN32_WINNT >= 0x0602 /*_WIN32_WINNT_WIN8*/) +#if (_WIN32_WINNT >= _WIN32_WINNT_WIN8) ScopedHandle hFile( safe_handle( CreateFile2( szFile, GENERIC_READ, FILE_SHARE_READ, OPEN_EXISTING, 0 ) ) ); #else ScopedHandle hFile( safe_handle( CreateFileW( szFile, GENERIC_READ, FILE_SHARE_READ, 0, OPEN_EXISTING, @@ -906,6 +908,7 @@ HRESULT GetMetadataFromTGAFile( LPCWSTR szFile, TexMetadata& metadata ) //------------------------------------------------------------------------------------- // Load a TGA file in memory //------------------------------------------------------------------------------------- +_Use_decl_annotations_ HRESULT LoadFromTGAMemory( LPCVOID pSource, size_t size, TexMetadata* metadata, ScratchImage& image ) { if ( !pSource || size == 0 ) @@ -923,8 +926,7 @@ HRESULT LoadFromTGAMemory( LPCVOID pSource, size_t size, TexMetadata* metadata, if ( offset > size ) return E_FAIL; - LPCVOID pPixels = reinterpret_cast<LPCVOID>( reinterpret_cast<const uint8_t*>(pSource) + offset ); - assert( pPixels ); + auto pPixels = reinterpret_cast<LPCVOID>( reinterpret_cast<const uint8_t*>(pSource) + offset ); size_t remaining = size - offset; if ( remaining == 0 ) @@ -959,6 +961,7 @@ HRESULT LoadFromTGAMemory( LPCVOID pSource, size_t size, TexMetadata* metadata, //------------------------------------------------------------------------------------- // Load a TGA file from disk //------------------------------------------------------------------------------------- +_Use_decl_annotations_ HRESULT LoadFromTGAFile( LPCWSTR szFile, TexMetadata* metadata, ScratchImage& image ) { if ( !szFile ) @@ -966,7 +969,7 @@ HRESULT LoadFromTGAFile( LPCWSTR szFile, TexMetadata* metadata, ScratchImage& im image.Release(); -#if (_WIN32_WINNT >= 0x0602 /*_WIN32_WINNT_WIN8*/) +#if (_WIN32_WINNT >= _WIN32_WINNT_WIN8) ScopedHandle hFile( safe_handle( CreateFile2( szFile, GENERIC_READ, FILE_SHARE_READ, OPEN_EXISTING, 0 ) ) ); #else ScopedHandle hFile( safe_handle( CreateFileW( szFile, GENERIC_READ, 
FILE_SHARE_READ, 0, OPEN_EXISTING, @@ -1105,7 +1108,6 @@ HRESULT LoadFromTGAFile( LPCWSTR szFile, TexMetadata* metadata, ScratchImage& im for( size_t h = 0; h < img->height; ++h ) { _SwizzleScanline( pPixels, rowPitch, pPixels, rowPitch, mdata.format, tflags ); - pPixels += rowPitch; } } @@ -1163,7 +1165,7 @@ HRESULT LoadFromTGAFile( LPCWSTR szFile, TexMetadata* metadata, ScratchImage& im } else // RLE || EXPAND || INVERTX || !INVERTY { - std::unique_ptr<uint8_t[]> temp( new uint8_t[ remaining ] ); + std::unique_ptr<uint8_t[]> temp( new (std::nothrow) uint8_t[ remaining ] ); if ( !temp ) { image.Release(); @@ -1208,6 +1210,7 @@ HRESULT LoadFromTGAFile( LPCWSTR szFile, TexMetadata* metadata, ScratchImage& im //------------------------------------------------------------------------------------- // Save a TGA file to memory //------------------------------------------------------------------------------------- +_Use_decl_annotations_ HRESULT SaveToTGAMemory( const Image& image, Blob& blob ) { if ( !image.pixels ) @@ -1238,12 +1241,12 @@ HRESULT SaveToTGAMemory( const Image& image, Blob& blob ) return hr; // Copy header - uint8_t* dPtr = reinterpret_cast<uint8_t*>( blob.GetBufferPointer() ); + auto dPtr = reinterpret_cast<uint8_t*>( blob.GetBufferPointer() ); assert( dPtr != 0 ); memcpy_s( dPtr, blob.GetBufferSize(), &tga_header, sizeof(TGA_HEADER) ); dPtr += sizeof(TGA_HEADER); - const uint8_t* pPixels = reinterpret_cast<const uint8_t*>( image.pixels ); + auto pPixels = reinterpret_cast<const uint8_t*>( image.pixels ); assert( pPixels ); for( size_t y = 0; y < image.height; ++y ) @@ -1273,6 +1276,7 @@ HRESULT SaveToTGAMemory( const Image& image, Blob& blob ) //------------------------------------------------------------------------------------- // Save a TGA file to disk //------------------------------------------------------------------------------------- +_Use_decl_annotations_ HRESULT SaveToTGAFile( const Image& image, LPCWSTR szFile ) { if ( !szFile ) @@ -1288,7 
+1292,7 @@ HRESULT SaveToTGAFile( const Image& image, LPCWSTR szFile ) return hr; // Create file and write header -#if (_WIN32_WINNT >= 0x0602 /*_WIN32_WINNT_WIN8*/) +#if (_WIN32_WINNT >= _WIN32_WINNT_WIN8) ScopedHandle hFile( safe_handle( CreateFile2( szFile, GENERIC_WRITE, 0, CREATE_ALWAYS, 0 ) ) ); #else ScopedHandle hFile( safe_handle( CreateFileW( szFile, GENERIC_WRITE, 0, 0, CREATE_ALWAYS, 0, 0 ) ) ); @@ -1336,7 +1340,7 @@ HRESULT SaveToTGAFile( const Image& image, LPCWSTR szFile ) else { // Otherwise, write the image one scanline at a time... - std::unique_ptr<uint8_t[]> temp( new uint8_t[ rowPitch ] ); + std::unique_ptr<uint8_t[]> temp( new (std::nothrow) uint8_t[ rowPitch ] ); if ( !temp ) return E_OUTOFMEMORY; @@ -1351,7 +1355,7 @@ HRESULT SaveToTGAFile( const Image& image, LPCWSTR szFile ) return E_FAIL; // Write pixels - const uint8_t* pPixels = reinterpret_cast<const uint8_t*>( image.pixels ); + auto pPixels = reinterpret_cast<const uint8_t*>( image.pixels ); for( size_t y = 0; y < image.height; ++y ) { diff --git a/thirdparty/directxtex/DirectXTex/DirectXTexUtil.cpp b/thirdparty/directxtex/DirectXTex/DirectXTexUtil.cpp index 9d5e0905..59837f35 100644 --- a/thirdparty/directxtex/DirectXTex/DirectXTexUtil.cpp +++ b/thirdparty/directxtex/DirectXTex/DirectXTexUtil.cpp @@ -22,34 +22,34 @@ struct WICTranslate { GUID wic; DXGI_FORMAT format; + bool srgb; }; static WICTranslate g_WICFormats[] = { - { GUID_WICPixelFormat128bppRGBAFloat, DXGI_FORMAT_R32G32B32A32_FLOAT }, + { GUID_WICPixelFormat128bppRGBAFloat, DXGI_FORMAT_R32G32B32A32_FLOAT, false }, - { GUID_WICPixelFormat64bppRGBAHalf, DXGI_FORMAT_R16G16B16A16_FLOAT }, - { GUID_WICPixelFormat64bppRGBA, DXGI_FORMAT_R16G16B16A16_UNORM }, + { GUID_WICPixelFormat64bppRGBAHalf, DXGI_FORMAT_R16G16B16A16_FLOAT, false }, + { GUID_WICPixelFormat64bppRGBA, DXGI_FORMAT_R16G16B16A16_UNORM, true }, - { GUID_WICPixelFormat32bppRGBA, DXGI_FORMAT_R8G8B8A8_UNORM }, - { GUID_WICPixelFormat32bppBGRA, DXGI_FORMAT_B8G8R8A8_UNORM 
}, // DXGI 1.1 - { GUID_WICPixelFormat32bppBGR, DXGI_FORMAT_B8G8R8X8_UNORM }, // DXGI 1.1 + { GUID_WICPixelFormat32bppRGBA, DXGI_FORMAT_R8G8B8A8_UNORM, true }, + { GUID_WICPixelFormat32bppBGRA, DXGI_FORMAT_B8G8R8A8_UNORM, true }, // DXGI 1.1 + { GUID_WICPixelFormat32bppBGR, DXGI_FORMAT_B8G8R8X8_UNORM, true }, // DXGI 1.1 - { GUID_WICPixelFormat32bppRGBA1010102XR, DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM }, // DXGI 1.1 - { GUID_WICPixelFormat32bppRGBA1010102, DXGI_FORMAT_R10G10B10A2_UNORM }, - { GUID_WICPixelFormat32bppRGBE, DXGI_FORMAT_R9G9B9E5_SHAREDEXP }, + { GUID_WICPixelFormat32bppRGBA1010102XR, DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM, true }, // DXGI 1.1 + { GUID_WICPixelFormat32bppRGBA1010102, DXGI_FORMAT_R10G10B10A2_UNORM, true }, - { GUID_WICPixelFormat16bppBGRA5551, DXGI_FORMAT_B5G5R5A1_UNORM }, - { GUID_WICPixelFormat16bppBGR565, DXGI_FORMAT_B5G6R5_UNORM }, + { GUID_WICPixelFormat16bppBGRA5551, DXGI_FORMAT_B5G5R5A1_UNORM, true }, + { GUID_WICPixelFormat16bppBGR565, DXGI_FORMAT_B5G6R5_UNORM, true }, - { GUID_WICPixelFormat32bppGrayFloat, DXGI_FORMAT_R32_FLOAT }, - { GUID_WICPixelFormat16bppGrayHalf, DXGI_FORMAT_R16_FLOAT }, - { GUID_WICPixelFormat16bppGray, DXGI_FORMAT_R16_UNORM }, - { GUID_WICPixelFormat8bppGray, DXGI_FORMAT_R8_UNORM }, + { GUID_WICPixelFormat32bppGrayFloat, DXGI_FORMAT_R32_FLOAT, false }, + { GUID_WICPixelFormat16bppGrayHalf, DXGI_FORMAT_R16_FLOAT, false }, + { GUID_WICPixelFormat16bppGray, DXGI_FORMAT_R16_UNORM, true }, + { GUID_WICPixelFormat8bppGray, DXGI_FORMAT_R8_UNORM, true }, - { GUID_WICPixelFormat8bppAlpha, DXGI_FORMAT_A8_UNORM }, + { GUID_WICPixelFormat8bppAlpha, DXGI_FORMAT_A8_UNORM, false }, - { GUID_WICPixelFormatBlackWhite, DXGI_FORMAT_R1_UNORM }, + { GUID_WICPixelFormatBlackWhite, DXGI_FORMAT_R1_UNORM, false }, }; static bool g_WIC2 = false; @@ -61,6 +61,7 @@ namespace DirectX // WIC Utilities //===================================================================================== +_Use_decl_annotations_ DXGI_FORMAT 
_WICToDXGI( const GUID& guid ) { for( size_t i=0; i < _countof(g_WICFormats); ++i ) @@ -69,7 +70,7 @@ DXGI_FORMAT _WICToDXGI( const GUID& guid ) return g_WICFormats[i].format; } -#if (_WIN32_WINNT >= 0x0602 /*_WIN32_WINNT_WIN8*/) || defined(_WIN7_PLATFORM_UPDATE) +#if (_WIN32_WINNT >= _WIN32_WINNT_WIN8) || defined(_WIN7_PLATFORM_UPDATE) if ( g_WIC2 ) { if ( memcmp( &GUID_WICPixelFormat96bppRGBFloat, &guid, sizeof(GUID) ) == 0 ) @@ -80,12 +81,23 @@ DXGI_FORMAT _WICToDXGI( const GUID& guid ) return DXGI_FORMAT_UNKNOWN; } -bool _DXGIToWIC( DXGI_FORMAT format, GUID& guid ) +_Use_decl_annotations_ +bool _DXGIToWIC( DXGI_FORMAT format, GUID& guid, bool ignoreRGBvsBGR ) { switch( format ) { + case DXGI_FORMAT_R8G8B8A8_UNORM: case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: - memcpy( &guid, &GUID_WICPixelFormat32bppRGBA, sizeof(GUID) ); + if ( ignoreRGBvsBGR ) + { + // If we are not doing conversion so don't really care about BGR vs RGB color-order, + // we can use the canonical WIC 32bppBGRA format which avoids an extra format conversion when using the WIC scaler + memcpy( &guid, &GUID_WICPixelFormat32bppBGRA, sizeof(GUID) ); + } + else + { + memcpy( &guid, &GUID_WICPixelFormat32bppRGBA, sizeof(GUID) ); + } return true; case DXGI_FORMAT_D32_FLOAT: @@ -104,7 +116,7 @@ bool _DXGIToWIC( DXGI_FORMAT format, GUID& guid ) memcpy( &guid, &GUID_WICPixelFormat32bppBGR, sizeof(GUID) ); return true; -#if (_WIN32_WINNT >= 0x0602 /*_WIN32_WINNT_WIN8*/) || defined(_WIN7_PLATFORM_UPDATE) +#if (_WIN32_WINNT >= _WIN32_WINNT_WIN8) || defined(_WIN7_PLATFORM_UPDATE) case DXGI_FORMAT_R32G32B32_FLOAT: if ( g_WIC2 ) { @@ -130,6 +142,33 @@ bool _DXGIToWIC( DXGI_FORMAT format, GUID& guid ) return false; } +DWORD _CheckWICColorSpace( _In_ const GUID& sourceGUID, _In_ const GUID& targetGUID ) +{ + DWORD srgb = 0; + + for( size_t i=0; i < _countof(g_WICFormats); ++i ) + { + if ( memcmp( &g_WICFormats[i].wic, &sourceGUID, sizeof(GUID) ) == 0 ) + { + if ( g_WICFormats[i].srgb ) + srgb |= TEX_FILTER_SRGB_IN; + } 
+ + if ( memcmp( &g_WICFormats[i].wic, &targetGUID, sizeof(GUID) ) == 0 ) + { + if ( g_WICFormats[i].srgb ) + srgb |= TEX_FILTER_SRGB_OUT; + } + } + + if ( (srgb & (TEX_FILTER_SRGB_IN|TEX_FILTER_SRGB_OUT)) == (TEX_FILTER_SRGB_IN|TEX_FILTER_SRGB_OUT) ) + { + srgb &= ~(TEX_FILTER_SRGB_IN|TEX_FILTER_SRGB_OUT); + } + + return srgb; +} + bool _IsWIC2() { return g_WIC2; @@ -142,7 +181,7 @@ IWICImagingFactory* _GetWIC() if ( s_Factory ) return s_Factory; -#if(_WIN32_WINNT >= 0x0602 /*_WIN32_WINNT_WIN8*/) || defined(_WIN7_PLATFORM_UPDATE) +#if(_WIN32_WINNT >= _WIN32_WINNT_WIN8) || defined(_WIN7_PLATFORM_UPDATE) HRESULT hr = CoCreateInstance( CLSID_WICImagingFactory2, nullptr, @@ -195,7 +234,8 @@ IWICImagingFactory* _GetWIC() //------------------------------------------------------------------------------------- // Public helper function to get common WIC codec GUIDs //------------------------------------------------------------------------------------- -REFGUID GetWICCodec( _In_ WICCodecs codec ) +_Use_decl_annotations_ +REFGUID GetWICCodec( WICCodecs codec ) { switch( codec ) { @@ -233,9 +273,10 @@ REFGUID GetWICCodec( _In_ WICCodecs codec ) //------------------------------------------------------------------------------------- // Returns bits-per-pixel for a given DXGI format, or 0 on failure //------------------------------------------------------------------------------------- +_Use_decl_annotations_ size_t BitsPerPixel( DXGI_FORMAT fmt ) { - switch( fmt ) + switch( static_cast<int>(fmt) ) { case DXGI_FORMAT_R32G32B32A32_TYPELESS: case DXGI_FORMAT_R32G32B32A32_FLOAT: @@ -263,6 +304,9 @@ size_t BitsPerPixel( DXGI_FORMAT fmt ) case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS: case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT: + case DXGI_FORMAT_Y416: + case DXGI_FORMAT_Y210: + case DXGI_FORMAT_Y216: return 64; case DXGI_FORMAT_R10G10B10A2_TYPELESS: @@ -300,8 +344,20 @@ size_t BitsPerPixel( DXGI_FORMAT fmt ) case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB: 
case DXGI_FORMAT_B8G8R8X8_TYPELESS: case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB: + case DXGI_FORMAT_AYUV: + case DXGI_FORMAT_Y410: + case DXGI_FORMAT_YUY2: + case 116 /* DXGI_FORMAT_R10G10B10_7E3_A2_FLOAT */: + case 117 /* DXGI_FORMAT_R10G10B10_6E4_A2_FLOAT */: return 32; + case DXGI_FORMAT_P010: + case DXGI_FORMAT_P016: + case 118 /* DXGI_FORMAT_D16_UNORM_S8_UINT */: + case 119 /* DXGI_FORMAT_R16_UNORM_X8_TYPELESS */: + case 120 /* DXGI_FORMAT_X16_TYPELESS_G8_UINT */: + return 24; + case DXGI_FORMAT_R8G8_TYPELESS: case DXGI_FORMAT_R8G8_UNORM: case DXGI_FORMAT_R8G8_UINT: @@ -316,14 +372,24 @@ size_t BitsPerPixel( DXGI_FORMAT fmt ) case DXGI_FORMAT_R16_SINT: case DXGI_FORMAT_B5G6R5_UNORM: case DXGI_FORMAT_B5G5R5A1_UNORM: + case DXGI_FORMAT_A8P8: + case DXGI_FORMAT_B4G4R4A4_UNORM: return 16; + case DXGI_FORMAT_NV12: + case DXGI_FORMAT_420_OPAQUE: + case DXGI_FORMAT_NV11: + return 12; + case DXGI_FORMAT_R8_TYPELESS: case DXGI_FORMAT_R8_UNORM: case DXGI_FORMAT_R8_UINT: case DXGI_FORMAT_R8_SNORM: case DXGI_FORMAT_R8_SINT: case DXGI_FORMAT_A8_UNORM: + case DXGI_FORMAT_AI44: + case DXGI_FORMAT_IA44: + case DXGI_FORMAT_P8: return 8; case DXGI_FORMAT_R1_UNORM: @@ -354,13 +420,172 @@ size_t BitsPerPixel( DXGI_FORMAT fmt ) case DXGI_FORMAT_BC7_UNORM_SRGB: return 8; -#ifdef DXGI_1_2_FORMATS - case DXGI_FORMAT_B4G4R4A4_UNORM: + default: + return 0; + } +} + + +//------------------------------------------------------------------------------------- +// Returns bits-per-color-channel for a given DXGI format, or 0 on failure +// For mixed formats, it returns the largest color-depth in the format +//------------------------------------------------------------------------------------- +_Use_decl_annotations_ +size_t BitsPerColor( DXGI_FORMAT fmt ) +{ + switch( static_cast<int>(fmt) ) + { + case DXGI_FORMAT_R32G32B32A32_TYPELESS: + case DXGI_FORMAT_R32G32B32A32_FLOAT: + case DXGI_FORMAT_R32G32B32A32_UINT: + case DXGI_FORMAT_R32G32B32A32_SINT: + case DXGI_FORMAT_R32G32B32_TYPELESS: + case 
DXGI_FORMAT_R32G32B32_FLOAT: + case DXGI_FORMAT_R32G32B32_UINT: + case DXGI_FORMAT_R32G32B32_SINT: + case DXGI_FORMAT_R32G32_TYPELESS: + case DXGI_FORMAT_R32G32_FLOAT: + case DXGI_FORMAT_R32G32_UINT: + case DXGI_FORMAT_R32G32_SINT: + case DXGI_FORMAT_R32G8X24_TYPELESS: + case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: + case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS: + case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT: + case DXGI_FORMAT_R32_TYPELESS: + case DXGI_FORMAT_D32_FLOAT: + case DXGI_FORMAT_R32_FLOAT: + case DXGI_FORMAT_R32_UINT: + case DXGI_FORMAT_R32_SINT: + return 32; + + case DXGI_FORMAT_R24G8_TYPELESS: + case DXGI_FORMAT_D24_UNORM_S8_UINT: + case DXGI_FORMAT_R24_UNORM_X8_TYPELESS: + case DXGI_FORMAT_X24_TYPELESS_G8_UINT: + return 24; + + case DXGI_FORMAT_R16G16B16A16_TYPELESS: + case DXGI_FORMAT_R16G16B16A16_FLOAT: + case DXGI_FORMAT_R16G16B16A16_UNORM: + case DXGI_FORMAT_R16G16B16A16_UINT: + case DXGI_FORMAT_R16G16B16A16_SNORM: + case DXGI_FORMAT_R16G16B16A16_SINT: + case DXGI_FORMAT_R16G16_TYPELESS: + case DXGI_FORMAT_R16G16_FLOAT: + case DXGI_FORMAT_R16G16_UNORM: + case DXGI_FORMAT_R16G16_UINT: + case DXGI_FORMAT_R16G16_SNORM: + case DXGI_FORMAT_R16G16_SINT: + case DXGI_FORMAT_R16_TYPELESS: + case DXGI_FORMAT_R16_FLOAT: + case DXGI_FORMAT_D16_UNORM: + case DXGI_FORMAT_R16_UNORM: + case DXGI_FORMAT_R16_UINT: + case DXGI_FORMAT_R16_SNORM: + case DXGI_FORMAT_R16_SINT: + case DXGI_FORMAT_BC6H_TYPELESS: + case DXGI_FORMAT_BC6H_UF16: + case DXGI_FORMAT_BC6H_SF16: + case DXGI_FORMAT_Y416: + case DXGI_FORMAT_P016: + case DXGI_FORMAT_Y216: return 16; - // We don't support the video formats ( see IsVideo function ) + case DXGI_FORMAT_R9G9B9E5_SHAREDEXP: + return 14; + + case DXGI_FORMAT_R11G11B10_FLOAT: + return 11; + + case DXGI_FORMAT_R10G10B10A2_TYPELESS: + case DXGI_FORMAT_R10G10B10A2_UNORM: + case DXGI_FORMAT_R10G10B10A2_UINT: + case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM: + case DXGI_FORMAT_Y410: + case DXGI_FORMAT_P010: + case DXGI_FORMAT_Y210: + return 10; + + case 
DXGI_FORMAT_R8G8B8A8_TYPELESS: + case DXGI_FORMAT_R8G8B8A8_UNORM: + case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: + case DXGI_FORMAT_R8G8B8A8_UINT: + case DXGI_FORMAT_R8G8B8A8_SNORM: + case DXGI_FORMAT_R8G8B8A8_SINT: + case DXGI_FORMAT_R8G8_TYPELESS: + case DXGI_FORMAT_R8G8_UNORM: + case DXGI_FORMAT_R8G8_UINT: + case DXGI_FORMAT_R8G8_SNORM: + case DXGI_FORMAT_R8G8_SINT: + case DXGI_FORMAT_R8_TYPELESS: + case DXGI_FORMAT_R8_UNORM: + case DXGI_FORMAT_R8_UINT: + case DXGI_FORMAT_R8_SNORM: + case DXGI_FORMAT_R8_SINT: + case DXGI_FORMAT_A8_UNORM: + case DXGI_FORMAT_R8G8_B8G8_UNORM: + case DXGI_FORMAT_G8R8_G8B8_UNORM: + case DXGI_FORMAT_BC4_TYPELESS: + case DXGI_FORMAT_BC4_UNORM: + case DXGI_FORMAT_BC4_SNORM: + case DXGI_FORMAT_BC5_TYPELESS: + case DXGI_FORMAT_BC5_UNORM: + case DXGI_FORMAT_BC5_SNORM: + case DXGI_FORMAT_B8G8R8A8_UNORM: + case DXGI_FORMAT_B8G8R8X8_UNORM: + case DXGI_FORMAT_B8G8R8A8_TYPELESS: + case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB: + case DXGI_FORMAT_B8G8R8X8_TYPELESS: + case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB: + case DXGI_FORMAT_AYUV: + case DXGI_FORMAT_NV12: + case DXGI_FORMAT_420_OPAQUE: + case DXGI_FORMAT_YUY2: + case DXGI_FORMAT_NV11: + return 8; + + case DXGI_FORMAT_BC7_TYPELESS: + case DXGI_FORMAT_BC7_UNORM: + case DXGI_FORMAT_BC7_UNORM_SRGB: + return 7; + + case DXGI_FORMAT_BC1_TYPELESS: + case DXGI_FORMAT_BC1_UNORM: + case DXGI_FORMAT_BC1_UNORM_SRGB: + case DXGI_FORMAT_BC2_TYPELESS: + case DXGI_FORMAT_BC2_UNORM: + case DXGI_FORMAT_BC2_UNORM_SRGB: + case DXGI_FORMAT_BC3_TYPELESS: + case DXGI_FORMAT_BC3_UNORM: + case DXGI_FORMAT_BC3_UNORM_SRGB: + case DXGI_FORMAT_B5G6R5_UNORM: + return 6; + + case DXGI_FORMAT_B5G5R5A1_UNORM: + return 5; -#endif // DXGI_1_2_FORMATS + case DXGI_FORMAT_B4G4R4A4_UNORM: + return 4; + + case DXGI_FORMAT_R1_UNORM: + return 1; + + case 116 /* DXGI_FORMAT_R10G10B10_7E3_A2_FLOAT */: + case 117 /* DXGI_FORMAT_R10G10B10_6E4_A2_FLOAT */: + // These are Xbox One platform specific types + return 10; + + case 118 /* 
DXGI_FORMAT_D16_UNORM_S8_UINT */: + case 119 /* DXGI_FORMAT_R16_UNORM_X8_TYPELESS */: + case 120 /* DXGI_FORMAT_X16_TYPELESS_G8_UINT */: + // These are Xbox One platform specific types + return 16; + + case DXGI_FORMAT_AI44: + case DXGI_FORMAT_IA44: + case DXGI_FORMAT_P8: + case DXGI_FORMAT_A8P8: + // Palettized formats return 0 for this function default: return 0; @@ -372,10 +597,11 @@ size_t BitsPerPixel( DXGI_FORMAT fmt ) // Computes the image row pitch in bytes, and the slice ptich (size in bytes of the image) // based on DXGI format, width, and height //------------------------------------------------------------------------------------- +_Use_decl_annotations_ void ComputePitch( DXGI_FORMAT fmt, size_t width, size_t height, size_t& rowPitch, size_t& slicePitch, DWORD flags ) { - assert( IsValid(fmt) && !IsVideo(fmt) ); + assert( IsValid(fmt) ); if ( IsCompressed(fmt) ) { @@ -393,10 +619,28 @@ void ComputePitch( DXGI_FORMAT fmt, size_t width, size_t height, } else if ( IsPacked(fmt) ) { - rowPitch = ( ( width + 1 ) >> 1) * 4; + size_t bpe = ( fmt == DXGI_FORMAT_Y210 || fmt == DXGI_FORMAT_Y216 ) ? 8 : 4; + rowPitch = ( ( width + 1 ) >> 1 ) * bpe; slicePitch = rowPitch * height; } + else if ( fmt == DXGI_FORMAT_NV11 ) + { + rowPitch = ( ( width + 3 ) >> 2 ) * 4; + + // Direct3D makes this simplifying assumption, although it is larger than the 4:1:1 data + slicePitch = rowPitch * height * 2; + } + else if ( IsPlanar(fmt) ) + { + size_t bpe = ( fmt == DXGI_FORMAT_P010 || fmt == DXGI_FORMAT_P016 + || fmt == DXGI_FORMAT(118 /* DXGI_FORMAT_D16_UNORM_S8_UINT */) + || fmt == DXGI_FORMAT(119 /* DXGI_FORMAT_R16_UNORM_X8_TYPELESS */) + || fmt == DXGI_FORMAT(120 /* DXGI_FORMAT_X16_TYPELESS_G8_UINT */) ) ? 
4 : 2; + rowPitch = ( ( width + 1 ) >> 1 ) * bpe; + + slicePitch = rowPitch * ( height + ( ( height + 1 ) >> 1 ) ); + } else { size_t bpp; @@ -410,15 +654,39 @@ void ComputePitch( DXGI_FORMAT fmt, size_t width, size_t height, else bpp = BitsPerPixel( fmt ); - if ( flags & CP_FLAGS_LEGACY_DWORD ) + if ( flags & ( CP_FLAGS_LEGACY_DWORD | CP_FLAGS_PARAGRAPH | CP_FLAGS_YMM | CP_FLAGS_ZMM | CP_FLAGS_PAGE4K ) ) { - // Special computation for some incorrectly created DDS files based on - // legacy DirectDraw assumptions about pitch alignment - rowPitch = ( ( width * bpp + 31 ) / 32 ) * sizeof(uint32_t); - slicePitch = rowPitch * height; + if ( flags & CP_FLAGS_PAGE4K ) + { + rowPitch = ( ( width * bpp + 32767 ) / 32768 ) * 4096; + slicePitch = rowPitch * height; + } + else if ( flags & CP_FLAGS_ZMM ) + { + rowPitch = ( ( width * bpp + 511 ) / 512 ) * 64; + slicePitch = rowPitch * height; + } + else if ( flags & CP_FLAGS_YMM ) + { + rowPitch = ( ( width * bpp + 255 ) / 256) * 32; + slicePitch = rowPitch * height; + } + else if ( flags & CP_FLAGS_PARAGRAPH ) + { + rowPitch = ( ( width * bpp + 127 ) / 128 ) * 16; + slicePitch = rowPitch * height; + } + else // DWORD alignment + { + // Special computation for some incorrectly created DDS files based on + // legacy DirectDraw assumptions about pitch alignment + rowPitch = ( ( width * bpp + 31 ) / 32 ) * sizeof(uint32_t); + slicePitch = rowPitch * height; + } } else { + // Default byte alignment rowPitch = ( width * bpp + 7 ) / 8; slicePitch = rowPitch * height; } @@ -429,7 +697,8 @@ void ComputePitch( DXGI_FORMAT fmt, size_t width, size_t height, //------------------------------------------------------------------------------------- // Converts to an SRGB equivalent type if available //------------------------------------------------------------------------------------- -DXGI_FORMAT MakeSRGB( _In_ DXGI_FORMAT fmt ) +_Use_decl_annotations_ +DXGI_FORMAT MakeSRGB( DXGI_FORMAT fmt ) { switch( fmt ) { @@ -463,7 +732,8 @@ 
DXGI_FORMAT MakeSRGB( _In_ DXGI_FORMAT fmt ) //------------------------------------------------------------------------------------- // Converts to a format to an equivalent TYPELESS format if available //------------------------------------------------------------------------------------- -DXGI_FORMAT MakeTypeless( _In_ DXGI_FORMAT fmt ) +_Use_decl_annotations_ +DXGI_FORMAT MakeTypeless( DXGI_FORMAT fmt ) { switch( fmt ) { @@ -531,7 +801,6 @@ DXGI_FORMAT MakeTypeless( _In_ DXGI_FORMAT fmt ) case DXGI_FORMAT_R8_UINT: case DXGI_FORMAT_R8_SNORM: case DXGI_FORMAT_R8_SINT: - case DXGI_FORMAT_A8_UNORM: return DXGI_FORMAT_R8_TYPELESS; case DXGI_FORMAT_BC1_UNORM: @@ -579,7 +848,8 @@ DXGI_FORMAT MakeTypeless( _In_ DXGI_FORMAT fmt ) //------------------------------------------------------------------------------------- // Converts to a TYPELESS format to an equivalent UNORM format if available //------------------------------------------------------------------------------------- -DXGI_FORMAT MakeTypelessUNORM( _In_ DXGI_FORMAT fmt ) +_Use_decl_annotations_ +DXGI_FORMAT MakeTypelessUNORM( DXGI_FORMAT fmt ) { switch( fmt ) { @@ -637,7 +907,8 @@ DXGI_FORMAT MakeTypelessUNORM( _In_ DXGI_FORMAT fmt ) //------------------------------------------------------------------------------------- // Converts to a TYPELESS format to an equivalent FLOAT format if available //------------------------------------------------------------------------------------- -DXGI_FORMAT MakeTypelessFLOAT( _In_ DXGI_FORMAT fmt ) +_Use_decl_annotations_ +DXGI_FORMAT MakeTypelessFLOAT( DXGI_FORMAT fmt ) { switch( fmt ) { @@ -672,7 +943,8 @@ DXGI_FORMAT MakeTypelessFLOAT( _In_ DXGI_FORMAT fmt ) // TexMetadata //===================================================================================== -size_t TexMetadata::ComputeIndex( _In_ size_t mip, _In_ size_t item, _In_ size_t slice ) const +_Use_decl_annotations_ +size_t TexMetadata::ComputeIndex( size_t mip, size_t item, size_t slice ) const { if ( mip >= 
mipLevels ) return size_t(-1); @@ -726,6 +998,21 @@ size_t TexMetadata::ComputeIndex( _In_ size_t mip, _In_ size_t item, _In_ size_t // Blob - Bitmap image container //===================================================================================== +Blob& Blob::operator= (Blob&& moveFrom) +{ + if ( this != &moveFrom ) + { + Release(); + + _buffer = moveFrom._buffer; + _size = moveFrom._size; + + moveFrom._buffer = nullptr; + moveFrom._size = 0; + } + return *this; +} + void Blob::Release() { if ( _buffer ) @@ -737,6 +1024,7 @@ void Blob::Release() _size = 0; } +_Use_decl_annotations_ HRESULT Blob::Initialize( size_t size ) { if ( !size ) diff --git a/thirdparty/directxtex/DirectXTex/DirectXTexWIC.cpp b/thirdparty/directxtex/DirectXTex/DirectXTexWIC.cpp index 05cb76a3..0bebfafb 100644 --- a/thirdparty/directxtex/DirectXTex/DirectXTexWIC.cpp +++ b/thirdparty/directxtex/DirectXTex/DirectXTexWIC.cpp @@ -15,6 +15,58 @@ #include "DirectXTexP.h" +using Microsoft::WRL::ComPtr; + +//------------------------------------------------------------------------------------- +// IStream support for WIC Memory routines +//------------------------------------------------------------------------------------- + +#if defined(WINAPI_FAMILY) && (WINAPI_FAMILY == WINAPI_FAMILY_APP) && (WINAPI_FAMILY != WINAPI_FAMILY_PHONE_APP) + + #include <shcore.h> + #pragma comment(lib,"shcore.lib") + +#ifdef __cplusplus_winrt + + static inline HRESULT CreateMemoryStream( _Outptr_ IStream** stream ) + { + auto randomAccessStream = ref new ::Windows::Storage::Streams::InMemoryRandomAccessStream(); + return CreateStreamOverRandomAccessStream( randomAccessStream, IID_PPV_ARGS( stream ) ); + } + +#else + + #include <wrl/client.h> + #include <wrl/wrappers/corewrappers.h> + #include <windows.storage.streams.h> + + static inline HRESULT CreateMemoryStream( _Outptr_ IStream** stream ) + { + Microsoft::WRL::ComPtr<ABI::Windows::Storage::Streams::IRandomAccessStream> abiStream; + HRESULT hr = 
Windows::Foundation::ActivateInstance( + Microsoft::WRL::Wrappers::HStringReference( RuntimeClass_Windows_Storage_Streams_InMemoryRandomAccessStream ).Get(), + abiStream.GetAddressOf() ); + + if (SUCCEEDED(hr)) + { + hr = CreateStreamOverRandomAccessStream( abiStream.Get(), IID_PPV_ARGS( stream ) ); + } + return hr; + } + +#endif // __cplusplus_winrt + +#else + + #pragma prefast(suppress:28196, "a simple wrapper around an existing annotated function" ); + static inline HRESULT CreateMemoryStream( _Outptr_ IStream** stream ) + { + return CreateStreamOnHGlobal( 0, TRUE, stream ); + } + +#endif + + //------------------------------------------------------------------------------------- // WIC Pixel Format nearest conversion table //------------------------------------------------------------------------------------- @@ -67,13 +119,14 @@ static WICConvert g_WICConvert[] = { GUID_WICPixelFormat128bppRGBFloat, GUID_WICPixelFormat128bppRGBAFloat }, // DXGI_FORMAT_R32G32B32A32_FLOAT { GUID_WICPixelFormat128bppRGBAFixedPoint, GUID_WICPixelFormat128bppRGBAFloat }, // DXGI_FORMAT_R32G32B32A32_FLOAT { GUID_WICPixelFormat128bppRGBFixedPoint, GUID_WICPixelFormat128bppRGBAFloat }, // DXGI_FORMAT_R32G32B32A32_FLOAT + { GUID_WICPixelFormat32bppRGBE, GUID_WICPixelFormat128bppRGBAFloat }, // DXGI_FORMAT_R32G32B32A32_FLOAT { GUID_WICPixelFormat32bppCMYK, GUID_WICPixelFormat32bppRGBA }, // DXGI_FORMAT_R8G8B8A8_UNORM { GUID_WICPixelFormat64bppCMYK, GUID_WICPixelFormat64bppRGBA }, // DXGI_FORMAT_R16G16B16A16_UNORM { GUID_WICPixelFormat40bppCMYKAlpha, GUID_WICPixelFormat64bppRGBA }, // DXGI_FORMAT_R16G16B16A16_UNORM { GUID_WICPixelFormat80bppCMYKAlpha, GUID_WICPixelFormat64bppRGBA }, // DXGI_FORMAT_R16G16B16A16_UNORM -#if (_WIN32_WINNT >= 0x0602 /*_WIN32_WINNT_WIN8*/) || defined(_WIN7_PLATFORM_UPDATE) +#if (_WIN32_WINNT >= _WIN32_WINNT_WIN8) || defined(_WIN7_PLATFORM_UPDATE) { GUID_WICPixelFormat32bppRGB, GUID_WICPixelFormat32bppRGBA }, // DXGI_FORMAT_R8G8B8A8_UNORM { 
GUID_WICPixelFormat64bppRGB, GUID_WICPixelFormat64bppRGBA }, // DXGI_FORMAT_R16G16B16A16_UNORM { GUID_WICPixelFormat64bppPRGBAHalf, GUID_WICPixelFormat64bppRGBAHalf }, // DXGI_FORMAT_R16G16B16A16_FLOAT @@ -100,7 +153,7 @@ static DXGI_FORMAT _DetermineFormat( _In_ const WICPixelFormatGUID& pixelFormat, { if ( memcmp( &GUID_WICPixelFormat96bppRGBFixedPoint, &pixelFormat, sizeof(WICPixelFormatGUID) ) == 0 ) { -#if (_WIN32_WINNT >= 0x0602 /*_WIN32_WINNT_WIN8*/) || defined(_WIN7_PLATFORM_UPDATE) +#if (_WIN32_WINNT >= _WIN32_WINNT_WIN8) || defined(_WIN7_PLATFORM_UPDATE) if ( _IsWIC2() ) { if ( pConvert ) @@ -222,7 +275,66 @@ static HRESULT _DecodeMetadata( _In_ DWORD flags, if ( metadata.format == DXGI_FORMAT_UNKNOWN ) return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); - return S_OK; + if ( !( flags & WIC_FLAGS_IGNORE_SRGB ) ) + { + GUID containerFormat; + hr = decoder->GetContainerFormat( &containerFormat ); + if ( FAILED(hr) ) + return hr; + + ComPtr<IWICMetadataQueryReader> metareader; + hr = frame->GetMetadataQueryReader( metareader.GetAddressOf() ); + if ( SUCCEEDED(hr) ) + { + // Check for sRGB colorspace metadata + bool sRGB = false; + + PROPVARIANT value; + PropVariantInit( &value ); + + if ( memcmp( &containerFormat, &GUID_ContainerFormatPng, sizeof(GUID) ) == 0 ) + { + // Check for sRGB chunk + if ( SUCCEEDED( metareader->GetMetadataByName( L"/sRGB/RenderingIntent", &value ) ) && value.vt == VT_UI1 ) + { + sRGB = true; + } + } +#if defined(_XBOX_ONE) && defined(_TITLE) + else if ( memcmp( &containerFormat, &GUID_ContainerFormatJpeg, sizeof(GUID) ) == 0 ) + { + if ( SUCCEEDED( metareader->GetMetadataByName( L"/app1/ifd/exif/{ushort=40961}", &value ) ) && value.vt == VT_UI2 && value.uiVal == 1 ) + { + sRGB = true; + } + } + else if ( memcmp( &containerFormat, &GUID_ContainerFormatTiff, sizeof(GUID) ) == 0 ) + { + if ( SUCCEEDED( metareader->GetMetadataByName( L"/ifd/exif/{ushort=40961}", &value ) ) && value.vt == VT_UI2 && value.uiVal == 1 ) + { + sRGB = true; + } 
+ } +#else + else if ( SUCCEEDED( metareader->GetMetadataByName( L"System.Image.ColorSpace", &value ) ) && value.vt == VT_UI2 && value.uiVal == 1 ) + { + sRGB = true; + } +#endif + + PropVariantClear( &value ); + + if ( sRGB ) + metadata.format = MakeSRGB( metadata.format ); + } + else if ( hr == WINCODEC_ERR_UNSUPPORTEDOPERATION ) + { + // Some formats just don't support metadata (BMP, ICO, etc.), so ignore this failure + hr = S_OK; + } + } + + return hr; } @@ -255,11 +367,23 @@ static HRESULT _DecodeSingleFrame( _In_ DWORD flags, _In_ const TexMetadata& met } else { - ScopedObject<IWICFormatConverter> FC; - hr = pWIC->CreateFormatConverter( &FC ); + ComPtr<IWICFormatConverter> FC; + hr = pWIC->CreateFormatConverter( FC.GetAddressOf() ); if ( FAILED(hr) ) return hr; + WICPixelFormatGUID pixelFormat; + hr = frame->GetPixelFormat( &pixelFormat ); + if ( FAILED(hr) ) + return hr; + + BOOL canConvert = FALSE; + hr = FC->CanConvert( pixelFormat, convertGUID, &canConvert ); + if ( FAILED(hr) || !canConvert ) + { + return E_UNEXPECTED; + } + hr = FC->Initialize( frame, convertGUID, _GetWICDither( flags ), 0, 0, WICBitmapPaletteTypeCustom ); if ( FAILED(hr) ) return hr; @@ -300,8 +424,8 @@ static HRESULT _DecodeMultiframe( _In_ DWORD flags, _In_ const TexMetadata& meta if ( !img ) return E_POINTER; - ScopedObject<IWICBitmapFrameDecode> frame; - hr = decoder->GetFrame( static_cast<UINT>( index ), &frame ); + ComPtr<IWICBitmapFrameDecode> frame; + hr = decoder->GetFrame( static_cast<UINT>( index ), frame.GetAddressOf() ); if ( FAILED(hr) ) return hr; @@ -315,64 +439,82 @@ static HRESULT _DecodeMultiframe( _In_ DWORD flags, _In_ const TexMetadata& meta if ( FAILED(hr) ) return hr; - if ( memcmp( &pfGuid, &sourceGUID, sizeof(WICPixelFormatGUID) ) == 0 ) + if ( w == metadata.width && h == metadata.height ) { - if ( w == metadata.width && h == metadata.height ) + // This frame does not need resized + if ( memcmp( &pfGuid, &sourceGUID, sizeof(WICPixelFormatGUID) ) == 0 ) { - // 
This frame does not need resized or format converted, just copy... hr = frame->CopyPixels( 0, static_cast<UINT>( img->rowPitch ), static_cast<UINT>( img->slicePitch ), img->pixels ); if ( FAILED(hr) ) return hr; } else { - // This frame needs resizing, but not format converted - ScopedObject<IWICBitmapScaler> scaler; - hr = pWIC->CreateBitmapScaler( &scaler ); + ComPtr<IWICFormatConverter> FC; + hr = pWIC->CreateFormatConverter( FC.GetAddressOf() ); if ( FAILED(hr) ) return hr; - hr = scaler->Initialize( frame.Get(), static_cast<UINT>( metadata.width ), static_cast<UINT>( metadata.height ), _GetWICInterp( flags ) ); + BOOL canConvert = FALSE; + hr = FC->CanConvert( pfGuid, sourceGUID, &canConvert ); + if ( FAILED(hr) || !canConvert ) + { + return E_UNEXPECTED; + } + + hr = FC->Initialize( frame.Get(), sourceGUID, _GetWICDither( flags ), 0, 0, WICBitmapPaletteTypeCustom ); if ( FAILED(hr) ) return hr; - - hr = scaler->CopyPixels( 0, static_cast<UINT>( img->rowPitch ), static_cast<UINT>( img->slicePitch ), img->pixels ); + + hr = FC->CopyPixels( 0, static_cast<UINT>( img->rowPitch ), static_cast<UINT>( img->slicePitch ), img->pixels ); if ( FAILED(hr) ) return hr; } } else { - // This frame required format conversion - ScopedObject<IWICFormatConverter> FC; - hr = pWIC->CreateFormatConverter( &FC ); + // This frame needs resizing + ComPtr<IWICBitmapScaler> scaler; + hr = pWIC->CreateBitmapScaler( scaler.GetAddressOf() ); if ( FAILED(hr) ) return hr; - hr = FC->Initialize( frame.Get(), pfGuid, _GetWICDither( flags ), 0, 0, WICBitmapPaletteTypeCustom ); + hr = scaler->Initialize( frame.Get(), static_cast<UINT>( metadata.width ), static_cast<UINT>( metadata.height ), _GetWICInterp( flags ) ); if ( FAILED(hr) ) return hr; - - if ( w == metadata.width && h == metadata.height ) + + WICPixelFormatGUID pfScaler; + hr = scaler->GetPixelFormat( &pfScaler ); + if ( FAILED(hr) ) + return hr; + + if ( memcmp( &pfScaler, &sourceGUID, sizeof(WICPixelFormatGUID) ) == 0 ) { - // This 
frame is the same size, no need to scale - hr = FC->CopyPixels( 0, static_cast<UINT>( img->rowPitch ), static_cast<UINT>( img->slicePitch ), img->pixels ); + hr = scaler->CopyPixels( 0, static_cast<UINT>( img->rowPitch ), static_cast<UINT>( img->slicePitch ), img->pixels ); if ( FAILED(hr) ) return hr; } else { - // This frame needs resizing and format converted - ScopedObject<IWICBitmapScaler> scaler; - hr = pWIC->CreateBitmapScaler( &scaler ); + // The WIC bitmap scaler is free to return a different pixel format than the source image, so here we + // convert it to our desired format + ComPtr<IWICFormatConverter> FC; + hr = pWIC->CreateFormatConverter( FC.GetAddressOf() ); if ( FAILED(hr) ) return hr; - hr = scaler->Initialize( FC.Get(), static_cast<UINT>( metadata.width ), static_cast<UINT>( metadata.height ), _GetWICInterp( flags ) ); + BOOL canConvert = FALSE; + hr = FC->CanConvert( pfScaler, sourceGUID, &canConvert ); + if ( FAILED(hr) || !canConvert ) + { + return E_UNEXPECTED; + } + + hr = FC->Initialize( scaler.Get(), sourceGUID, _GetWICDither( flags ), 0, 0, WICBitmapPaletteTypeCustom ); if ( FAILED(hr) ) return hr; - hr = scaler->CopyPixels( 0, static_cast<UINT>( img->rowPitch ), static_cast<UINT>( img->slicePitch ), img->pixels ); + hr = FC->CopyPixels( 0, static_cast<UINT>( img->rowPitch ), static_cast<UINT>( img->slicePitch ), img->pixels ); if ( FAILED(hr) ) return hr; } @@ -384,9 +526,96 @@ static HRESULT _DecodeMultiframe( _In_ DWORD flags, _In_ const TexMetadata& meta //------------------------------------------------------------------------------------- +// Encodes image metadata +//------------------------------------------------------------------------------------- +static HRESULT _EncodeMetadata( _In_ IWICBitmapFrameEncode* frame, _In_ const GUID& containerFormat, _In_ DXGI_FORMAT format ) +{ + if ( !frame ) + return E_POINTER; + + ComPtr<IWICMetadataQueryWriter> metawriter; + HRESULT hr = frame->GetMetadataQueryWriter( 
metawriter.GetAddressOf() ); + if ( SUCCEEDED( hr ) ) + { + PROPVARIANT value; + PropVariantInit( &value ); + + bool sRGB = IsSRGB( format ); + + value.vt = VT_LPSTR; + value.pszVal = "DirectXTex"; + + if ( memcmp( &containerFormat, &GUID_ContainerFormatPng, sizeof(GUID) ) == 0 ) + { + // Set Software name + (void)metawriter->SetMetadataByName( L"/tEXt/{str=Software}", &value ); + + // Set sRGB chunk + if ( sRGB ) + { + value.vt = VT_UI1; + value.bVal = 0; + (void)metawriter->SetMetadataByName( L"/sRGB/RenderingIntent", &value ); + } + } +#if defined(_XBOX_ONE) && defined(_TITLE) + else if ( memcmp( &containerFormat, &GUID_ContainerFormatJpeg, sizeof(GUID) ) == 0 ) + { + // Set Software name + (void)metawriter->SetMetadataByName( L"/app1/ifd/{ushort=305}", &value ); + + if ( sRGB ) + { + // Set EXIF Colorspace of sRGB + value.vt = VT_UI2; + value.uiVal = 1; + (void)metawriter->SetMetadataByName( L"/app1/ifd/exif/{ushort=40961}", &value ); + } + } + else if ( memcmp( &containerFormat, &GUID_ContainerFormatTiff, sizeof(GUID) ) == 0 ) + { + // Set Software name + (void)metawriter->SetMetadataByName( L"/ifd/{ushort=305}", &value ); + + if ( sRGB ) + { + // Set EXIF Colorspace of sRGB + value.vt = VT_UI2; + value.uiVal = 1; + (void)metawriter->SetMetadataByName( L"/ifd/exif/{ushort=40961}", &value ); + } + } +#else + else + { + // Set Software name + (void)metawriter->SetMetadataByName( L"System.ApplicationName", &value ); + + if ( sRGB ) + { + // Set EXIF Colorspace of sRGB + value.vt = VT_UI2; + value.uiVal = 1; + (void)metawriter->SetMetadataByName( L"System.Image.ColorSpace", &value ); + } + } +#endif + } + else if ( hr == WINCODEC_ERR_UNSUPPORTEDOPERATION ) + { + // Some formats just don't support metadata (BMP, ICO, etc.), so ignore this failure + hr = S_OK; + } + + return hr; +} + + +//------------------------------------------------------------------------------------- // Encodes a single frame 
//------------------------------------------------------------------------------------- -static HRESULT _EncodeImage( _In_ const Image& image, _In_ DWORD flags, _In_ IWICBitmapFrameEncode* frame, _In_opt_ IPropertyBag2* props, _In_opt_ const GUID* targetFormat ) +static HRESULT _EncodeImage( _In_ const Image& image, _In_ DWORD flags, _In_ REFGUID containerFormat, + _In_ IWICBitmapFrameEncode* frame, _In_opt_ IPropertyBag2* props, _In_opt_ const GUID* targetFormat ) { if ( !frame ) return E_INVALIDARG; @@ -402,7 +631,7 @@ static HRESULT _EncodeImage( _In_ const Image& image, _In_ DWORD flags, _In_ IWI if ( FAILED(hr) ) return hr; -#ifdef _AMD64_ +#ifdef _M_X64 if ( (image.width > 0xFFFFFFFF) || (image.height > 0xFFFFFFFF) ) return E_INVALIDARG; #endif @@ -420,6 +649,16 @@ static HRESULT _EncodeImage( _In_ const Image& image, _In_ DWORD flags, _In_ IWI if ( FAILED(hr) ) return hr; + if ( targetFormat && memcmp( targetFormat, &targetGuid, sizeof(WICPixelFormatGUID) ) != 0 ) + { + // Requested output pixel format is not supported by the WIC codec + return E_FAIL; + } + + hr = _EncodeMetadata( frame, containerFormat, image.format ); + if ( FAILED(hr) ) + return hr; + if ( memcmp( &targetGuid, &pfGuid, sizeof(WICPixelFormatGUID) ) != 0 ) { // Conversion required to write @@ -427,23 +666,30 @@ static HRESULT _EncodeImage( _In_ const Image& image, _In_ DWORD flags, _In_ IWI if ( !pWIC ) return E_NOINTERFACE; - ScopedObject<IWICBitmap> source; + ComPtr<IWICBitmap> source; hr = pWIC->CreateBitmapFromMemory( static_cast<UINT>( image.width ), static_cast<UINT>( image.height ), pfGuid, static_cast<UINT>( image.rowPitch ), static_cast<UINT>( image.slicePitch ), - image.pixels, &source ); + image.pixels, source.GetAddressOf() ); if ( FAILED(hr) ) return hr; - ScopedObject<IWICFormatConverter> FC; - hr = pWIC->CreateFormatConverter( &FC ); + ComPtr<IWICFormatConverter> FC; + hr = pWIC->CreateFormatConverter( FC.GetAddressOf() ); if ( FAILED(hr) ) return hr; + BOOL canConvert = 
FALSE; + hr = FC->CanConvert( pfGuid, targetGuid, &canConvert ); + if ( FAILED(hr) || !canConvert ) + { + return E_UNEXPECTED; + } + hr = FC->Initialize( source.Get(), targetGuid, _GetWICDither( flags ), 0, 0, WICBitmapPaletteTypeCustom ); if ( FAILED(hr) ) return hr; - WICRect rect = { 0, 0, static_cast<UINT>( image.width ), static_cast<UINT>( image.height ) }; + WICRect rect = { 0, 0, static_cast<INT>( image.width ), static_cast<INT>( image.height ) }; hr = frame->WriteSource( FC.Get(), &rect ); if ( FAILED(hr) ) return hr; @@ -465,7 +711,8 @@ static HRESULT _EncodeImage( _In_ const Image& image, _In_ DWORD flags, _In_ IWI } static HRESULT _EncodeSingleFrame( _In_ const Image& image, _In_ DWORD flags, - _In_ REFGUID guidContainerFormat, _Inout_ IStream* stream, _In_opt_ const GUID* targetFormat ) + _In_ REFGUID containerFormat, _Inout_ IStream* stream, + _In_opt_ const GUID* targetFormat, _In_opt_ std::function<void(IPropertyBag2*)> setCustomProps ) { if ( !stream ) return E_INVALIDARG; @@ -475,8 +722,8 @@ static HRESULT _EncodeSingleFrame( _In_ const Image& image, _In_ DWORD flags, if ( !pWIC ) return E_NOINTERFACE; - ScopedObject<IWICBitmapEncoder> encoder; - HRESULT hr = pWIC->CreateEncoder( guidContainerFormat, 0, &encoder ); + ComPtr<IWICBitmapEncoder> encoder; + HRESULT hr = pWIC->CreateEncoder( containerFormat, 0, encoder.GetAddressOf() ); if ( FAILED(hr) ) return hr; @@ -484,30 +731,30 @@ static HRESULT _EncodeSingleFrame( _In_ const Image& image, _In_ DWORD flags, if ( FAILED(hr) ) return hr; - ScopedObject<IWICBitmapFrameEncode> frame; - ScopedObject<IPropertyBag2> props; - hr = encoder->CreateNewFrame( &frame, &props ); + ComPtr<IWICBitmapFrameEncode> frame; + ComPtr<IPropertyBag2> props; + hr = encoder->CreateNewFrame( frame.GetAddressOf(), props.GetAddressOf() ); if ( FAILED(hr) ) return hr; - if ( memcmp( &guidContainerFormat, &GUID_ContainerFormatBmp, sizeof(WICPixelFormatGUID) ) == 0 ) + if ( memcmp( &containerFormat, &GUID_ContainerFormatBmp, 
sizeof(WICPixelFormatGUID) ) == 0 && _IsWIC2() ) { - // Opt-in to the Windows 8 support for writing 32-bit Windows BMP files with an alpha channel if supported + // Opt-in to the WIC2 support for writing 32-bit Windows BMP files with an alpha channel PROPBAG2 option = { 0 }; option.pstrName = L"EnableV5Header32bppBGRA"; VARIANT varValue; varValue.vt = VT_BOOL; varValue.boolVal = VARIANT_TRUE; - hr = props->Write( 1, &option, &varValue ); - if ( FAILED(hr) ) - { - // Fails on older versions of WIC, so we default to the null property bag - props.Reset(); - } + (void)props->Write( 1, &option, &varValue ); + } + + if ( setCustomProps ) + { + setCustomProps( props.Get() ); } - hr = _EncodeImage( image, flags, frame.Get(), props.Get(), targetFormat ); + hr = _EncodeImage( image, flags, containerFormat, frame.Get(), props.Get(), targetFormat ); if ( FAILED(hr) ) return hr; @@ -522,8 +769,9 @@ static HRESULT _EncodeSingleFrame( _In_ const Image& image, _In_ DWORD flags, //------------------------------------------------------------------------------------- // Encodes an image array //------------------------------------------------------------------------------------- -static HRESULT _EncodeMultiframe( _In_count_(nimages) const Image* images, _In_ size_t nimages, _In_ DWORD flags, - _In_ REFGUID guidContainerFormat, _Inout_ IStream* stream, _In_opt_ const GUID* targetFormat ) +static HRESULT _EncodeMultiframe( _In_reads_(nimages) const Image* images, _In_ size_t nimages, _In_ DWORD flags, + _In_ REFGUID containerFormat, _Inout_ IStream* stream, + _In_opt_ const GUID* targetFormat, _In_opt_ std::function<void(IPropertyBag2*)> setCustomProps ) { if ( !stream || nimages < 2 ) return E_INVALIDARG; @@ -536,13 +784,13 @@ static HRESULT _EncodeMultiframe( _In_count_(nimages) const Image* images, _In_ if ( !pWIC ) return E_NOINTERFACE; - ScopedObject<IWICBitmapEncoder> encoder; - HRESULT hr = pWIC->CreateEncoder( guidContainerFormat, 0, &encoder ); + ComPtr<IWICBitmapEncoder> 
encoder; + HRESULT hr = pWIC->CreateEncoder( containerFormat, 0, encoder.GetAddressOf() ); if ( FAILED(hr) ) return hr; - ScopedObject<IWICBitmapEncoderInfo> einfo; - hr = encoder->GetEncoderInfo( &einfo ); + ComPtr<IWICBitmapEncoderInfo> einfo; + hr = encoder->GetEncoderInfo( einfo.GetAddressOf() ); if ( FAILED(hr) ) return hr; @@ -560,12 +808,18 @@ static HRESULT _EncodeMultiframe( _In_count_(nimages) const Image* images, _In_ for( size_t index=0; index < nimages; ++index ) { - ScopedObject<IWICBitmapFrameEncode> frame; - hr = encoder->CreateNewFrame( &frame, nullptr ); + ComPtr<IWICBitmapFrameEncode> frame; + ComPtr<IPropertyBag2> props; + hr = encoder->CreateNewFrame( frame.GetAddressOf(), props.GetAddressOf() ); if ( FAILED(hr) ) return hr; - hr = _EncodeImage( images[index], flags, frame.Get(), nullptr, targetFormat ); + if ( setCustomProps ) + { + setCustomProps( props.Get() ); + } + + hr = _EncodeImage( images[index], flags, containerFormat, frame.Get(), props.Get(), targetFormat ); if ( FAILED(hr) ) return hr; } @@ -585,12 +839,13 @@ static HRESULT _EncodeMultiframe( _In_count_(nimages) const Image* images, _In_ //------------------------------------------------------------------------------------- // Obtain metadata from WIC-supported file in memory //------------------------------------------------------------------------------------- +_Use_decl_annotations_ HRESULT GetMetadataFromWICMemory( LPCVOID pSource, size_t size, DWORD flags, TexMetadata& metadata ) { if ( !pSource || size == 0 ) return E_INVALIDARG; -#ifdef _AMD64_ +#ifdef _M_X64 if ( size > 0xFFFFFFFF ) return HRESULT_FROM_WIN32( ERROR_FILE_TOO_LARGE ); #endif @@ -600,8 +855,8 @@ HRESULT GetMetadataFromWICMemory( LPCVOID pSource, size_t size, DWORD flags, Tex return E_NOINTERFACE; // Create input stream for memory - ScopedObject<IWICStream> stream; - HRESULT hr = pWIC->CreateStream( &stream ); + ComPtr<IWICStream> stream; + HRESULT hr = pWIC->CreateStream( stream.GetAddressOf() ); if ( 
FAILED(hr) ) return hr; @@ -611,13 +866,13 @@ HRESULT GetMetadataFromWICMemory( LPCVOID pSource, size_t size, DWORD flags, Tex return hr; // Initialize WIC - ScopedObject<IWICBitmapDecoder> decoder; - hr = pWIC->CreateDecoderFromStream( stream.Get(), 0, WICDecodeMetadataCacheOnDemand, &decoder ); + ComPtr<IWICBitmapDecoder> decoder; + hr = pWIC->CreateDecoderFromStream( stream.Get(), 0, WICDecodeMetadataCacheOnDemand, decoder.GetAddressOf() ); if ( FAILED(hr) ) return hr; - ScopedObject<IWICBitmapFrameDecode> frame; - hr = decoder->GetFrame( 0, &frame ); + ComPtr<IWICBitmapFrameDecode> frame; + hr = decoder->GetFrame( 0, frame.GetAddressOf() ); if ( FAILED(hr) ) return hr; @@ -633,6 +888,7 @@ HRESULT GetMetadataFromWICMemory( LPCVOID pSource, size_t size, DWORD flags, Tex //------------------------------------------------------------------------------------- // Obtain metadata from WIC-supported file on disk //------------------------------------------------------------------------------------- +_Use_decl_annotations_ HRESULT GetMetadataFromWICFile( LPCWSTR szFile, DWORD flags, TexMetadata& metadata ) { if ( !szFile ) @@ -643,13 +899,13 @@ HRESULT GetMetadataFromWICFile( LPCWSTR szFile, DWORD flags, TexMetadata& metada return E_NOINTERFACE; // Initialize WIC - ScopedObject<IWICBitmapDecoder> decoder; - HRESULT hr = pWIC->CreateDecoderFromFilename( szFile, 0, GENERIC_READ, WICDecodeMetadataCacheOnDemand, &decoder ); + ComPtr<IWICBitmapDecoder> decoder; + HRESULT hr = pWIC->CreateDecoderFromFilename( szFile, 0, GENERIC_READ, WICDecodeMetadataCacheOnDemand, decoder.GetAddressOf() ); if ( FAILED(hr) ) return hr; - ScopedObject<IWICBitmapFrameDecode> frame; - hr = decoder->GetFrame( 0, &frame ); + ComPtr<IWICBitmapFrameDecode> frame; + hr = decoder->GetFrame( 0, frame.GetAddressOf() ); if ( FAILED(hr) ) return hr; @@ -665,12 +921,13 @@ HRESULT GetMetadataFromWICFile( LPCWSTR szFile, DWORD flags, TexMetadata& metada 
//------------------------------------------------------------------------------------- // Load a WIC-supported file in memory //------------------------------------------------------------------------------------- +_Use_decl_annotations_ HRESULT LoadFromWICMemory( LPCVOID pSource, size_t size, DWORD flags, TexMetadata* metadata, ScratchImage& image ) { if ( !pSource || size == 0 ) return E_INVALIDARG; -#ifdef _AMD64_ +#ifdef _M_X64 if ( size > 0xFFFFFFFF ) return HRESULT_FROM_WIN32( ERROR_FILE_TOO_LARGE ); #endif @@ -682,8 +939,8 @@ HRESULT LoadFromWICMemory( LPCVOID pSource, size_t size, DWORD flags, TexMetadat image.Release(); // Create input stream for memory - ScopedObject<IWICStream> stream; - HRESULT hr = pWIC->CreateStream( &stream ); + ComPtr<IWICStream> stream; + HRESULT hr = pWIC->CreateStream( stream.GetAddressOf() ); if ( FAILED(hr) ) return hr; @@ -692,13 +949,13 @@ HRESULT LoadFromWICMemory( LPCVOID pSource, size_t size, DWORD flags, TexMetadat return hr; // Initialize WIC - ScopedObject<IWICBitmapDecoder> decoder; - hr = pWIC->CreateDecoderFromStream( stream.Get(), 0, WICDecodeMetadataCacheOnDemand, &decoder ); + ComPtr<IWICBitmapDecoder> decoder; + hr = pWIC->CreateDecoderFromStream( stream.Get(), 0, WICDecodeMetadataCacheOnDemand, decoder.GetAddressOf() ); if ( FAILED(hr) ) return hr; - ScopedObject<IWICBitmapFrameDecode> frame; - hr = decoder->GetFrame( 0, &frame ); + ComPtr<IWICBitmapFrameDecode> frame; + hr = decoder->GetFrame( 0, frame.GetAddressOf() ); if ( FAILED(hr) ) return hr; @@ -734,6 +991,7 @@ HRESULT LoadFromWICMemory( LPCVOID pSource, size_t size, DWORD flags, TexMetadat //------------------------------------------------------------------------------------- // Load a WIC-supported file from disk //------------------------------------------------------------------------------------- +_Use_decl_annotations_ HRESULT LoadFromWICFile( LPCWSTR szFile, DWORD flags, TexMetadata* metadata, ScratchImage& image ) { if ( !szFile ) @@ -746,13 
+1004,13 @@ HRESULT LoadFromWICFile( LPCWSTR szFile, DWORD flags, TexMetadata* metadata, Scr image.Release(); // Initialize WIC - ScopedObject<IWICBitmapDecoder> decoder; - HRESULT hr = pWIC->CreateDecoderFromFilename( szFile, 0, GENERIC_READ, WICDecodeMetadataCacheOnDemand, &decoder ); + ComPtr<IWICBitmapDecoder> decoder; + HRESULT hr = pWIC->CreateDecoderFromFilename( szFile, 0, GENERIC_READ, WICDecodeMetadataCacheOnDemand, decoder.GetAddressOf() ); if ( FAILED(hr) ) return hr; - ScopedObject<IWICBitmapFrameDecode> frame; - hr = decoder->GetFrame( 0, &frame ); + ComPtr<IWICBitmapFrameDecode> frame; + hr = decoder->GetFrame( 0, frame.GetAddressOf() ); if ( FAILED(hr) ) return hr; @@ -788,19 +1046,21 @@ HRESULT LoadFromWICFile( LPCWSTR szFile, DWORD flags, TexMetadata* metadata, Scr //------------------------------------------------------------------------------------- // Save a WIC-supported file to memory //------------------------------------------------------------------------------------- -HRESULT SaveToWICMemory( const Image& image, DWORD flags, REFGUID guidContainerFormat, Blob& blob, const GUID* targetFormat ) +_Use_decl_annotations_ +HRESULT SaveToWICMemory( const Image& image, DWORD flags, REFGUID containerFormat, Blob& blob, + const GUID* targetFormat, std::function<void(IPropertyBag2*)> setCustomProps ) { if ( !image.pixels ) return E_POINTER; blob.Release(); - ScopedObject<IStream> stream; - HRESULT hr = CreateStreamOnHGlobal( 0, TRUE, &stream ); + ComPtr<IStream> stream; + HRESULT hr = CreateMemoryStream( stream.GetAddressOf() ); if ( FAILED(hr) ) return hr; - hr = _EncodeSingleFrame( image, flags, guidContainerFormat, stream.Get(), targetFormat ); + hr = _EncodeSingleFrame( image, flags, containerFormat, stream.Get(), targetFormat, setCustomProps ); if ( FAILED(hr) ) return hr; @@ -833,22 +1093,24 @@ HRESULT SaveToWICMemory( const Image& image, DWORD flags, REFGUID guidContainerF return S_OK; } -HRESULT SaveToWICMemory( const Image* images, size_t 
nimages, DWORD flags, REFGUID guidContainerFormat, Blob& blob, const GUID* targetFormat ) +_Use_decl_annotations_ +HRESULT SaveToWICMemory( const Image* images, size_t nimages, DWORD flags, REFGUID containerFormat, Blob& blob, + const GUID* targetFormat, std::function<void(IPropertyBag2*)> setCustomProps ) { if ( !images || nimages == 0 ) return E_INVALIDARG; blob.Release(); - ScopedObject<IStream> stream; - HRESULT hr = CreateStreamOnHGlobal( 0, TRUE, &stream ); + ComPtr<IStream> stream; + HRESULT hr = CreateMemoryStream( stream.GetAddressOf() ); if ( FAILED(hr) ) return hr; if ( nimages > 1 ) - hr = _EncodeMultiframe( images, nimages, flags, guidContainerFormat, stream.Get(), targetFormat ); + hr = _EncodeMultiframe( images, nimages, flags, containerFormat, stream.Get(), targetFormat, setCustomProps ); else - hr = _EncodeSingleFrame( images[0], flags, guidContainerFormat, stream.Get(), targetFormat ); + hr = _EncodeSingleFrame( images[0], flags, containerFormat, stream.Get(), targetFormat, setCustomProps ); if ( FAILED(hr) ) return hr; @@ -886,7 +1148,9 @@ HRESULT SaveToWICMemory( const Image* images, size_t nimages, DWORD flags, REFGU //------------------------------------------------------------------------------------- // Save a WIC-supported file to disk //------------------------------------------------------------------------------------- -HRESULT SaveToWICFile( const Image& image, DWORD flags, REFGUID guidContainerFormat, LPCWSTR szFile, const GUID* targetFormat ) +_Use_decl_annotations_ +HRESULT SaveToWICFile( const Image& image, DWORD flags, REFGUID containerFormat, LPCWSTR szFile, + const GUID* targetFormat, std::function<void(IPropertyBag2*)> setCustomProps ) { if ( !szFile ) return E_INVALIDARG; @@ -898,8 +1162,8 @@ HRESULT SaveToWICFile( const Image& image, DWORD flags, REFGUID guidContainerFor if ( !pWIC ) return E_NOINTERFACE; - ScopedObject<IWICStream> stream; - HRESULT hr = pWIC->CreateStream( &stream ); + ComPtr<IWICStream> stream; + HRESULT hr 
= pWIC->CreateStream( stream.GetAddressOf() ); if ( FAILED(hr) ) return hr; @@ -907,14 +1171,16 @@ HRESULT SaveToWICFile( const Image& image, DWORD flags, REFGUID guidContainerFor if ( FAILED(hr) ) return hr; - hr = _EncodeSingleFrame( image, flags, guidContainerFormat, stream.Get(), targetFormat ); + hr = _EncodeSingleFrame( image, flags, containerFormat, stream.Get(), targetFormat, setCustomProps ); if ( FAILED(hr) ) return hr; return S_OK; } -HRESULT SaveToWICFile( const Image* images, size_t nimages, DWORD flags, REFGUID guidContainerFormat, LPCWSTR szFile, const GUID* targetFormat ) +_Use_decl_annotations_ +HRESULT SaveToWICFile( const Image* images, size_t nimages, DWORD flags, REFGUID containerFormat, LPCWSTR szFile, const GUID* targetFormat, + std::function<void(IPropertyBag2*)> setCustomProps ) { if ( !szFile || !images || nimages == 0 ) return E_INVALIDARG; @@ -923,8 +1189,8 @@ HRESULT SaveToWICFile( const Image* images, size_t nimages, DWORD flags, REFGUID if ( !pWIC ) return E_NOINTERFACE; - ScopedObject<IWICStream> stream; - HRESULT hr = pWIC->CreateStream( &stream ); + ComPtr<IWICStream> stream; + HRESULT hr = pWIC->CreateStream( stream.GetAddressOf() ); if ( FAILED(hr) ) return hr; @@ -933,9 +1199,9 @@ HRESULT SaveToWICFile( const Image* images, size_t nimages, DWORD flags, REFGUID return hr; if ( nimages > 1 ) - hr = _EncodeMultiframe( images, nimages, flags, guidContainerFormat, stream.Get(), targetFormat ); + hr = _EncodeMultiframe( images, nimages, flags, containerFormat, stream.Get(), targetFormat, setCustomProps ); else - hr = _EncodeSingleFrame( images[0], flags, guidContainerFormat, stream.Get(), targetFormat ); + hr = _EncodeSingleFrame( images[0], flags, containerFormat, stream.Get(), targetFormat, setCustomProps ); if ( FAILED(hr) ) return hr; diff --git a/thirdparty/directxtex/DirectXTex/filters.h b/thirdparty/directxtex/DirectXTex/filters.h new file mode 100644 index 00000000..9579fa4e --- /dev/null +++ 
b/thirdparty/directxtex/DirectXTex/filters.h @@ -0,0 +1,426 @@ +//------------------------------------------------------------------------------------- +// filters.h +// +// Utility header with helpers for implementing image filters +// +// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF +// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A +// PARTICULAR PURPOSE. +// +// Copyright (c) Microsoft Corporation. All rights reserved. +//------------------------------------------------------------------------------------- + +#pragma once + +#ifdef USE_XNAMATH +#include <xnamath.h> +#else +#include <directxmath.h> +#include <directxpackedvector.h> +#endif + +#include <memory> + +#include "scoped.h" + +namespace DirectX +{ + +//------------------------------------------------------------------------------------- +// Box filtering helpers +//------------------------------------------------------------------------------------- + +XMGLOBALCONST XMVECTORF32 g_boxScale = { 0.25f, 0.25f, 0.25f, 0.25f }; +XMGLOBALCONST XMVECTORF32 g_boxScale3D = { 0.125f, 0.125f, 0.125f, 0.125f }; + +#define AVERAGE4( res, p0, p1, p2, p3 ) \ +{ \ + XMVECTOR v = XMVectorAdd( (p0), (p1) ); \ + v = XMVectorAdd( v, (p2) ); \ + v = XMVectorAdd( v, (p3) ); \ + res = XMVectorMultiply( v, g_boxScale ); \ +} + +#define AVERAGE8( res, p0, p1, p2, p3, p4, p5, p6, p7) \ +{ \ + XMVECTOR v = XMVectorAdd( (p0), (p1) ); \ + v = XMVectorAdd( v, (p2) ); \ + v = XMVectorAdd( v, (p3) ); \ + v = XMVectorAdd( v, (p4) ); \ + v = XMVectorAdd( v, (p5) ); \ + v = XMVectorAdd( v, (p6) ); \ + v = XMVectorAdd( v, (p7) ); \ + res = XMVectorMultiply( v, g_boxScale3D ); \ +} + + +//------------------------------------------------------------------------------------- +// Linear filtering helpers +//------------------------------------------------------------------------------------- + +struct LinearFilter +{ + size_t u0; + 
float weight0; + size_t u1; + float weight1; +}; + +inline void _CreateLinearFilter( _In_ size_t source, _In_ size_t dest, _In_ bool wrap, _Out_writes_(dest) LinearFilter* lf ) +{ + assert( source > 0 ); + assert( dest > 0 ); + assert( lf != 0 ); + + float scale = float(source) / float(dest); + + // Mirror is the same case as clamp for linear + + for( size_t u = 0; u < dest; ++u ) + { + float srcB = ( float(u) + 0.5f ) * scale + 0.5f; + + ptrdiff_t isrcB = ptrdiff_t(srcB); + ptrdiff_t isrcA = isrcB - 1; + + if ( isrcA < 0 ) + { + isrcA = ( wrap ) ? ( source - 1) : 0; + } + + if ( size_t(isrcB) >= source ) + { + isrcB = ( wrap ) ? 0 : ( source - 1); + } + + float weight = 1.0f + float(isrcB) - srcB; + + auto& entry = lf[ u ]; + entry.u0 = size_t(isrcA); + entry.weight0 = weight; + + entry.u1 = size_t(isrcB); + entry.weight1 = 1.0f - weight; + } +} + +#define BILINEAR_INTERPOLATE( res, x, y, r0, r1 ) \ + res = ( y.weight0 * ( (r0)[ x.u0 ] * x.weight0 + (r0)[ x.u1 ] * x.weight1 ) ) \ + + ( y.weight1 * ( (r1)[ x.u0 ] * x.weight0 + (r1)[ x.u1 ] * x.weight1 ) ) + +#define TRILINEAR_INTERPOLATE( res, x, y, z, r0, r1, r2, r3 ) \ + res = ( z.weight0 * ( ( y.weight0 * ( (r0)[ x.u0 ] * x.weight0 + (r0)[ x.u1 ] * x.weight1 ) ) \ + + ( y.weight1 * ( (r1)[ x.u0 ] * x.weight0 + (r1)[ x.u1 ] * x.weight1 ) ) ) ) \ + + ( z.weight1 * ( ( y.weight0 * ( (r2)[ x.u0 ] * x.weight0 + (r2)[ x.u1 ] * x.weight1 ) ) \ + + ( y.weight1 * ( (r3)[ x.u0 ] * x.weight0 + (r3)[ x.u1 ] * x.weight1 ) ) ) ) + + +//------------------------------------------------------------------------------------- +// Cubic filtering helpers +//------------------------------------------------------------------------------------- + +XMGLOBALCONST XMVECTORF32 g_cubicThird = { 1.f/3.f, 1.f/3.f, 1.f/3.f, 1.f/3.f }; +XMGLOBALCONST XMVECTORF32 g_cubicSixth = { 1.f/6.f, 1.f/6.f, 1.f/6.f, 1.f/6.f }; +XMGLOBALCONST XMVECTORF32 g_cubicHalf = { 1.f/2.f, 1.f/2.f, 1.f/2.f, 1.f/2.f }; + +inline ptrdiff_t bounduvw( ptrdiff_t u, 
ptrdiff_t maxu, bool wrap, bool mirror ) +{ + if ( wrap ) + { + if ( u < 0 ) + { + u = maxu + u + 1; + } + else if ( u > maxu ) + { + u = u - maxu - 1; + } + } + else if ( mirror ) + { + if ( u < 0 ) + { + u = ( -u ) - 1; + } + else if ( u > maxu ) + { + u = maxu - (u - maxu - 1); + } + } + + // Handles clamp, but also a safety factor for degenerate images for wrap/mirror + u = std::min<ptrdiff_t>( u, maxu ); + u = std::max<ptrdiff_t>( u, 0 ); + + return u; +} + +struct CubicFilter +{ + size_t u0; + size_t u1; + size_t u2; + size_t u3; + float x; +}; + +inline void _CreateCubicFilter( _In_ size_t source, _In_ size_t dest, _In_ bool wrap, _In_ bool mirror, _Out_writes_(dest) CubicFilter* cf ) +{ + assert( source > 0 ); + assert( dest > 0 ); + assert( cf != 0 ); + + float scale = float(source) / float(dest); + + for( size_t u = 0; u < dest; ++u ) + { + float srcB = ( float(u) + 0.5f ) * scale - 0.5f; + + ptrdiff_t isrcB = bounduvw( ptrdiff_t(srcB), source - 1, wrap, mirror ); + ptrdiff_t isrcA = bounduvw( isrcB - 1, source - 1, wrap, mirror ); + ptrdiff_t isrcC = bounduvw( isrcB + 1, source - 1, wrap, mirror ); + ptrdiff_t isrcD = bounduvw( isrcB + 2, source - 1, wrap, mirror ); + + auto& entry = cf[ u ]; + entry.u0 = size_t(isrcA); + entry.u1 = size_t(isrcB); + entry.u2 = size_t(isrcC); + entry.u3 = size_t(isrcD); + + float x = srcB - float(isrcB); + entry.x = x; + } +} + +#define CUBIC_INTERPOLATE( res, dx, p0, p1, p2, p3 ) \ +{ \ + XMVECTOR a0 = (p1); \ + XMVECTOR d0 = (p0) - a0; \ + XMVECTOR d2 = (p2) - a0; \ + XMVECTOR d3 = (p3) - a0; \ + XMVECTOR a1 = d2 - g_cubicThird*d0 - g_cubicSixth*d3; \ + XMVECTOR a2 = g_cubicHalf*d0 + g_cubicHalf*d2; \ + XMVECTOR a3 = g_cubicSixth*d3 - g_cubicSixth*d0 - g_cubicHalf*d2; \ + XMVECTOR vdx = XMVectorReplicate( dx ); \ + XMVECTOR vdx2 = vdx * vdx; \ + XMVECTOR vdx3 = vdx2 * vdx; \ + res = a0 + a1*vdx + a2*vdx2 + a3*vdx3; \ +} + + +//------------------------------------------------------------------------------------- +// 
Triangle filtering helpers +//------------------------------------------------------------------------------------- + +namespace TriangleFilter +{ + struct FilterTo + { + size_t u; + float weight; + }; + + struct FilterFrom + { + size_t count; + size_t sizeInBytes; + FilterTo to[1]; // variable-sized array + }; + + struct Filter + { + size_t sizeInBytes; + size_t totalSize; + FilterFrom from[1]; // variable-sized array + }; + + struct TriangleRow + { + size_t remaining; + TriangleRow* next; + ScopedAlignedArrayXMVECTOR scanline; + + TriangleRow() : remaining(0), next(nullptr) {} + }; + + static const size_t TF_FILTER_SIZE = sizeof(Filter) - sizeof(FilterFrom); + static const size_t TF_FROM_SIZE = sizeof(FilterFrom) - sizeof(FilterTo); + static const size_t TF_TO_SIZE = sizeof(FilterTo); + + static const float TF_EPSILON = 0.00001f; + + inline HRESULT _Create( _In_ size_t source, _In_ size_t dest, _In_ bool wrap, _Inout_ std::unique_ptr<Filter>& tf ) + { + assert( source > 0 ); + assert( dest > 0 ); + + float scale = float(dest) / float(source); + float scaleInv = 0.5f / scale; + + // Determine storage required for filter and allocate memory if needed + size_t totalSize = TF_FILTER_SIZE + TF_FROM_SIZE + TF_TO_SIZE; + float repeat = (wrap) ? 
1.f : 0.f; + + for( size_t u = 0; u < source; ++u ) + { + float src = float(u) - 0.5f; + float destMin = src * scale; + float destMax = destMin + scale; + + totalSize += TF_FROM_SIZE + TF_TO_SIZE + size_t( destMax - destMin + repeat + 1.f ) * TF_TO_SIZE * 2; + } + + uint8_t* pFilter = nullptr; + + if ( tf ) + { + // See if existing filter memory block is large enough to reuse + if ( tf->totalSize >= totalSize ) + { + pFilter = reinterpret_cast<uint8_t*>( tf.get() ); + } + else + { + // Need to reallocate filter memory block + tf.reset( nullptr ); + } + } + + if ( !tf ) + { + // Allocate filter memory block + pFilter = new (std::nothrow) uint8_t[ totalSize ]; + if ( !pFilter ) + return E_OUTOFMEMORY; + + tf.reset( reinterpret_cast<Filter*>( pFilter ) ); + tf->totalSize = totalSize; + } + + assert( pFilter != 0 ); + + // Filter setup + size_t sizeInBytes = TF_FILTER_SIZE; + size_t accumU = 0; + float accumWeight = 0.f; + + for( size_t u = 0; u < source; ++u ) + { + // Setup from entry + size_t sizeFrom = sizeInBytes; + auto pFrom = reinterpret_cast<FilterFrom*>( pFilter + sizeInBytes ); + sizeInBytes += TF_FROM_SIZE; + + if ( sizeInBytes > totalSize ) + return E_FAIL; + + size_t toCount = 0; + + // Perform two passes to capture the influences from both sides + for( size_t j = 0; j < 2; ++j ) + { + float src = float( u + j ) - 0.5f; + + float destMin = src * scale; + float destMax = destMin + scale; + + if ( !wrap ) + { + // Clamp + if ( destMin < 0.f ) + destMin = 0.f; + if ( destMax > float(dest) ) + destMax = float(dest); + } + + for( auto k = static_cast<ptrdiff_t>( floorf( destMin ) ); float(k) < destMax; ++k ) + { + float d0 = float(k); + float d1 = d0 + 1.f; + + size_t u0; + if ( k < 0 ) + { + // Handle wrap + u0 = size_t( k + ptrdiff_t(dest) ); + } + else if ( k >= ptrdiff_t(dest) ) + { + // Handle wrap + u0 = size_t( k - ptrdiff_t(dest) ); + } + else + { + u0 = size_t( k ); + } + + // Save previous accumulated weight (if any) + if ( u0 != accumU ) + { + if ( 
accumWeight > TF_EPSILON ) + { + auto pTo = reinterpret_cast<FilterTo*>( pFilter + sizeInBytes ); + sizeInBytes += TF_TO_SIZE; + ++toCount; + + if ( sizeInBytes > totalSize ) + return E_FAIL; + + pTo->u = accumU; + pTo->weight = accumWeight; + } + + accumWeight = 0.f; + accumU = u0; + } + + // Clip destination + if ( d0 < destMin ) + d0 = destMin; + if ( d1 > destMax ) + d1 = destMax; + + // Calculate average weight over destination pixel + + float weight; + if ( !wrap && src < 0.f ) + weight = 1.f; + else if ( !wrap && ( ( src + 1.f ) >= float(source) ) ) + weight = 0.f; + else + weight = (d0 + d1) * scaleInv - src; + + accumWeight += (d1 - d0) * ( j ? (1.f - weight) : weight ); + } + } + + // Store accumulated weight + if ( accumWeight > TF_EPSILON ) + { + auto pTo = reinterpret_cast<FilterTo*>( pFilter + sizeInBytes ); + sizeInBytes += TF_TO_SIZE; + ++toCount; + + if ( sizeInBytes > totalSize ) + return E_FAIL; + + pTo->u = accumU; + pTo->weight = accumWeight; + } + + accumWeight = 0.f; + + // Finalize from entry + pFrom->count = toCount; + pFrom->sizeInBytes = sizeInBytes - sizeFrom; + } + + tf->sizeInBytes = sizeInBytes; + + return S_OK; + } + +}; // namespace + +}; // namespace diff --git a/thirdparty/directxtex/DirectXTex/scoped.h b/thirdparty/directxtex/DirectXTex/scoped.h index 81816069..a02ae69a 100644 --- a/thirdparty/directxtex/DirectXTex/scoped.h +++ b/thirdparty/directxtex/DirectXTex/scoped.h @@ -11,9 +11,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. 
//------------------------------------------------------------------------------------- -#if defined(_MSC_VER) && (_MSC_VER > 1000) #pragma once -#endif #include <assert.h> #include <memory> @@ -22,12 +20,12 @@ //--------------------------------------------------------------------------------- struct aligned_deleter { void operator()(void* p) { _aligned_free(p); } }; -typedef std::unique_ptr<float, aligned_deleter> ScopedAlignedArrayFloat; +typedef std::unique_ptr<float[], aligned_deleter> ScopedAlignedArrayFloat; #ifdef USE_XNAMATH -typedef std::unique_ptr<XMVECTOR, aligned_deleter> ScopedAlignedArrayXMVECTOR; +typedef std::unique_ptr<XMVECTOR[], aligned_deleter> ScopedAlignedArrayXMVECTOR; #else -typedef std::unique_ptr<DirectX::XMVECTOR, aligned_deleter> ScopedAlignedArrayXMVECTOR; +typedef std::unique_ptr<DirectX::XMVECTOR[], aligned_deleter> ScopedAlignedArrayXMVECTOR; #endif //--------------------------------------------------------------------------------- @@ -36,35 +34,3 @@ struct handle_closer { void operator()(HANDLE h) { assert(h != INVALID_HANDLE_VA typedef std::unique_ptr<void, handle_closer> ScopedHandle; inline HANDLE safe_handle( HANDLE h ) { return (h == INVALID_HANDLE_VALUE) ? 
0 : h; } - - -//--------------------------------------------------------------------------------- -template<class T> class ScopedObject -{ -public: - explicit ScopedObject( T *p = 0 ) : _pointer(p) {} - ~ScopedObject() - { - if ( _pointer ) - { - _pointer->Release(); - _pointer = nullptr; - } - } - - bool IsNull() const { return (!_pointer); } - - T& operator*() { return *_pointer; } - T* operator->() { return _pointer; } - T** operator&() { return &_pointer; } - - void Reset(T *p = 0) { if ( _pointer ) { _pointer->Release(); } _pointer = p; } - - T* Get() const { return _pointer; } - -private: - ScopedObject(const ScopedObject&); - ScopedObject& operator=(const ScopedObject&); - - T* _pointer; -}; diff --git a/thirdparty/directxtex/MIT.txt b/thirdparty/directxtex/MIT.txt new file mode 100644 index 00000000..a2336dbc --- /dev/null +++ b/thirdparty/directxtex/MIT.txt @@ -0,0 +1,21 @@ + The MIT License (MIT) + +Copyright (c) 2015 Microsoft Corp + +Permission is hereby granted, free of charge, to any person obtaining a copy of this +software and associated documentation files (the "Software"), to deal in the Software +without restriction, including without limitation the rights to use, copy, modify, +merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice shall be included in all copies +or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF +CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE +OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ diff --git a/thirdparty/directxtex/Microsoft Public License.rtf b/thirdparty/directxtex/Microsoft Public License.rtf deleted file mode 100644 index 390c7adb..00000000 --- a/thirdparty/directxtex/Microsoft Public License.rtf +++ /dev/null @@ -1,234 +0,0 @@ -{\rtf1\adeflang1025\ansi\ansicpg1252\uc1\adeff1\deff0\stshfdbch0\stshfloch0\stshfhich0\stshfbi0\deflang1033\deflangfe1033\themelang1033\themelangfe0\themelangcs0{\fonttbl{\f0\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\f1\fbidi \fswiss\fcharset0\fprq2{\*\panose 020b0604020202020204}Arial;} -{\f34\fbidi \froman\fcharset0\fprq2{\*\panose 02040503050406030204}Cambria Math;}{\f36\fbidi \froman\fcharset0\fprq2{\*\panose 02040503050406030204}Cambria;}{\f38\fbidi \fswiss\fcharset0\fprq2{\*\panose 020b0604030504040204}Tahoma;} -{\f39\fbidi \fswiss\fcharset0\fprq2{\*\panose 00000000000000000000}Verdana;}{\flomajor\f31500\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;} -{\fdbmajor\f31501\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\fhimajor\f31502\fbidi \froman\fcharset0\fprq2{\*\panose 02040503050406030204}Cambria;} -{\fbimajor\f31503\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\flominor\f31504\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;} -{\fdbminor\f31505\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\fhiminor\f31506\fbidi \fswiss\fcharset0\fprq2{\*\panose 020f0502020204030204}Calibri;} -{\fbiminor\f31507\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\f40\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\f41\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;} -{\f43\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\f44\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\f45\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\f46\fbidi \froman\fcharset178\fprq2 
Times New Roman (Arabic);} -{\f47\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\f48\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\f50\fbidi \fswiss\fcharset238\fprq2 Arial CE;}{\f51\fbidi \fswiss\fcharset204\fprq2 Arial Cyr;} -{\f53\fbidi \fswiss\fcharset161\fprq2 Arial Greek;}{\f54\fbidi \fswiss\fcharset162\fprq2 Arial Tur;}{\f55\fbidi \fswiss\fcharset177\fprq2 Arial (Hebrew);}{\f56\fbidi \fswiss\fcharset178\fprq2 Arial (Arabic);} -{\f57\fbidi \fswiss\fcharset186\fprq2 Arial Baltic;}{\f58\fbidi \fswiss\fcharset163\fprq2 Arial (Vietnamese);}{\f380\fbidi \froman\fcharset238\fprq2 Cambria Math CE;}{\f381\fbidi \froman\fcharset204\fprq2 Cambria Math Cyr;} -{\f383\fbidi \froman\fcharset161\fprq2 Cambria Math Greek;}{\f384\fbidi \froman\fcharset162\fprq2 Cambria Math Tur;}{\f387\fbidi \froman\fcharset186\fprq2 Cambria Math Baltic;}{\f388\fbidi \froman\fcharset163\fprq2 Cambria Math (Vietnamese);} -{\f400\fbidi \froman\fcharset238\fprq2 Cambria CE;}{\f401\fbidi \froman\fcharset204\fprq2 Cambria Cyr;}{\f403\fbidi \froman\fcharset161\fprq2 Cambria Greek;}{\f404\fbidi \froman\fcharset162\fprq2 Cambria Tur;} -{\f407\fbidi \froman\fcharset186\fprq2 Cambria Baltic;}{\f408\fbidi \froman\fcharset163\fprq2 Cambria (Vietnamese);}{\f420\fbidi \fswiss\fcharset238\fprq2 Tahoma CE;}{\f421\fbidi \fswiss\fcharset204\fprq2 Tahoma Cyr;} -{\f423\fbidi \fswiss\fcharset161\fprq2 Tahoma Greek;}{\f424\fbidi \fswiss\fcharset162\fprq2 Tahoma Tur;}{\f425\fbidi \fswiss\fcharset177\fprq2 Tahoma (Hebrew);}{\f426\fbidi \fswiss\fcharset178\fprq2 Tahoma (Arabic);} -{\f427\fbidi \fswiss\fcharset186\fprq2 Tahoma Baltic;}{\f428\fbidi \fswiss\fcharset163\fprq2 Tahoma (Vietnamese);}{\f429\fbidi \fswiss\fcharset222\fprq2 Tahoma (Thai);}{\f430\fbidi \fswiss\fcharset238\fprq2 Verdana CE;} -{\f431\fbidi \fswiss\fcharset204\fprq2 Verdana Cyr;}{\f433\fbidi \fswiss\fcharset161\fprq2 Verdana Greek;}{\f434\fbidi \fswiss\fcharset162\fprq2 Verdana Tur;}{\f437\fbidi 
\fswiss\fcharset186\fprq2 Verdana Baltic;} -{\f438\fbidi \fswiss\fcharset163\fprq2 Verdana (Vietnamese);}{\flomajor\f31508\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\flomajor\f31509\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;} -{\flomajor\f31511\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\flomajor\f31512\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\flomajor\f31513\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);} -{\flomajor\f31514\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\flomajor\f31515\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\flomajor\f31516\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);} -{\fdbmajor\f31518\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\fdbmajor\f31519\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}{\fdbmajor\f31521\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;} -{\fdbmajor\f31522\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\fdbmajor\f31523\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\fdbmajor\f31524\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);} -{\fdbmajor\f31525\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\fdbmajor\f31526\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\fhimajor\f31528\fbidi \froman\fcharset238\fprq2 Cambria CE;} -{\fhimajor\f31529\fbidi \froman\fcharset204\fprq2 Cambria Cyr;}{\fhimajor\f31531\fbidi \froman\fcharset161\fprq2 Cambria Greek;}{\fhimajor\f31532\fbidi \froman\fcharset162\fprq2 Cambria Tur;} -{\fhimajor\f31535\fbidi \froman\fcharset186\fprq2 Cambria Baltic;}{\fhimajor\f31536\fbidi \froman\fcharset163\fprq2 Cambria (Vietnamese);}{\fbimajor\f31538\fbidi \froman\fcharset238\fprq2 Times New Roman CE;} -{\fbimajor\f31539\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}{\fbimajor\f31541\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\fbimajor\f31542\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;} 
-{\fbimajor\f31543\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\fbimajor\f31544\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\fbimajor\f31545\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;} -{\fbimajor\f31546\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\flominor\f31548\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\flominor\f31549\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;} -{\flominor\f31551\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\flominor\f31552\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\flominor\f31553\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);} -{\flominor\f31554\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\flominor\f31555\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\flominor\f31556\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);} -{\fdbminor\f31558\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\fdbminor\f31559\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}{\fdbminor\f31561\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;} -{\fdbminor\f31562\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\fdbminor\f31563\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\fdbminor\f31564\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);} -{\fdbminor\f31565\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\fdbminor\f31566\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\fhiminor\f31568\fbidi \fswiss\fcharset238\fprq2 Calibri CE;} -{\fhiminor\f31569\fbidi \fswiss\fcharset204\fprq2 Calibri Cyr;}{\fhiminor\f31571\fbidi \fswiss\fcharset161\fprq2 Calibri Greek;}{\fhiminor\f31572\fbidi \fswiss\fcharset162\fprq2 Calibri Tur;} -{\fhiminor\f31575\fbidi \fswiss\fcharset186\fprq2 Calibri Baltic;}{\fhiminor\f31576\fbidi \fswiss\fcharset163\fprq2 Calibri (Vietnamese);}{\fbiminor\f31578\fbidi \froman\fcharset238\fprq2 Times New Roman CE;} -{\fbiminor\f31579\fbidi 
\froman\fcharset204\fprq2 Times New Roman Cyr;}{\fbiminor\f31581\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\fbiminor\f31582\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;} -{\fbiminor\f31583\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\fbiminor\f31584\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\fbiminor\f31585\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;} -{\fbiminor\f31586\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}}{\colortbl;\red0\green0\blue0;\red0\green0\blue255;\red0\green255\blue255;\red0\green255\blue0;\red255\green0\blue255;\red255\green0\blue0;\red255\green255\blue0; -\red255\green255\blue255;\red0\green0\blue128;\red0\green128\blue128;\red0\green128\blue0;\red128\green0\blue128;\red128\green0\blue0;\red128\green128\blue0;\red128\green128\blue128;\red192\green192\blue192;}{\*\defchp }{\*\defpap -\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 }\noqfpromote {\stylesheet{\ql \li0\ri0\nowidctlpar\wrapdefault\faauto\rin0\lin0\itap0 \rtlch\fcs1 \af1\afs24\alang1025 \ltrch\fcs0 -\f1\fs24\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 \snext0 \sqformat \spriority0 Normal;}{\s1\ql \li0\ri0\nowidctlpar\wrapdefault\faauto\outlinelevel0\rin0\lin0\itap0 \rtlch\fcs1 \af1\afs24\alang1025 \ltrch\fcs0 -\f1\fs24\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 \sbasedon0 \snext0 \slink15 \sqformat \spriority9 heading 1;}{\s2\ql \li0\ri0\nowidctlpar\wrapdefault\faauto\outlinelevel1\rin0\lin0\itap0 \rtlch\fcs1 \af1\afs24\alang1025 \ltrch\fcs0 -\f1\fs24\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 \sbasedon0 \snext0 \slink16 \sqformat \spriority9 heading 2;}{\*\cs10 \additive \ssemihidden Default Paragraph Font;}{\* -\ts11\tsrowd\trftsWidthB3\trpaddl108\trpaddr108\trpaddfl3\trpaddft3\trpaddfb3\trpaddfr3\trcbpat1\trcfpat1\tblind0\tblindtype3\tsvertalt\tsbrdrt\tsbrdrl\tsbrdrb\tsbrdrr\tsbrdrdgl\tsbrdrdgr\tsbrdrh\tsbrdrv -\ql 
\li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 \fs20\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 \snext11 \ssemihidden \sunhideused Normal Table;}{\*\cs15 \additive -\rtlch\fcs1 \ab\af0\afs32 \ltrch\fcs0 \b\f36\fs32\kerning32 \sbasedon10 \slink1 \slocked \spriority9 Heading 1 Char;}{\*\cs16 \additive \rtlch\fcs1 \ab\ai\af0\afs28 \ltrch\fcs0 \b\i\f36\fs28 \sbasedon10 \slink2 \slocked \spriority9 Heading 2 Char;}{ -\s17\ql \li0\ri0\nowidctlpar\wrapdefault\faauto\rin0\lin0\itap0 \rtlch\fcs1 \af38\afs16\alang1025 \ltrch\fcs0 \f38\fs16\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 \sbasedon0 \snext17 \slink18 \ssemihidden \sunhideused \styrsid7424395 Balloon Text;} -{\*\cs18 \additive \rtlch\fcs1 \af38\afs16 \ltrch\fcs0 \f38\fs16 \sbasedon10 \slink17 \slocked \ssemihidden \styrsid7424395 Balloon Text Char;}{\*\cs19 \additive \rtlch\fcs1 \af0\afs16 \ltrch\fcs0 \fs16 -\sbasedon10 \ssemihidden \sunhideused \styrsid4538388 annotation reference;}{\s20\ql \li0\ri0\nowidctlpar\wrapdefault\faauto\rin0\lin0\itap0 \rtlch\fcs1 \af1\afs20\alang1025 \ltrch\fcs0 \f1\fs20\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 -\sbasedon0 \snext20 \slink21 \ssemihidden \sunhideused \styrsid4538388 annotation text;}{\*\cs21 \additive \rtlch\fcs1 \af1 \ltrch\fcs0 \f1 \sbasedon10 \slink20 \slocked \ssemihidden \styrsid4538388 Comment Text Char;}{ -\s22\ql \li0\ri0\nowidctlpar\wrapdefault\faauto\rin0\lin0\itap0 \rtlch\fcs1 \ab\af1\afs20\alang1025 \ltrch\fcs0 \b\f1\fs20\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 \sbasedon20 \snext20 \slink23 \ssemihidden \sunhideused \styrsid4538388 -annotation subject;}{\*\cs23 \additive \rtlch\fcs1 \ab\af1 \ltrch\fcs0 \b\f1 \sbasedon21 \slink22 \slocked \ssemihidden \styrsid4538388 Comment Subject Char;}}{\*\rsidtbl \rsid213160\rsid284417\rsid417145\rsid481196\rsid551334\rsid723397\rsid786968 
-\rsid1382437\rsid1390003\rsid1521043\rsid1530955\rsid1708989\rsid1783212\rsid1903779\rsid2431884\rsid3165084\rsid3416120\rsid3419781\rsid3754103\rsid3768194\rsid3831520\rsid4538130\rsid4538388\rsid4552277\rsid4680449\rsid4729674\rsid4865270\rsid4987534 -\rsid5128131\rsid5186068\rsid5601121\rsid5864350\rsid6186044\rsid6311778\rsid6384507\rsid6434687\rsid6561471\rsid6910344\rsid6947552\rsid7033180\rsid7424395\rsid7682010\rsid7690850\rsid7744081\rsid8151618\rsid8196281\rsid8198206\rsid8342723\rsid8350925 -\rsid8722561\rsid8852349\rsid8934457\rsid8944153\rsid9573035\rsid9635349\rsid9638545\rsid9724918\rsid10044820\rsid10095979\rsid10228618\rsid10449644\rsid10494075\rsid11166278\rsid11166751\rsid11285353\rsid11366513\rsid11494815\rsid11932529\rsid12061202 -\rsid12533699\rsid12536400\rsid12916885\rsid13264736\rsid13322831\rsid13440556\rsid13455614\rsid13597357\rsid13768671\rsid14097590\rsid14157399\rsid14229900\rsid14305025\rsid14314735\rsid14436896\rsid14565916\rsid14572556\rsid14688892\rsid14752433 -\rsid14904394\rsid15086147\rsid15749945\rsid15814398\rsid15927751\rsid16071312\rsid16126175\rsid16279402\rsid16391569\rsid16404661\rsid16452939\rsid16537688\rsid16606866\rsid16674896}{\mmathPr\mmathFont34\mbrkBin0\mbrkBinSub0\msmallFrac0\mdispDef1 -\mlMargin0\mrMargin0\mdefJc1\mwrapIndent1440\mintLim0\mnaryLim1}{\info{\title Microsoft Permissive License (Ms-PL)}{\author Jonr}{\operator Chuck Walbourn}{\creatim\yr2007\mo2\dy23\hr15\min10}{\revtim\yr2011\mo8\dy15\hr15\min2} -{\printim\yr2006\mo9\dy28\hr8\min46}{\version3}{\edmins1}{\nofpages1}{\nofwords391}{\nofchars2230}{\*\company Microsoft}{\nofcharsws2616}{\vern49273}}{\*\userprops {\propname _NewReviewCycle}\proptype30{\staticval }}{\*\xmlnstbl {\xmlns1 http://schemas.mi -crosoft.com/office/word/2003/wordml}{\xmlns2 urn:schemas-microsoft-com:office:smarttags}}\paperw12240\paperh15840\margl1440\margr1440\margt1440\margb1440\gutter0\ltrsect 
-\widowctrl\ftnbj\aenddoc\trackmoves0\trackformatting1\donotembedsysfont0\relyonvml0\donotembedlingdata1\grfdocevents0\validatexml0\showplaceholdtext0\ignoremixedcontent0\saveinvalidxml0\showxmlerrors0\hyphcaps0\horzdoc\dghspace120\dgvspace120 -\dghorigin1701\dgvorigin1984\dghshow0\dgvshow3\jcompress\viewkind1\viewscale100\splytwnine\ftnlytwnine\htmautsp\useltbaln\alntblind\lytcalctblwd\lyttblrtgr\lnbrkrule\nobrkwrptbl\snaptogridincell\allowfieldendsel\wrppunct\asianbrkrule\rsidroot10494075 -\newtblstyruls\nogrowautofit\utinl \fet0{\*\wgrffmtfilter 2450}\ilfomacatclnup0\ltrpar \sectd \ltrsect\linex0\sectdefaultcl\sftnbj {\*\pnseclvl1\pnucrm\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl2\pnucltr\pnstart1\pnindent720\pnhang {\pntxta .}} -{\*\pnseclvl3\pndec\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl4\pnlcltr\pnstart1\pnindent720\pnhang {\pntxta )}}{\*\pnseclvl5\pndec\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl6\pnlcltr\pnstart1\pnindent720\pnhang {\pntxtb (} -{\pntxta )}}{\*\pnseclvl7\pnlcrm\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl8\pnlcltr\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl9\pnlcrm\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}\pard\plain \ltrpar -\s1\ql \li0\ri0\sb180\nowidctlpar\wrapdefault\faauto\outlinelevel0\rin0\lin0\itap0 \rtlch\fcs1 \af1\afs24\alang1025 \ltrch\fcs0 \f1\fs24\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af1\afs31 \ltrch\fcs0 -\fs31\cf1\kerning36\insrsid10494075\charrsid14688892 Microsoft}{\rtlch\fcs1 \af1\afs31 \ltrch\fcs0 \fs31\cf1\kerning36\insrsid10494075 }{\rtlch\fcs1 \af1\afs31 \ltrch\fcs0 \fs31\cf1\kerning36\insrsid5601121 Public}{\rtlch\fcs1 \af1\afs31 \ltrch\fcs0 -\fs31\cf1\kerning36\insrsid14688892 }{\rtlch\fcs1 \af1\afs31 \ltrch\fcs0 \fs31\cf1\kerning36\insrsid10494075 License (Ms-PL}{\rtlch\fcs1 \af1\afs31 \ltrch\fcs0 \fs31\cf1\kerning36\insrsid4552277 )}{\rtlch\fcs1 \af1\afs31 \ltrch\fcs0 
-\fs31\cf1\kerning36\insrsid10494075 -\par }\pard\plain \ltrpar\ql \li0\ri0\sl336\slmult1\nowidctlpar\wrapdefault\faauto\rin0\lin0\itap0 \rtlch\fcs1 \af1\afs24\alang1025 \ltrch\fcs0 \f1\fs24\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \ab\af39\afs17 \ltrch\fcs0 -\b\f39\fs17\insrsid10494075 -\par This license governs use of the accompanying software. If you use the software, you accept this license. If you do not accept the license, do not use the software. -\par }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid6910344 -\par }\pard\plain \ltrpar\s2\ql \li0\ri0\nowidctlpar\wrapdefault\faauto\outlinelevel1\rin0\lin0\itap0 \rtlch\fcs1 \af1\afs24\alang1025 \ltrch\fcs0 \f1\fs24\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \ab\af39\afs23 \ltrch\fcs0 -\b\f39\fs23\insrsid10494075 1. Definitions -\par }\pard\plain \ltrpar\ql \li0\ri0\sl336\slmult1\nowidctlpar\wrapdefault\faauto\rin0\lin0\itap0 \rtlch\fcs1 \af1\afs24\alang1025 \ltrch\fcs0 \f1\fs24\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 -\f39\fs17\insrsid10494075 The terms \'93reproduce,\'94 \'93reproduction}{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid7744081 ,}{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid10494075 \'94 }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 -\f39\fs17\insrsid551334 \'93derivative works,\'94}{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid7744081\charrsid7744081 }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid10494075 and \'93distribution\'94 have the same meaning here as under -{\*\xmlopen\xmlns2{\factoidname place}}{\*\xmlopen\xmlns2{\factoidname country-region}}U.S.{\*\xmlclose}{\*\xmlclose} copyright law. 
-\par }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid12536400 A \'93contribution\'94 is the original software}{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid4865270 ,}{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid12536400 }{\rtlch\fcs1 -\af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid11932529 or}{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid12536400 any additions or changes to the software. -\par }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid551334 A \'93c}{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid551334\charrsid551334 ontributor\'94 }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid12536400 is}{\rtlch\fcs1 \af39\afs17 -\ltrch\fcs0 \f39\fs17\insrsid12536400\charrsid551334 }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid551334\charrsid551334 any person that }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid12536400 -distributes its contribution under this license.}{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid10494075 -\par }\pard \ltrpar\ql \li0\ri0\nowidctlpar\wrapdefault\faauto\rin0\lin0\itap0\pararsid14229900 {\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid4729674\delrsid4729674 }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid10494075 \'93Licensed patents -\'94 }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid12536400 are }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid3831520 a contributor\rquote s }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid10494075 patent claims }{\rtlch\fcs1 -\af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid3831520 that }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid10494075 read directly on }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid3831520 its contribution.}{\rtlch\fcs1 \af1 \ltrch\fcs0 -\insrsid14229900\charrsid14229900 -\par }\pard\plain \ltrpar\s2\ql \li0\ri0\nowidctlpar\wrapdefault\faauto\outlinelevel1\rin0\lin0\itap0 \rtlch\fcs1 \af1\afs24\alang1025 \ltrch\fcs0 
\f1\fs24\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \ab\af39\afs23 \ltrch\fcs0 -\b\f39\fs23\insrsid5186068 -\par }{\rtlch\fcs1 \ab\af39\afs23 \ltrch\fcs0 \b\f39\fs23\insrsid10494075 2. Grant of Rights -\par }\pard\plain \ltrpar\ql \li0\ri0\sl336\slmult1\nowidctlpar\wrapdefault\faauto\rin0\lin0\itap0 \rtlch\fcs1 \af1\afs24\alang1025 \ltrch\fcs0 \f1\fs24\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 -\f39\fs17\insrsid10494075 (A) Copyright Grant- Subject to the terms of this license, including the license conditions and limitations in section 3, }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid3754103 each contributor }{\rtlch\fcs1 \af39\afs17 -\ltrch\fcs0 \f39\fs17\insrsid10494075 grants you a non-exclusive, worldwide, royalty-free copyright license to reproduce }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid3754103 its contribution}{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 -\f39\fs17\insrsid10494075 , prepare derivative works of }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid3754103 its contribution}{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid12536400 ,}{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 -\f39\fs17\insrsid10494075 and distribute }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid3754103 its contribution}{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid10494075 or any derivative works that you create. 
-\par (B) Patent Grant- Subject to the terms of this license, including the license conditions and limitations in section 3, }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid9724918 each contributor }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 -\f39\fs17\insrsid10494075 grants you a non-exclusive, worldwide, royalty-free license under }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid15814398 its }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid10494075 -licensed patents to make, have made, use, sell, offer for sale, }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid1390003 import, }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid10494075 and/or otherwise dispose of }{\rtlch\fcs1 \af39\afs17 -\ltrch\fcs0 \f39\fs17\insrsid8944153 its contribution in }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid10494075 the software or derivative works of }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid8944153 the contribution in }{\rtlch\fcs1 -\af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid10494075 the software. -\par }\pard\plain \ltrpar\s2\ql \li0\ri0\nowidctlpar\wrapdefault\faauto\outlinelevel1\rin0\lin0\itap0 \rtlch\fcs1 \af1\afs24\alang1025 \ltrch\fcs0 \f1\fs24\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \ab\af39\afs23 \ltrch\fcs0 -\b\f39\fs23\insrsid5186068 -\par }{\rtlch\fcs1 \ab\af39\afs23 \ltrch\fcs0 \b\f39\fs23\insrsid10494075 3. 
Conditions and Limitations -\par }\pard\plain \ltrpar\ql \li0\ri0\sl336\slmult1\nowidctlpar\wrapdefault\faauto\rin0\lin0\itap0 \rtlch\fcs1 \af1\afs24\alang1025 \ltrch\fcs0 \f1\fs24\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 -\f39\fs17\insrsid1530955 }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid10494075 (A) No Trademark License- This license does not grant you rights to use }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid1708989 any contributors\rquote }{ -\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid10494075 name, logo, or trademarks. -\par (B) If you }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid8934457 bring a patent claim against }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid10095979 any contributor}{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid10494075 - over patents that you }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid6947552 claim }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid7682010 are }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid6947552 infringe}{\rtlch\fcs1 -\af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid7682010 d by}{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid10494075 the software, your }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid7682010 patent }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 -\f39\fs17\insrsid10494075 license}{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid7682010 from such contributor}{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid10494075 to the software ends automatically. -\par (C) If you distribute }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid3165084 any portion of }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid10494075 -the software, you must retain all copyright, patent, trademark, and attribution notices that are present in the software. 
-\par (D) If you distribute }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid15749945 any portion of the }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid10494075 software in source code form}{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 -\f39\fs17\insrsid14904394 ,}{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid10494075 you may do so only under this license}{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid6384507 }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 -\f39\fs17\insrsid14904394 by including }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid10494075 a complete copy of this license with your distribution}{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid6384507 .}{\rtlch\fcs1 \af39\afs17 -\ltrch\fcs0 \f39\fs17\insrsid10494075 }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid6384507 I}{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid10494075 f you distribute }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid15749945 -any portion of }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid10494075 the software in compiled or object code form}{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid16452939 ,}{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid10494075 - you may only do so under a license that complies with this license. -\par }\pard \ltrpar\ql \li0\ri0\sl336\slmult1\nowidctlpar\wrapdefault\faauto\rin0\lin0\itap0\pararsid14572556 {\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid10494075 (E) The software is licensed \'93as-is.\'94 You bear the risk of using it. }{ -\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid284417 The contributors }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid10494075 -give no express warranties, guarantees or conditions. You may have additional consumer rights under your local laws which this license cannot change. 
To the extent permitted under your local laws, }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 -\f39\fs17\insrsid1783212 the contributors }{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 \f39\fs17\insrsid10494075 exclude the implied warranties of merchantability, fitness for a particular purpose and non-infringement.}{\rtlch\fcs1 \af39\afs17 \ltrch\fcs0 -\f39\fs17\insrsid10494075\charrsid14572556 -\par }{\*\themedata 504b030414000600080000002100e9de0fbfff0000001c020000130000005b436f6e74656e745f54797065735d2e786d6cac91cb4ec3301045f748fc83e52d4a -9cb2400825e982c78ec7a27cc0c8992416c9d8b2a755fbf74cd25442a820166c2cd933f79e3be372bd1f07b5c3989ca74aaff2422b24eb1b475da5df374fd9ad -5689811a183c61a50f98f4babebc2837878049899a52a57be670674cb23d8e90721f90a4d2fa3802cb35762680fd800ecd7551dc18eb899138e3c943d7e503b6 -b01d583deee5f99824e290b4ba3f364eac4a430883b3c092d4eca8f946c916422ecab927f52ea42b89a1cd59c254f919b0e85e6535d135a8de20f20b8c12c3b0 -0c895fcf6720192de6bf3b9e89ecdbd6596cbcdd8eb28e7c365ecc4ec1ff1460f53fe813d3cc7f5b7f020000ffff0300504b030414000600080000002100a5d6 -a7e7c0000000360100000b0000005f72656c732f2e72656c73848fcf6ac3300c87ef85bd83d17d51d2c31825762fa590432fa37d00e1287f68221bdb1bebdb4f -c7060abb0884a4eff7a93dfeae8bf9e194e720169aaa06c3e2433fcb68e1763dbf7f82c985a4a725085b787086a37bdbb55fbc50d1a33ccd311ba548b6309512 -0f88d94fbc52ae4264d1c910d24a45db3462247fa791715fd71f989e19e0364cd3f51652d73760ae8fa8c9ffb3c330cc9e4fc17faf2ce545046e37944c69e462 -a1a82fe353bd90a865aad41ed0b5b8f9d6fd010000ffff0300504b0304140006000800000021006b799616830000008a0000001c0000007468656d652f746865 -6d652f7468656d654d616e616765722e786d6c0ccc4d0ac3201040e17da17790d93763bb284562b2cbaebbf600439c1a41c7a0d29fdbd7e5e38337cedf14d59b -4b0d592c9c070d8a65cd2e88b7f07c2ca71ba8da481cc52c6ce1c715e6e97818c9b48d13df49c873517d23d59085adb5dd20d6b52bd521ef2cdd5eb9246a3d8b -4757e8d3f729e245eb2b260a0238fd010000ffff0300504b03041400060008000000210096b5ade296060000501b0000160000007468656d652f7468656d652f 
-7468656d65312e786d6cec594f6fdb3614bf0fd87720746f6327761a07758ad8b19b2d4d1bc46e871e698996d850a240d2497d1bdae38001c3ba618715d86d87 -615b8116d8a5fb34d93a6c1dd0afb0475292c5585e9236d88aad3e2412f9e3fbff1e1fa9abd7eec70c1d1221294fda5efd72cd4324f1794093b0eddd1ef62fad -79482a9c0498f184b4bd2991deb58df7dfbb8ad755446282607d22d771db8b944ad79796a40fc3585ee62949606ecc458c15bc8a702910f808e8c66c69b9565b -5d8a314d3c94e018c8de1a8fa94fd05093f43672e23d06af89927ac06762a049136785c10607758d9053d965021d62d6f6804fc08f86e4bef210c352c144dbab -999fb7b4717509af678b985ab0b6b4ae6f7ed9ba6c4170b06c788a705430adf71bad2b5b057d03606a1ed7ebf5babd7a41cf00b0ef83a6569632cd467faddec9 -699640f6719e76b7d6ac355c7c89feca9cccad4ea7d36c65b258a206641f1b73f8b5da6a6373d9c11b90c537e7f08dce66b7bbeae00dc8e257e7f0fd2badd586 -8b37a088d1e4600ead1ddaef67d40bc898b3ed4af81ac0d76a197c86826828a24bb318f3442d8ab518dfe3a20f000d6458d104a9694ac6d88728eee2782428d6 -0cf03ac1a5193be4cbb921cd0b495fd054b5bd0f530c1931a3f7eaf9f7af9e3f45c70f9e1d3ff8e9f8e1c3e3073f5a42ceaa6d9c84e5552fbffdeccfc71fa33f -9e7ef3f2d117d57859c6fffac327bffcfc793510d26726ce8b2f9ffcf6ecc98baf3efdfdbb4715f04d814765f890c644a29be408edf3181433567125272371be -15c308d3f28acd249438c19a4b05fd9e8a1cf4cd296699771c393ac4b5e01d01e5a30a787d72cf1178108989a2159c77a2d801ee72ce3a5c545a6147f32a9979 -3849c26ae66252c6ed637c58c5bb8b13c7bfbd490a75330f4b47f16e441c31f7184e140e494214d273fc80900aedee52ead87597fa824b3e56e82e451d4c2b4d -32a423279a668bb6690c7e9956e90cfe766cb37b077538abd27a8b1cba48c80acc2a841f12e698f13a9e281c57911ce298950d7e03aba84ac8c154f8655c4f2a -f074481847bd804859b5e696007d4b4edfc150b12addbecba6b18b148a1e54d1bc81392f23b7f84137c2715a851dd0242a633f900710a218ed715505dfe56e86 -e877f0034e16bafb0e258ebb4faf06b769e888340b103d3311da9750aa9d0a1cd3e4efca31a3508f6d0c5c5c398602f8e2ebc71591f5b616e24dd893aa3261fb -44f95d843b5974bb5c04f4edafb95b7892ec1108f3f98de75dc97d5772bdff7cc95d94cf672db4b3da0a6557f70db629362d72bcb0431e53c6066acac80d699a 
-6409fb44d08741bdce9c0e4971624a2378cceaba830b05366b90e0ea23aaa241845368b0eb9e2612ca8c742851ca251ceccc70256d8d87265dd96361531f186c -3d9058edf2c00eafe8e1fc5c509031bb4d680e9f39a3154de0accc56ae644441edd76156d7429d995bdd88664a9dc3ad50197c38af1a0c16d684060441db0256 -5e85f3b9660d0713cc48a0ed6ef7dedc2dc60b17e92219e180643ed27acffba86e9c94c78ab90980d8a9f0913ee49d62b512b79626fb06dccee2a432bbc60276 -b9f7dec44b7904cfbca4f3f6443ab2a49c9c2c41476dafd55c6e7ac8c769db1bc399161ee314bc2e75cf8759081743be1236ec4f4d6693e5336fb672c5dc24a8 -c33585b5fb9cc24e1d4885545b58463634cc5416022cd19cacfccb4d30eb45296023fd35a458598360f8d7a4003bbaae25e331f155d9d9a5116d3bfb9a95523e -51440ca2e0088dd844ec6370bf0e55d027a012ae264c45d02f708fa6ad6da6dce29c255df9f6cae0ec38666984b372ab5334cf640b37795cc860de4ae2816e95 -b21be5ceaf8a49f90b52a51cc6ff3355f47e0237052b81f6800fd7b802239daf6d8f0b1571a8426944fdbe80c6c1d40e8816b88b8569082ab84c36ff0539d4ff -6dce591a26ade1c0a7f669880485fd484582903d284b26fa4e2156cff62e4b9265844c4495c495a9157b440e091bea1ab8aaf7760f4510eaa69a6465c0e04ec6 -9ffb9e65d028d44d4e39df9c1a52ecbd3607fee9cec7263328e5d661d3d0e4f62f44acd855ed7ab33cdf7bcb8ae889599bd5c8b3029895b6825696f6af29c239 -b75a5bb1e6345e6ee6c28117e73586c1a2214ae1be07e93fb0ff51e133fb65426fa843be0fb515c187064d0cc206a2fa926d3c902e907670048d931db4c1a449 -59d366ad93b65abe595f70a75bf03d616c2dd959fc7d4e6317cd99cbcec9c58b34766661c7d6766ca1a9c1b327531486c6f941c638c67cd22a7f75e2a37be0e8 -2db8df9f30254d30c1372581a1f51c983c80e4b71ccdd28dbf000000ffff0300504b0304140006000800000021000dd1909fb60000001b010000270000007468 -656d652f7468656d652f5f72656c732f7468656d654d616e616765722e786d6c2e72656c73848f4d0ac2301484f78277086f6fd3ba109126dd88d0add40384e4 -350d363f2451eced0dae2c082e8761be9969bb979dc9136332de3168aa1a083ae995719ac16db8ec8e4052164e89d93b64b060828e6f37ed1567914b284d2624 -52282e3198720e274a939cd08a54f980ae38a38f56e422a3a641c8bbd048f7757da0f19b017cc524bd62107bd5001996509affb3fd381a89672f1f165dfe5141 
-73d9850528a2c6cce0239baa4c04ca5bbabac4df000000ffff0300504b01022d0014000600080000002100e9de0fbfff0000001c020000130000000000000000 -0000000000000000005b436f6e74656e745f54797065735d2e786d6c504b01022d0014000600080000002100a5d6a7e7c0000000360100000b00000000000000 -000000000000300100005f72656c732f2e72656c73504b01022d00140006000800000021006b799616830000008a0000001c0000000000000000000000000019 -0200007468656d652f7468656d652f7468656d654d616e616765722e786d6c504b01022d001400060008000000210096b5ade296060000501b00001600000000 -000000000000000000d60200007468656d652f7468656d652f7468656d65312e786d6c504b01022d00140006000800000021000dd1909fb60000001b01000027 -00000000000000000000000000a00900007468656d652f7468656d652f5f72656c732f7468656d654d616e616765722e786d6c2e72656c73504b050600000000050005005d0100009b0a00000000} -{\*\colorschememapping 3c3f786d6c2076657273696f6e3d22312e302220656e636f64696e673d225554462d3822207374616e64616c6f6e653d22796573223f3e0d0a3c613a636c724d -617020786d6c6e733a613d22687474703a2f2f736368656d61732e6f70656e786d6c666f726d6174732e6f72672f64726177696e676d6c2f323030362f6d6169 -6e22206267313d226c743122207478313d22646b3122206267323d226c743222207478323d22646b322220616363656e74313d22616363656e74312220616363 -656e74323d22616363656e74322220616363656e74333d22616363656e74332220616363656e74343d22616363656e74342220616363656e74353d22616363656e74352220616363656e74363d22616363656e74362220686c696e6b3d22686c696e6b2220666f6c486c696e6b3d22666f6c486c696e6b222f3e} -{\*\latentstyles\lsdstimax267\lsdlockeddef0\lsdsemihiddendef1\lsdunhideuseddef1\lsdqformatdef0\lsdprioritydef99{\lsdlockedexcept \lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority0 \lsdlocked0 Normal; -\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority9 \lsdlocked0 heading 1;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority9 \lsdlocked0 heading 2;\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 3; -\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 4;\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 
5;\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 6;\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 7;\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 8; -\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 9;\lsdqformat1 \lsdpriority39 \lsdlocked0 toc 1;\lsdqformat1 \lsdpriority39 \lsdlocked0 toc 2;\lsdqformat1 \lsdpriority39 \lsdlocked0 toc 3;\lsdqformat1 \lsdpriority39 \lsdlocked0 toc 4; -\lsdqformat1 \lsdpriority39 \lsdlocked0 toc 5;\lsdqformat1 \lsdpriority39 \lsdlocked0 toc 6;\lsdqformat1 \lsdpriority39 \lsdlocked0 toc 7;\lsdqformat1 \lsdpriority39 \lsdlocked0 toc 8;\lsdqformat1 \lsdpriority39 \lsdlocked0 toc 9; -\lsdqformat1 \lsdpriority35 \lsdlocked0 caption;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority10 \lsdlocked0 Title;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority11 \lsdlocked0 Subtitle; -\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority22 \lsdlocked0 Strong;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority20 \lsdlocked0 Emphasis;\lsdsemihidden0 \lsdunhideused0 \lsdpriority59 \lsdlocked0 Table Grid; -\lsdunhideused0 \lsdlocked0 Placeholder Text;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority1 \lsdlocked0 No Spacing;\lsdsemihidden0 \lsdunhideused0 \lsdpriority60 \lsdlocked0 Light Shading; -\lsdsemihidden0 \lsdunhideused0 \lsdpriority61 \lsdlocked0 Light List;\lsdsemihidden0 \lsdunhideused0 \lsdpriority62 \lsdlocked0 Light Grid;\lsdsemihidden0 \lsdunhideused0 \lsdpriority63 \lsdlocked0 Medium Shading 1; -\lsdsemihidden0 \lsdunhideused0 \lsdpriority64 \lsdlocked0 Medium Shading 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority65 \lsdlocked0 Medium List 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority66 \lsdlocked0 Medium List 2; -\lsdsemihidden0 \lsdunhideused0 \lsdpriority67 \lsdlocked0 Medium Grid 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority68 \lsdlocked0 Medium Grid 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority69 \lsdlocked0 Medium Grid 3; -\lsdsemihidden0 \lsdunhideused0 \lsdpriority70 \lsdlocked0 Dark 
List;\lsdsemihidden0 \lsdunhideused0 \lsdpriority71 \lsdlocked0 Colorful Shading;\lsdsemihidden0 \lsdunhideused0 \lsdpriority72 \lsdlocked0 Colorful List; -\lsdsemihidden0 \lsdunhideused0 \lsdpriority73 \lsdlocked0 Colorful Grid;\lsdsemihidden0 \lsdunhideused0 \lsdpriority60 \lsdlocked0 Light Shading Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority61 \lsdlocked0 Light List Accent 1; -\lsdsemihidden0 \lsdunhideused0 \lsdpriority62 \lsdlocked0 Light Grid Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 1; -\lsdsemihidden0 \lsdunhideused0 \lsdpriority65 \lsdlocked0 Medium List 1 Accent 1;\lsdunhideused0 \lsdlocked0 Revision;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority34 \lsdlocked0 List Paragraph; -\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority29 \lsdlocked0 Quote;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority30 \lsdlocked0 Intense Quote;\lsdsemihidden0 \lsdunhideused0 \lsdpriority66 \lsdlocked0 Medium List 2 Accent 1; -\lsdsemihidden0 \lsdunhideused0 \lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 1; -\lsdsemihidden0 \lsdunhideused0 \lsdpriority70 \lsdlocked0 Dark List Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority71 \lsdlocked0 Colorful Shading Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority72 \lsdlocked0 Colorful List Accent 1; -\lsdsemihidden0 \lsdunhideused0 \lsdpriority73 \lsdlocked0 Colorful Grid Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority60 \lsdlocked0 Light Shading Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority61 \lsdlocked0 Light List Accent 2; -\lsdsemihidden0 \lsdunhideused0 \lsdpriority62 \lsdlocked0 Light Grid Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority63 \lsdlocked0 Medium Shading 1 
Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 2; -\lsdsemihidden0 \lsdunhideused0 \lsdpriority65 \lsdlocked0 Medium List 1 Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority66 \lsdlocked0 Medium List 2 Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 2; -\lsdsemihidden0 \lsdunhideused0 \lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority70 \lsdlocked0 Dark List Accent 2; -\lsdsemihidden0 \lsdunhideused0 \lsdpriority71 \lsdlocked0 Colorful Shading Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority72 \lsdlocked0 Colorful List Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority73 \lsdlocked0 Colorful Grid Accent 2; -\lsdsemihidden0 \lsdunhideused0 \lsdpriority60 \lsdlocked0 Light Shading Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority61 \lsdlocked0 Light List Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority62 \lsdlocked0 Light Grid Accent 3; -\lsdsemihidden0 \lsdunhideused0 \lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority65 \lsdlocked0 Medium List 1 Accent 3; -\lsdsemihidden0 \lsdunhideused0 \lsdpriority66 \lsdlocked0 Medium List 2 Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 3; -\lsdsemihidden0 \lsdunhideused0 \lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority70 \lsdlocked0 Dark List Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority71 \lsdlocked0 Colorful Shading Accent 3; -\lsdsemihidden0 \lsdunhideused0 \lsdpriority72 \lsdlocked0 Colorful List Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority73 \lsdlocked0 Colorful Grid Accent 
3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority60 \lsdlocked0 Light Shading Accent 4; -\lsdsemihidden0 \lsdunhideused0 \lsdpriority61 \lsdlocked0 Light List Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority62 \lsdlocked0 Light Grid Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 4; -\lsdsemihidden0 \lsdunhideused0 \lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority65 \lsdlocked0 Medium List 1 Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority66 \lsdlocked0 Medium List 2 Accent 4; -\lsdsemihidden0 \lsdunhideused0 \lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 4; -\lsdsemihidden0 \lsdunhideused0 \lsdpriority70 \lsdlocked0 Dark List Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority71 \lsdlocked0 Colorful Shading Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority72 \lsdlocked0 Colorful List Accent 4; -\lsdsemihidden0 \lsdunhideused0 \lsdpriority73 \lsdlocked0 Colorful Grid Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority60 \lsdlocked0 Light Shading Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority61 \lsdlocked0 Light List Accent 5; -\lsdsemihidden0 \lsdunhideused0 \lsdpriority62 \lsdlocked0 Light Grid Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 5; -\lsdsemihidden0 \lsdunhideused0 \lsdpriority65 \lsdlocked0 Medium List 1 Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority66 \lsdlocked0 Medium List 2 Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 5; -\lsdsemihidden0 \lsdunhideused0 \lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 
5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority70 \lsdlocked0 Dark List Accent 5; -\lsdsemihidden0 \lsdunhideused0 \lsdpriority71 \lsdlocked0 Colorful Shading Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority72 \lsdlocked0 Colorful List Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority73 \lsdlocked0 Colorful Grid Accent 5; -\lsdsemihidden0 \lsdunhideused0 \lsdpriority60 \lsdlocked0 Light Shading Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority61 \lsdlocked0 Light List Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority62 \lsdlocked0 Light Grid Accent 6; -\lsdsemihidden0 \lsdunhideused0 \lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority65 \lsdlocked0 Medium List 1 Accent 6; -\lsdsemihidden0 \lsdunhideused0 \lsdpriority66 \lsdlocked0 Medium List 2 Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 6; -\lsdsemihidden0 \lsdunhideused0 \lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority70 \lsdlocked0 Dark List Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority71 \lsdlocked0 Colorful Shading Accent 6; -\lsdsemihidden0 \lsdunhideused0 \lsdpriority72 \lsdlocked0 Colorful List Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority73 \lsdlocked0 Colorful Grid Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority19 \lsdlocked0 Subtle Emphasis; -\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority21 \lsdlocked0 Intense Emphasis;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority31 \lsdlocked0 Subtle Reference; -\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority32 \lsdlocked0 Intense Reference;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority33 \lsdlocked0 Book Title;\lsdpriority37 \lsdlocked0 Bibliography; -\lsdqformat1 
\lsdpriority39 \lsdlocked0 TOC Heading;}}{\*\datastore 0105000002000000180000004d73786d6c322e534158584d4c5265616465722e362e3000000000000000000000060000 -d0cf11e0a1b11ae1000000000000000000000000000000003e000300feff090006000000000000000000000001000000010000000000000000100000feffffff00000000feffffff0000000000000000ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff -ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff -ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff -ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff -fffffffffffffffffdfffffffeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff -ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff -ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff 
-ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff -ffffffffffffffffffffffffffffffff52006f006f007400200045006e00740072007900000000000000000000000000000000000000000000000000000000000000000000000000000000000000000016000500ffffffffffffffffffffffff0c6ad98892f1d411a65f0040963251e5000000000000000000000000808a -33fc965bcc01feffffff00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000ffffffffffffffffffffffff00000000000000000000000000000000000000000000000000000000 -00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000ffffffffffffffffffffffff0000000000000000000000000000000000000000000000000000 -000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000ffffffffffffffffffffffff000000000000000000000000000000000000000000000000 -0000000000000000000000000000000000000000000000000105000000000000}}
\ No newline at end of file diff --git a/thirdparty/directxtex/ReadMe.txt b/thirdparty/directxtex/ReadMe.txt index 0423b920..41649e87 100644 --- a/thirdparty/directxtex/ReadMe.txt +++ b/thirdparty/directxtex/ReadMe.txt @@ -3,7 +3,7 @@ DIRECTX TEXTURE LIBRARY (DirectXTex) Copyright (c) Microsoft Corporation. All rights reserved. -November 15, 2012 +July 29, 2015 This package contains DirectXTex, a shared source library for reading and writing DDS files, and performing various texture content processing operations including @@ -13,12 +13,11 @@ use of the Windows Image Component (WIC) APIs. It also includes a simple .TGA re writer since this image file format is commonly used for texture content processing pipelines, but is not currently supported by a built-in WIC codec. -The source is written for Visual C++ 2010 using the Direct3D headers from either -a current DirectX SDK or Windows SDK. It can also be compiled using Visual Studio 2012 and the -Windows SDK 8.0 headers. +The source is written for Visual Studio 2012, 2013, or 2015. It is recommended that +you make use of the Windows 8.1 SDK and Windows 7 Service Pack 1 or later. -It is recommended that you make use of Visual C++ 2010 Service Pack 1 or VS 2012, and -Windows 7 Service Pack 1 or Windows 8. +NOTE: DirectXTex is not supported on Windows phone 8.0 because WIC is not available on +that platform. It is available on Windows phone starting in version 8.1. DDSTextureLoader\ This contains a streamlined version of the DirectX SDK sample DDSWithoutD3DX11 texture @@ -59,25 +58,25 @@ Texconv\ It supports the same arguments as the Texture Conversion Tool Extended (texconvex.exe) DirectX SDK utility. See <http://msdn.microsoft.com/en-us/library/ee422506.aspx>. The primary differences are the -10 and -11 arguments are not applicable; the filter names (POINT, LINEAR, CUBIC, - FANT, POINT_DITHER, LINEAR_DITHER, CUBIC_DITHER, FANT_DITHER); and support for the .TGA file format. 
+ FANT or BOX, TRIANGLE, *_DITHER, *_DITHER_DIFFUSION); and support for the .TGA file format. This also includes support for JPEG XR/HD Photo bitmap formats (see <http://blogs.msdn.com/b/chuckw/archive/2011/01/19/known-issue-texconvex.aspx>) + +Texassemble\ + This DirectXTex sample is a command-line utility for creating cubemaps, volume maps, or + texture arrays from a set of individual input image files. DDSView\ This DirectXTex sample is a simple Direct3D 11-based viewer for DDS files. For array textures or volume maps, the "<" and ">" keyboard keys will show different images contained in the DDS. The "1" through "0" keys can also be used to jump to a specific image index. -XNAMath\ - This contains a copy of XNA Math version 2.05, which is an updated version of the library. This is - required if building content with USE_XNAMATH (the default for the VS 2010 projects). The VS 2012 - projects use DirectXMath in the Windows SDK 8.0 instead. - For details see - <http://blogs.msdn.com/b/chuckw/archive/2012/06/22/xna-math-version-2-05-smoothing-the-transition-to-directxmath.aspx> +All content and source code for this package are subject to the terms of the MIT License. +<http://opensource.org/licenses/MIT>. -All content and source code for this package except XNA Math are bound to the Microsoft Public License (Ms-PL) -<http://www.microsoft.com/en-us/openness/licenses.aspx#MPL>. The XNA Math library is subject -to the DirectX SDK (June 2010) End-User License Agreement. +Documentation is available at <https://github.com/Microsoft/DirectXTex/wiki>. + +For the latest version of DirectXTex, bug reports, etc. please visit the project site. 
http://go.microsoft.com/fwlink/?LinkId=248926 @@ -85,29 +84,17 @@ http://go.microsoft.com/fwlink/?LinkId=248926 ------------------------------------ RELEASE NOTES -* The DirectXTex library does not support block compression or decompression of mipmapped non-power-of-2 textures, - although DDSTextureLoader will load these files correctly if the underlying device supports it. - -* The DirectXTex library only supports CLAMP filtering, and does not yet support MIRROR or WRAP filtering - (WIC operations only support CLAMP filtering). +* The alpha mode specification for DDS files was updated between the March 2013 and April 2013 releases. Any + DDS files created using the DDS_FLAGS_FORCE_DX10_EXT_MISC2 flag or the texconv -dx10 switch using the + March 2013 release should be refreshed. -* The DirectXTex library only supports box and POINT filtering, and does not support LINEAR or CUBIC filtering, - for 3D volume mipmap-generation. - -* Due to the underlying Windows BMP WIC codec, alpha channels are not supported for 16bpp or 32bpp BMP pixel format files. The Windows 8 +* Due to the underlying Windows BMP WIC codec, alpha channels are not supported for 16bpp or 32bpp BMP pixel format files. The Windows 8.x version of the Windows BMP WIC codec does support 32bpp pixel formats with alpha when using the BITMAPV5HEADER file header. Note the updated WIC is available on Windows 7 SP1 with KB 2670838 installed. -* The WIC conversion cases currently ignore TEX_FILTER_SRGB_IN and TEX_FILTER_SRGB_OUT out. - -* For the DXGI 1.1 version of DirectXTex, 4:4:4:4 pixel format DDS files are always expanded to 8:8:8:8 upon load since DXGI 1.0 - and DXGI 1.1 versions of Direct3D do not support these resource formats. The DXGI 1.2 versions of DirectXTex and DDSTextureLoader - make use of the DXGI_FORMAT_B4G4R4A4_UNORM format instead. 
- * While DXGI 1.0 and DXGI 1.1 include 5:6:5 (DXGI_FORMAT_B5G6R5_UNORM) and 5:5:5:1 (DXGI_FORMAT_B5G5R5A1_UNORM) pixel format enumerations, the DirectX 10.x and 11.0 Runtimes do not support these formats for use with Direct3D. The DirectX 11.1 runtime, - DXGI 1.2, and the WDDM 1.2 driver model fully support 16bpp formats (5:6:5, 5:5:5:1, and 4:4:4:4). The DXGI 1.2 version of WICTextureLoader - will load 16bpp pixel images as 5:6:5 or 5:5:5:1 rather than expand them to 32bpp RGBA. + DXGI 1.2, and the WDDM 1.2 driver model fully support 16bpp formats (5:6:5, 5:5:5:1, and 4:4:4:4). * WICTextureLoader cannot load .TGA files unless the system has a 3rd party WIC codec installed. You must use the DirectXTex library for TGA file format support without relying on an add-on WIC codec. @@ -119,6 +106,129 @@ RELEASE NOTES ------------------------------------ RELEASE HISTORY +July 29, 2015 + Fixed rounding problem with 32-bit RGBA/BGRA format conversions + texconv: use CPU parallel compression for BC1-BC5 (-singleproc disables) + Updated for VS 2015 and Windows 10 SDK RTM + Retired VS 2010 and Windows 8.0 Store projects + +June 18, 2015 + New BC_FLAGS_USE_3SUBSETS option for BC7 compressors; now defaults to skipping 3 subset blocks + Fixed bug with MakeTypeless and A8_UNORM + Fixed file length validation problem in LoadDDSFromFile + +March 27, 2015 + Added projects for Windows apps Technical Preview + Fixed bug with WIC-based mipmap generation for non-WIC supported formats + Fixed bug with WIC multiframe loader when resizing required + texconv: Added -nmap/-nmapamp for generating normal maps from height maps + texconv/texassemble: Updated to load multiframe WIC files (tiff, gif) + Minor code cleanup + +November 24, 2014 + Updates for Visual Studio 2015 Technical Preview + Minor code cleanup + +September 22, 2014 + Format conversion improvements and bug fixes (depth/stencil, alpha-only, float16, RGB -> 1 channel) + Fixed issue when BC decompressing non-standard compressed 
rowPitch images + Explicit calling-convention annotation for all 'public' functions + Code cleanup + Xbox One platform updates + +July 15, 2014 + texconv command-line tool fixes + Fixed problem with 'wide' images with CPU Compress + Updates to Xbox One platform support + +April 3, 2014 + Windows phone 8.1 platform support + +February 24, 2014 + Direct3D 11 video and Xbox One extended format support + New APIs: IsPlanar, IsPalettized, IsDepthStencil, ConvertToSinglePlane + Added 'alphaWeight' parameter to GPU Compress [breaking change] + texconv '-aw' switch to control the alpha weighting for the BC7 GPU compressor + Fixed bug with ordered dithering in non-WIC conversion codepaths + Fixed SaveToDDS* functions when using arbitrary row pitch values + +January 24, 2014 + Added sRGB flags for Compress (TEX_COMPRESS_SRGB*) + Added 'compress' flag parameter to GPU versions of Compress [breaking change] + Minor fix for potential rounding problem in GPU Compress + Code cleanup (removed DXGI_1_2_FORMATS control define; ScopedObject typedef removed) + Dropped VS 2010 support without the Windows 8.1 SDK (removed USE_XNAMATH control define) + +December 24, 2013 + texconv updated with -fl and -pow2 command-line switches + Fixed bug in Resize when doing custom filtering which occurred when exactly doubling the image size + Added move operators to ScratchImage and Blob classes + Xbox One platform support + +October 21, 2013 + Updated for Visual Studio 2013 and Windows 8.1 SDK RTM + PremultiplyAlpha updated with new 'flags' parameter and to use sRGB correct blending + Fixed colorspace conversion issue with DirectCompute compressor when compressing for BC7 SRGB + +August 13, 2013 + DirectCompute 4.0 BC6H/BC7 compressor integration + texconv utility uses DirectCompute compression by default for BC6H/BC7, -nogpu disables use of DirectCompute + +August 1, 2013 + Support for BC compression/decompression of non-power-of-2 mipmapped textures + Fixes for BC6H / BC7 codecs to better match 
published standard + Fix for BC4 / BC5 codecs when compressing RGB images + Minor fix for the BC1-3 codec + New optional flags for ComputeMSE to compare UNORM vs. SNORM images + New WIC loading flag added to control use of WIC metadata to return sRGB vs. non-sRGB formats + Code cleanup and /analyze fixes + Project file cleanup + Texconv utility uses parallel BC compression by default for BC6H/BC7, -singleproc disables multithreaded behavior + +July 1, 2013 + VS 2013 Preview projects added + SaveToWIC functions updated with new optional setCustomProps parameter + +June 15, 2013 + Custom filtering implementation for Resize & GenerateMipMaps(3D) - Point, Box, Linear, Cubic, and Triangle + TEX_FILTER_TRIANGLE finite low-pass triangle filter + TEX_FILTER_WRAP, TEX_FILTER_MIRROR texture semantics for custom filtering + TEX_FILTER_BOX alias for TEX_FILTER_FANT WIC + Ordered and error diffusion dithering for non-WIC conversion + sRGB gamma correct custom filtering and conversion + DDS_FLAGS_EXPAND_LUMINANCE - Reader conversion option for L8, L16, and A8L8 legacy DDS files + Added use of WIC metadata for sRGB pixel formats + Added BitsPerColor utility function + Fixed Convert threshold parameter usage + Non-power-of-2 volume map support, fixed bug with non-square volume maps + Texconv utility update with -xlum, -wrap, and -mirror options; reworked -if options for improved dithering + Texassemble utility for creating cubemaps, volume maps, and texture arrays + DDSTextureLoader and WICTextureLoader sync'd with DirectXTK versions + +April 16, 2013 + Updated alpha-mode metadata details in .DDS files + Added new control flags for Convert + Added new optional flags for ComputeMSE + Fixed conversion handling for sRGB formats + Fixed internal routines for handling R10G10B10_XR_BIAS_A2_UNORM, R9G9B9E5_SHAREDEXP, and FORMAT_R1_UNORM + Fixed WIC I/O for GUID_WICPixelFormat32bppRGBE pixel format files (HD Photo) + Fixed non-square image handling in GenerateMipMaps3D + Fixed some error 
handling in the DDS load code + +March 22, 2013 + Supports reading and writing alpha-mode (straight, premultiplied, etc.) metadata in .DDS files + Added build option to use WICCreateImagingFactory_Proxy instead of CoCreateInstance to obtain WIC factory + +January 29, 2013 + Added PremultiplyAlpha to DirectXTex; -pmalpha switch for texconv command-line tool + Fixed problem with forceSRGB implementation for Ex versions of CreateTexture, CreateShaderResourceView, DDSTextureLoader and WICTextureLoader + +December 11, 2012 + Ex versions of CreateTexture, CreateShaderResourceView, DDSTextureLoader and WICTextureLoader + Fixed BC2 and BC3 decompression issue for unusual color encoding case + Converted annotation to SAL2 for improved VS 2012 /analyze experience + Updated DirectXTex, DDSView, and Texconv with VS 2010 + Windows 8.0 SDK project using official 'property sheets' + November 15, 2012 Added support for WIC2 when available on Windows 8 and Windows 7 with KB 2670838 Added optional targetGUID parameter to SaveWIC* APIs to influence final container pixel format choice diff --git a/thirdparty/mingw/wrl/wrl/client.h b/thirdparty/mingw/wrl/wrl/client.h new file mode 100644 index 00000000..60890d69 --- /dev/null +++ b/thirdparty/mingw/wrl/wrl/client.h @@ -0,0 +1,112 @@ +/************************************************************************** + * + * Copyright 2014-2015 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + **************************************************************************/ + +#pragma once + + +#include <assert.h> + +#include <windows.h> + + +namespace Microsoft { +namespace WRL { + +/** + * See + * - https://msdn.microsoft.com/en-us/library/br244983.aspx + * - https://msdn.microsoft.com/en-us/magazine/dn904668.aspx + */ +template< typename T > +class ComPtr +{ +private: + T *p; + +public: + ComPtr(void) : + p(nullptr) + { + } + + ComPtr(T *_p) : + p(_p) + { + } + + ~ComPtr() { + T *temp = p; + p = nullptr; + if (temp) { + temp->Release(); + } + } + + T ** + GetAddressOf(void) { + assert(p == nullptr); + return &p; + } + + T * + Get(void) const { + return p; + } + + struct no_ref_count : public T + { + private: + ULONG STDMETHODCALLTYPE AddRef(void); + ULONG STDMETHODCALLTYPE Release(void); + }; + + // Methods + no_ref_count * + operator -> () const { + assert(p != nullptr); + return static_cast< no_ref_count *>(p); + } + + ComPtr & + operator = (T *q) { + if (p != q) { + T *temp = p; + p = q; + if (temp) { + temp->Release(); + } + if (q) { + q->AddRef(); + } + } + return *this; + } + + ComPtr(const ComPtr &) = delete; + ComPtr & operator= (const ComPtr &) = delete; +}; + +} // namespace WRL +} // namespace Microsoft |