diff options
author | Tim Rowley <timothy.o.rowley@intel.com> | 2016-09-06 12:36:02 -0500 |
---|---|---|
committer | Tim Rowley <timothy.o.rowley@intel.com> | 2016-09-19 20:10:19 -0500 |
commit | 2f86a9577adf5c43e892f899224d0f73ff1d37c2 (patch) | |
tree | cbf7087434879fcfc4ae269cbeb5015ca3cdfc36 | |
parent | 04026b43c89c6fdb794650f8c80e356707cc6d69 (diff) |
swr: [rasterizer core] Add macros for mapping ArchRast to buckets
Switch all RDTSC_START/STOP macros to use AR_BEGIN/END macros.
Signed-off-by: Tim Rowley <timothy.o.rowley@intel.com>
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/api.cpp | 56 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/backend.cpp | 154 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/backend.h | 18 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/clip.cpp | 15 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/clip.h | 8 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/context.h | 36 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/frontend.cpp | 74 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp | 46 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/threads.cpp | 14 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp | 26 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/tilemgr.h | 2 |
11 files changed, 249 insertions, 200 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp index 6bdb8f4b41..df87d14ca3 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp @@ -46,8 +46,6 @@ #include "common/simdintrin.h" #include "common/os.h" -#include "archrast/archrast.h" - static const SWR_RECT g_MaxScissorRect = { 0, 0, KNOB_MAX_SCISSOR_X, KNOB_MAX_SCISSOR_Y }; void SetupDefaultState(SWR_CONTEXT *pContext); @@ -264,9 +262,9 @@ void QueueWork(SWR_CONTEXT *pContext) } else { - RDTSC_START(APIDrawWakeAllThreads); + AR_API_BEGIN(APIDrawWakeAllThreads, pDC->drawId); WakeAllThreads(pContext); - RDTSC_STOP(APIDrawWakeAllThreads, 1, 0); + AR_API_END(APIDrawWakeAllThreads, 1); } // Set current draw context to NULL so that next state call forces a new draw context to be created and populated. @@ -286,7 +284,7 @@ INLINE void QueueDispatch(SWR_CONTEXT* pContext) DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false) { - RDTSC_START(APIGetDrawContext); + AR_API_BEGIN(APIGetDrawContext, 0); // If current draw context is null then need to obtain a new draw context to use from ring. if (pContext->pCurDrawContext == nullptr) { @@ -372,7 +370,7 @@ DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false) SWR_ASSERT(isSplitDraw == false, "Split draw should only be used when obtaining a new DC"); } - RDTSC_STOP(APIGetDrawContext, 0, 0); + AR_API_END(APIGetDrawContext, 0); return pContext->pCurDrawContext; } @@ -418,13 +416,13 @@ void SetupDefaultState(SWR_CONTEXT *pContext) void SwrSync(HANDLE hContext, PFN_CALLBACK_FUNC pfnFunc, uint64_t userData, uint64_t userData2, uint64_t userData3) { - RDTSC_START(APISync); - SWR_ASSERT(pfnFunc != nullptr); SWR_CONTEXT *pContext = GetContext(hContext); DRAW_CONTEXT* pDC = GetDrawContext(pContext); + AR_API_BEGIN(APISync, 0); + pDC->FeWork.type = SYNC; pDC->FeWork.pfnWork = ProcessSync; @@ -437,35 +435,35 @@ void SwrSync(HANDLE hContext, PFN_CALLBACK_FUNC pfnFunc, uint64_t userData, uint //enqueue QueueDraw(pContext); - RDTSC_STOP(APISync, 1, 0); + AR_API_END(APISync, 1); } void SwrWaitForIdle(HANDLE hContext) { SWR_CONTEXT *pContext = GetContext(hContext); - RDTSC_START(APIWaitForIdle); + AR_API_BEGIN(APIWaitForIdle, 0); while (!pContext->dcRing.IsEmpty()) { _mm_pause(); } - RDTSC_STOP(APIWaitForIdle, 1, 0); + AR_API_END(APIWaitForIdle, 1); } void SwrWaitForIdleFE(HANDLE hContext) { SWR_CONTEXT *pContext = GetContext(hContext); - RDTSC_START(APIWaitForIdle); + AR_API_BEGIN(APIWaitForIdle, 0); while (pContext->drawsOutstandingFE > 0) { _mm_pause(); } - RDTSC_STOP(APIWaitForIdle, 1, 0); + AR_API_END(APIWaitForIdle, 1); } void SwrSetVertexBuffers( @@ -1080,11 +1078,11 @@ void DrawInstanced( return; } - RDTSC_START(APIDraw); - SWR_CONTEXT *pContext = GetContext(hContext); DRAW_CONTEXT* pDC = GetDrawContext(pContext); + AR_API_BEGIN(APIDraw, pDC->drawId); + uint32_t maxVertsPerDraw = MaxVertsPerDraw(pDC, numVertices, topology); uint32_t primsPerDraw = GetNumPrims(topology, maxVertsPerDraw); uint32_t remainingVerts = numVertices; @@ -1139,7 +1137,7 @@ void DrawInstanced( pDC = GetDrawContext(pContext); pDC->pState->state.rastState.cullMode = oldCullMode; - RDTSC_STOP(APIDraw, numVertices * numInstances, 0); + AR_API_END(APIDraw, numVertices * numInstances); } ////////////////////////////////////////////////////////////////////////// @@ -1200,14 +1198,12 @@ void DrawIndexedInstance( return; } - RDTSC_START(APIDrawIndexed); - SWR_CONTEXT *pContext = GetContext(hContext); DRAW_CONTEXT* pDC = GetDrawContext(pContext); API_STATE* pState = &pDC->pState->state; - AR_BEGIN(AR_API_CTX, APIDrawIndexed, pDC->drawId); - AR_EVENT(AR_API_CTX, DrawIndexedInstance(topology, numIndices, indexOffset, baseVertex, numInstances, startInstance)); + AR_API_BEGIN(APIDrawIndexed, pDC->drawId); + AR_API_EVENT(DrawIndexedInstance(topology, numIndices, indexOffset, baseVertex, numInstances, startInstance)); uint32_t maxIndicesPerDraw = MaxVertsPerDraw(pDC, numIndices, topology); uint32_t primsPerDraw = GetNumPrims(topology, maxIndicesPerDraw); @@ -1280,8 +1276,7 @@ void DrawIndexedInstance( pDC = GetDrawContext(pContext); pDC->pState->state.rastState.cullMode = oldCullMode; - AR_END(AR_API_CTX, APIDrawIndexed, numIndices * numInstances); - RDTSC_STOP(APIDrawIndexed, numIndices * numInstances, 0); + AR_API_END(APIDrawIndexed, numIndices * numInstances); } @@ -1406,10 +1401,11 @@ void SwrDispatch( return; } - RDTSC_START(APIDispatch); SWR_CONTEXT *pContext = GetContext(hContext); DRAW_CONTEXT* pDC = GetDrawContext(pContext); + AR_API_BEGIN(APIDispatch, pDC->drawId); + pDC->isCompute = true; // This is a compute context. COMPUTE_DESC* pTaskData = (COMPUTE_DESC*)pDC->pArena->AllocAligned(sizeof(COMPUTE_DESC), 64); @@ -1424,7 +1420,7 @@ void SwrDispatch( pDC->pDispatch->initialize(totalThreadGroups, pTaskData); QueueDispatch(pContext); - RDTSC_STOP(APIDispatch, threadGroupCountX * threadGroupCountY * threadGroupCountZ, 0); + AR_API_END(APIDispatch, threadGroupCountX * threadGroupCountY * threadGroupCountZ); } // Deswizzles, converts and stores current contents of the hot tiles to surface @@ -1440,11 +1436,11 @@ void SWR_API SwrStoreTiles( return; } - RDTSC_START(APIStoreTiles); - SWR_CONTEXT *pContext = GetContext(hContext); DRAW_CONTEXT* pDC = GetDrawContext(pContext); + AR_API_BEGIN(APIStoreTiles, pDC->drawId); + pDC->FeWork.type = STORETILES; pDC->FeWork.pfnWork = ProcessStoreTiles; pDC->FeWork.desc.storeTiles.attachment = attachment; @@ -1455,7 +1451,7 @@ void SWR_API SwrStoreTiles( //enqueue QueueDraw(pContext); - RDTSC_STOP(APIStoreTiles, 0, 0); + AR_API_END(APIStoreTiles, 1); } ////////////////////////////////////////////////////////////////////////// @@ -1479,11 +1475,11 @@ void SWR_API SwrClearRenderTarget( return; } - RDTSC_START(APIClearRenderTarget); - SWR_CONTEXT *pContext = GetContext(hContext); DRAW_CONTEXT* pDC = GetDrawContext(pContext); + AR_API_BEGIN(APIClearRenderTarget, pDC->drawId); + CLEAR_FLAGS flags; flags.bits = 0; flags.mask = clearMask; @@ -1503,7 +1499,7 @@ void SWR_API SwrClearRenderTarget( // enqueue draw QueueDraw(pContext); - RDTSC_STOP(APIClearRenderTarget, 0, pDC->drawId); + AR_API_END(APIClearRenderTarget, 1); } ////////////////////////////////////////////////////////////////////////// diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.cpp b/src/gallium/drivers/swr/rasterizer/core/backend.cpp index 0e92ccf2c8..d3d114ecdb 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/backend.cpp @@ -47,10 +47,10 @@ static PFN_CLEAR_TILES sClearTilesTable[NUM_SWR_FORMATS]; /// @param threadGroupId - the linear index for the thread group within the dispatch. void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer) { - RDTSC_START(BEDispatch); - SWR_CONTEXT *pContext = pDC->pContext; + AR_BEGIN(BEDispatch, pDC->drawId); + const COMPUTE_DESC* pTaskData = (COMPUTE_DESC*)pDC->pDispatch->GetTasksData(); SWR_ASSERT(pTaskData != nullptr); @@ -75,7 +75,7 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroup UPDATE_STAT(CsInvocations, state.totalThreadsInGroup); - RDTSC_STOP(BEDispatch, 1, 0); + AR_END(BEDispatch, 1); } void ProcessSyncBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData) @@ -180,16 +180,17 @@ INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, SWR_RENDERTARGET_ATTACHMENT rt, ui void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData) { + SWR_CONTEXT *pContext = pDC->pContext; + if (KNOB_FAST_CLEAR) { CLEAR_DESC *pClear = (CLEAR_DESC*)pUserData; - SWR_CONTEXT *pContext = pDC->pContext; SWR_MULTISAMPLE_COUNT sampleCount = pDC->pState->state.rastState.sampleCount; uint32_t numSamples = GetNumSamples(sampleCount); SWR_ASSERT(pClear->flags.bits != 0); // shouldn't be here without a reason. - RDTSC_START(BEClear); + AR_BEGIN(BEClear, pDC->drawId); if (pClear->flags.mask & SWR_CLEAR_COLOR) { @@ -217,13 +218,13 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo pHotTile->state = HOTTILE_CLEAR; } - RDTSC_STOP(BEClear, 0, 0); + AR_END(BEClear, 1); } else { // Legacy clear CLEAR_DESC *pClear = (CLEAR_DESC*)pUserData; - RDTSC_START(BEClear); + AR_BEGIN(BEClear, pDC->drawId); if (pClear->flags.mask & SWR_CLEAR_COLOR) { @@ -265,17 +266,18 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo pfnClearTiles(pDC, SWR_ATTACHMENT_STENCIL, macroTile, clearData, pClear->rect); } - RDTSC_STOP(BEClear, 0, 0); + AR_END(BEClear, 1); } } void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData) { - RDTSC_START(BEStoreTiles); STORE_TILES_DESC *pDesc = (STORE_TILES_DESC*)pData; SWR_CONTEXT *pContext = pDC->pContext; + AR_BEGIN(BEStoreTiles, pDC->drawId); + #ifdef KNOB_ENABLE_RDTSC uint32_t numTiles = 0; #endif @@ -326,7 +328,7 @@ void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile pHotTile->state = (HOTTILE_STATE)pDesc->postStoreTileState; } } - RDTSC_STOP(BEStoreTiles, numTiles, pDC->drawId); + AR_END(BEStoreTiles, numTiles); } @@ -387,8 +389,10 @@ simdmask ComputeUserClipMask(uint8_t clipMask, float* pUserClipBuffer, simdscala template<typename T> void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers) { - RDTSC_START(BESingleSampleBackend); - RDTSC_START(BESetup); + SWR_CONTEXT *pContext = pDC->pContext; + + AR_BEGIN(BESingleSampleBackend, pDC->drawId); + AR_BEGIN(BESetup, pDC->drawId); const API_STATE& state = GetApiState(pDC); const SWR_RASTSTATE& rastState = state.rastState; @@ -423,7 +427,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3 pColorBase[rt] = renderBuffers.pColor[rt]; } uint8_t *pDepthBase = renderBuffers.pDepth, *pStencilBase = renderBuffers.pStencil; - RDTSC_STOP(BESetup, 0, 0); + AR_END(BESetup, 1); SWR_PS_CONTEXT psContext; psContext.pAttribs = work.pAttribs; @@ -462,7 +466,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3 generateInputCoverage<T, T::InputCoverage>(pCoverageMask, psContext.inputMask, pBlendState->sampleMask); } - RDTSC_START(BEBarycentric); + AR_BEGIN(BEBarycentric, pDC->drawId); CalcPixelBarycentrics(coeffs, psContext); // for 1x case, centroid is pixel center @@ -475,7 +479,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3 // interpolate and quantize z psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center); psContext.vZ = state.pfnQuantizeDepth(psContext.vZ); - RDTSC_STOP(BEBarycentric, 0, 0); + AR_END(BEBarycentric, 1); simdmask clipCoverageMask = coverageMask & MASK; // interpolate user clip distance if available @@ -492,10 +496,10 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3 // Early-Z? if(T::bCanEarlyZ) { - RDTSC_START(BEEarlyDepthTest); + AR_BEGIN(BEEarlyDepthTest, pDC->drawId); depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex, psContext.vZ, pDepthBase, vCoverageMask, pStencilBase, &stencilPassMask); - RDTSC_STOP(BEEarlyDepthTest, 0, 0); + AR_END(BEEarlyDepthTest, 0); // early-exit if no pixels passed depth or earlyZ is forced on if(pPSState->forceEarlyZ || !_simd_movemask_ps(depthPassMask)) @@ -514,20 +518,20 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3 psContext.activeMask = _simd_castps_si(vCoverageMask); // execute pixel shader - RDTSC_START(BEPixelShader); + AR_BEGIN(BEPixelShader, pDC->drawId); UPDATE_STAT(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask))); state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext); - RDTSC_STOP(BEPixelShader, 0, 0); + AR_END(BEPixelShader, 0); vCoverageMask = _simd_castsi_ps(psContext.activeMask); // late-Z if(!T::bCanEarlyZ) { - RDTSC_START(BELateDepthTest); + AR_BEGIN(BELateDepthTest, pDC->drawId); depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex, psContext.vZ, pDepthBase, vCoverageMask, pStencilBase, &stencilPassMask); - RDTSC_STOP(BELateDepthTest, 0, 0); + AR_END(BELateDepthTest, 0); if(!_simd_movemask_ps(depthPassMask)) { @@ -543,7 +547,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3 UPDATE_STAT(DepthPassCount, statCount); // output merger - RDTSC_START(BEOutputMerger); + AR_BEGIN(BEOutputMerger, pDC->drawId); OutputMerger(psContext, pColorBase, 0, pBlendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, pPSState->numRenderTargets); // do final depth write after all pixel kills @@ -552,11 +556,11 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3 DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ, pDepthBase, depthPassMask, vCoverageMask, pStencilBase, stencilPassMask); } - RDTSC_STOP(BEOutputMerger, 0, 0); + AR_END(BEOutputMerger, 0); } Endtile: - RDTSC_START(BEEndTile); + AR_BEGIN(BEEndTile, pDC->drawId); coverageMask >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM); if(T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE) { @@ -569,17 +573,19 @@ Endtile: { pColorBase[rt] += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8; } - RDTSC_STOP(BEEndTile, 0, 0); + AR_END(BEEndTile, 0); } } - RDTSC_STOP(BESingleSampleBackend, 0, 0); + AR_END(BESingleSampleBackend, 0); } template<typename T> void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers) { - RDTSC_START(BESampleRateBackend); - RDTSC_START(BESetup); + SWR_CONTEXT *pContext = pDC->pContext; + + AR_BEGIN(BESampleRateBackend, pDC->drawId); + AR_BEGIN(BESetup, pDC->drawId); const API_STATE& state = GetApiState(pDC); const SWR_RASTSTATE& rastState = state.rastState; @@ -613,7 +619,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ pColorBase[rt] = renderBuffers.pColor[rt]; } uint8_t *pDepthBase = renderBuffers.pDepth, *pStencilBase = renderBuffers.pStencil; - RDTSC_STOP(BESetup, 0, 0); + AR_END(BESetup, 0); SWR_PS_CONTEXT psContext; psContext.pAttribs = work.pAttribs; @@ -643,9 +649,9 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ // pixel center psContext.vX.center = _simd_add_ps(vCenterOffsetsX, _simd_set1_ps((float)xx)); - RDTSC_START(BEBarycentric); + AR_BEGIN(BEBarycentric, pDC->drawId); CalcPixelBarycentrics(coeffs, psContext); - RDTSC_STOP(BEBarycentric, 0, 0); + AR_END(BEBarycentric, 0); if(T::InputCoverage != SWR_INPUT_COVERAGE_NONE) { @@ -657,7 +663,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ if(T::bCentroidPos) { ///@ todo: don't need to genererate input coverage 2x if input coverage and centroid - RDTSC_START(BEBarycentric); + AR_BEGIN(BEBarycentric, pDC->drawId); if(T::bIsStandardPattern) { CalcCentroidPos<T>(psContext, &work.coverageMask[0], pBlendState->sampleMask, psContext.vX.UL, psContext.vY.UL); @@ -668,7 +674,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ psContext.vY.centroid = _simd_add_ps(psContext.vY.UL, _simd_set1_ps(0.5f)); } CalcCentroidBarycentrics(coeffs, psContext, psContext.vX.UL, psContext.vY.UL); - RDTSC_STOP(BEBarycentric, 0, 0); + AR_END(BEBarycentric, 0); } else { @@ -681,7 +687,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ simdmask coverageMask = work.coverageMask[sample] & MASK; if (coverageMask) { - RDTSC_START(BEBarycentric); + AR_BEGIN(BEBarycentric, pDC->drawId); // calculate per sample positions psContext.vX.sample = _simd_add_ps(psContext.vX.UL, T::MultisampleT::vX(sample)); psContext.vY.sample = _simd_add_ps(psContext.vY.UL, T::MultisampleT::vY(sample)); @@ -691,7 +697,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ // interpolate and quantize z psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample); psContext.vZ = state.pfnQuantizeDepth(psContext.vZ); - RDTSC_STOP(BEBarycentric, 0, 0); + AR_END(BEBarycentric, 0); // interpolate user clip distance if available if (rastState.clipDistanceMask) @@ -711,10 +717,10 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ // Early-Z? if (T::bCanEarlyZ) { - RDTSC_START(BEEarlyDepthTest); + AR_BEGIN(BEEarlyDepthTest, pDC->drawId); depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex, psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask); - RDTSC_STOP(BEEarlyDepthTest, 0, 0); + AR_END(BEEarlyDepthTest, 0); // early-exit if no samples passed depth or earlyZ is forced on. if (pPSState->forceEarlyZ || !_simd_movemask_ps(depthPassMask)) @@ -734,20 +740,20 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ psContext.activeMask = _simd_castps_si(vCoverageMask); // execute pixel shader - RDTSC_START(BEPixelShader); + AR_BEGIN(BEPixelShader, pDC->drawId); UPDATE_STAT(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask))); state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext); - RDTSC_STOP(BEPixelShader, 0, 0); + AR_END(BEPixelShader, 0); vCoverageMask = _simd_castsi_ps(psContext.activeMask); // late-Z if (!T::bCanEarlyZ) { - RDTSC_START(BELateDepthTest); + AR_BEGIN(BELateDepthTest, pDC->drawId); depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex, psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask); - RDTSC_STOP(BELateDepthTest, 0, 0); + AR_END(BELateDepthTest, 0); if (!_simd_movemask_ps(depthPassMask)) { @@ -765,7 +771,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ UPDATE_STAT(DepthPassCount, statCount); // output merger - RDTSC_START(BEOutputMerger); + AR_BEGIN(BEOutputMerger, pDC->drawId); OutputMerger(psContext, pColorBase, sample, pBlendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, pPSState->numRenderTargets); // do final depth write after all pixel kills @@ -774,11 +780,11 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ, pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask); } - RDTSC_STOP(BEOutputMerger, 0, 0); + AR_END(BEOutputMerger, 0); } work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM); } - RDTSC_START(BEEndTile); + AR_BEGIN(BEEndTile, pDC->drawId); if(T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE) { work.innerCoverageMask >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM); @@ -790,17 +796,19 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ { pColorBase[rt] += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8; } - RDTSC_STOP(BEEndTile, 0, 0); + AR_END(BEEndTile, 0); } } - RDTSC_STOP(BESampleRateBackend, 0, 0); + AR_END(BESampleRateBackend, 0); } template<typename T> void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers) { - RDTSC_START(BEPixelRateBackend); - RDTSC_START(BESetup); + SWR_CONTEXT *pContext = pDC->pContext; + + AR_BEGIN(BEPixelRateBackend, pDC->drawId); + AR_BEGIN(BESetup, pDC->drawId); const API_STATE& state = GetApiState(pDC); const SWR_RASTSTATE& rastState = state.rastState; @@ -834,7 +842,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t pColorBase[rt] = renderBuffers.pColor[rt]; } uint8_t *pDepthBase = renderBuffers.pDepth, *pStencilBase = renderBuffers.pStencil; - RDTSC_STOP(BESetup, 0, 0); + AR_END(BESetup, 0); SWR_PS_CONTEXT psContext; psContext.pAttribs = work.pAttribs; @@ -852,7 +860,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t psContext.sampleIndex = 0; - PixelRateZTestLoop<T> PixelRateZTest(pDC, work, coeffs, state, pDepthBase, pStencilBase, rastState.clipDistanceMask); + PixelRateZTestLoop<T> PixelRateZTest(pDC, workerId, work, coeffs, state, pDepthBase, pStencilBase, rastState.clipDistanceMask); for(uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM) { @@ -868,9 +876,9 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t // set pixel center positions psContext.vX.center = _simd_add_ps(vCenterOffsetsX, _simd_set1_ps((float)xx)); - RDTSC_START(BEBarycentric); + AR_BEGIN(BEBarycentric, pDC->drawId); CalcPixelBarycentrics(coeffs, psContext); - RDTSC_STOP(BEBarycentric, 0, 0); + AR_END(BEBarycentric, 0); if (T::InputCoverage != SWR_INPUT_COVERAGE_NONE) { @@ -882,7 +890,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t if(T::bCentroidPos) { ///@ todo: don't need to genererate input coverage 2x if input coverage and centroid - RDTSC_START(BEBarycentric); + AR_BEGIN(BEBarycentric, pDC->drawId); if(T::bIsStandardPattern) { CalcCentroidPos<T>(psContext, &work.coverageMask[0], pBlendState->sampleMask, psContext.vX.UL, psContext.vY.UL); @@ -894,7 +902,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t } CalcCentroidBarycentrics(coeffs, psContext, psContext.vX.UL, psContext.vY.UL); - RDTSC_STOP(BEBarycentric, 0, 0); + AR_END(BEBarycentric, 0); } else { @@ -921,11 +929,11 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t if(pPSState->usesSourceDepth) { - RDTSC_START(BEBarycentric); + AR_BEGIN(BEBarycentric, pDC->drawId); // interpolate and quantize z psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center); psContext.vZ = state.pfnQuantizeDepth(psContext.vZ); - RDTSC_STOP(BEBarycentric, 0, 0); + AR_END(BEBarycentric, 0); } // pixels that are currently active @@ -933,10 +941,10 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t psContext.oMask = T::MultisampleT::FullSampleMask(); // execute pixel shader - RDTSC_START(BEPixelShader); + AR_BEGIN(BEPixelShader, pDC->drawId); state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext); UPDATE_STAT(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(activeLanes))); - RDTSC_STOP(BEPixelShader, 0, 0); + AR_END(BEPixelShader, 0); // update active lanes to remove any discarded or oMask'd pixels activeLanes = _simd_castsi_ps(_simd_and_si(psContext.activeMask, _simd_cmpgt_epi32(psContext.oMask, _simd_setzero_si()))); @@ -956,7 +964,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t // loop over all samples, broadcasting the results of the PS to all passing pixels for(uint32_t sample = 0; sample < GetNumOMSamples<T>(pBlendState->sampleCount); sample++) { - RDTSC_START(BEOutputMerger); + AR_BEGIN(BEOutputMerger, pDC->drawId); // center pattern does a single coverage/depth/stencil test, standard pattern tests all samples uint32_t coverageSampleNum = (T::bIsStandardPattern) ? sample : 0; simdscalar coverageMask, depthMask; @@ -971,7 +979,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t if(!_simd_movemask_ps(depthMask)) { // stencil should already have been written in early/lateZ tests - RDTSC_STOP(BEOutputMerger, 0, 0); + AR_END(BEOutputMerger, 0); continue; } } @@ -987,10 +995,10 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, PixelRateZTest.vZ[coverageSampleNum], pDepthSample, depthMask, coverageMask, pStencilSample, PixelRateZTest.stencilPassMask[coverageSampleNum]); } - RDTSC_STOP(BEOutputMerger, 0, 0); + AR_END(BEOutputMerger, 0); } Endtile: - RDTSC_START(BEEndTile); + AR_BEGIN(BEEndTile, pDC->drawId); for(uint32_t sample = 0; sample < T::MultisampleT::numCoverageSamples; sample++) { work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM); @@ -1008,19 +1016,21 @@ Endtile: { pColorBase[rt] += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8; } - RDTSC_STOP(BEEndTile, 0, 0); + AR_END(BEEndTile, 0); } } - RDTSC_STOP(BEPixelRateBackend, 0, 0); + AR_END(BEPixelRateBackend, 0); } // optimized backend flow with NULL PS template<uint32_t sampleCountT> void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers) { - RDTSC_START(BENullBackend); + SWR_CONTEXT *pContext = pDC->pContext; + + AR_BEGIN(BENullBackend, pDC->drawId); ///@todo: handle center multisample pattern typedef SwrBackendTraits<sampleCountT, SWR_MSAA_STANDARD_PATTERN> T; - RDTSC_START(BESetup); + AR_BEGIN(BESetup, pDC->drawId); const API_STATE& state = GetApiState(pDC); const SWR_RASTSTATE& rastState = pDC->pState->state.rastState; @@ -1043,7 +1053,7 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, uint8_t *pDepthBase = renderBuffers.pDepth, *pStencilBase = renderBuffers.pStencil; - RDTSC_STOP(BESetup, 0, 0); + AR_END(BESetup, 0); SWR_PS_CONTEXT psContext; for (uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM) @@ -1065,7 +1075,7 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, simdmask coverageMask = work.coverageMask[sample] & MASK; if (coverageMask) { - RDTSC_START(BEBarycentric); + AR_BEGIN(BEBarycentric, pDC->drawId); // calculate per sample positions psContext.vX.sample = _simd_add_ps(vXSamplePosUL, T::MultisampleT::vX(sample)); psContext.vY.sample = _simd_add_ps(vYSamplePosUL, T::MultisampleT::vY(sample)); @@ -1076,7 +1086,7 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample); psContext.vZ = state.pfnQuantizeDepth(psContext.vZ); - RDTSC_STOP(BEBarycentric, 0, 0); + AR_END(BEBarycentric, 0); // interpolate user clip distance if available if (rastState.clipDistanceMask) @@ -1092,12 +1102,12 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, uint8_t *pDepthSample = pDepthBase + RasterTileDepthOffset(sample); uint8_t *pStencilSample = pStencilBase + RasterTileStencilOffset(sample); - RDTSC_START(BEEarlyDepthTest); + AR_BEGIN(BEEarlyDepthTest, pDC->drawId); simdscalar depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex, psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask); DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ, pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask); - RDTSC_STOP(BEEarlyDepthTest, 0, 0); + AR_END(BEEarlyDepthTest, 0); uint32_t statMask = _simd_movemask_ps(depthPassMask); uint32_t statCount = _mm_popcnt_u32(statMask); @@ -1109,7 +1119,7 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, pStencilBase += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8; } } - RDTSC_STOP(BENullBackend, 0, 0); + AR_END(BENullBackend, 0); } void InitClearTilesTable() diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.h b/src/gallium/drivers/swr/rasterizer/core/backend.h index fde5a3f8d9..9d2f317f31 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend.h +++ b/src/gallium/drivers/swr/rasterizer/core/backend.h @@ -432,15 +432,17 @@ INLINE uint32_t GetNumOMSamples(SWR_MULTISAMPLE_COUNT blendSampleCount) template<typename T> struct PixelRateZTestLoop { - PixelRateZTestLoop(DRAW_CONTEXT *DC, const SWR_TRIANGLE_DESC &Work, const BarycentricCoeffs& Coeffs, const API_STATE& apiState, + PixelRateZTestLoop(DRAW_CONTEXT *DC, uint32_t _workerId, const SWR_TRIANGLE_DESC &Work, const BarycentricCoeffs& Coeffs, const API_STATE& apiState, uint8_t*& depthBase, uint8_t*& stencilBase, const uint8_t ClipDistanceMask) : - work(Work), coeffs(Coeffs), state(apiState), psState(apiState.psState), + pDC(DC), workerId(_workerId), work(Work), coeffs(Coeffs), state(apiState), psState(apiState.psState), clipDistanceMask(ClipDistanceMask), pDepthBase(depthBase), pStencilBase(stencilBase) {}; INLINE uint32_t operator()(simdscalar& activeLanes, SWR_PS_CONTEXT& psContext, const CORE_BUCKETS BEDepthBucket, uint32_t currentSimdIn8x8 = 0) { + SWR_CONTEXT *pContext = pDC->pContext; + uint32_t statCount = 0; simdscalar anyDepthSamplePassed = _simd_setzero_ps(); for(uint32_t sample = 0; sample < T::MultisampleT::numCoverageSamples; sample++) @@ -454,7 +456,7 @@ struct PixelRateZTestLoop continue; } - RDTSC_START(BEBarycentric); + AR_BEGIN(BEBarycentric, pDC->drawId); // calculate per sample positions psContext.vX.sample = _simd_add_ps(psContext.vX.UL, T::MultisampleT::vX(sample)); psContext.vY.sample = _simd_add_ps(psContext.vY.UL, T::MultisampleT::vY(sample)); @@ -472,7 +474,7 @@ struct PixelRateZTestLoop vZ[sample] = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample); vZ[sample] = state.pfnQuantizeDepth(vZ[sample]); } - RDTSC_STOP(BEBarycentric, 0, 0); + AR_END(BEBarycentric, 0); ///@todo: perspective correct vs non-perspective correct clipping? // if clip distances are enabled, we need to interpolate for each sample @@ -488,13 +490,14 @@ struct PixelRateZTestLoop uint8_t * pStencilSample = pStencilBase + RasterTileStencilOffset(sample); // ZTest for this sample - RDTSC_START(BEDepthBucket); + ///@todo Need to uncomment out this bucket. + //AR_BEGIN(BEDepthBucket, pDC->drawId); depthPassMask[sample] = vCoverageMask[sample]; stencilPassMask[sample] = vCoverageMask[sample]; depthPassMask[sample] = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex, vZ[sample], pDepthSample, vCoverageMask[sample], pStencilSample, &stencilPassMask[sample]); - RDTSC_STOP(BEDepthBucket, 0, 0); + //AR_END(BEDepthBucket, 0); // early-exit if no pixels passed depth or earlyZ is forced on if(psState.forceEarlyZ || !_simd_movemask_ps(depthPassMask[sample])) @@ -525,6 +528,9 @@ struct PixelRateZTestLoop private: // functor inputs + DRAW_CONTEXT* pDC; + uint32_t workerId; + const SWR_TRIANGLE_DESC& work; const BarycentricCoeffs& coeffs; const API_STATE& state; diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.cpp b/src/gallium/drivers/swr/rasterizer/core/clip.cpp index 21cbb0a062..7b1e09d16e 100644 --- a/src/gallium/drivers/swr/rasterizer/core/clip.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/clip.cpp @@ -181,24 +181,27 @@ void Clip(const float *pTriangle, const float *pAttribs, int numAttribs, float * void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx) { - RDTSC_START(FEClipTriangles); + SWR_CONTEXT *pContext = pDC->pContext; + AR_BEGIN(FEClipTriangles, pDC->drawId); Clipper<3> clipper(workerId, pDC); clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx); - RDTSC_STOP(FEClipTriangles, 1, 0); + AR_END(FEClipTriangles, 1); } void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx) { - RDTSC_START(FEClipLines); + SWR_CONTEXT *pContext = pDC->pContext; + AR_BEGIN(FEClipLines, pDC->drawId); Clipper<2> clipper(workerId, pDC); clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx); - RDTSC_STOP(FEClipLines, 1, 0); + AR_END(FEClipLines, 1); } void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx) { - RDTSC_START(FEClipPoints); + SWR_CONTEXT *pContext = pDC->pContext; + AR_BEGIN(FEClipPoints, pDC->drawId); Clipper<1> clipper(workerId, pDC); clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx); - RDTSC_STOP(FEClipPoints, 1, 0); + AR_END(FEClipPoints, 1); } diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h b/src/gallium/drivers/swr/rasterizer/core/clip.h index 2f3ce85442..43bc5222c8 100644 --- a/src/gallium/drivers/swr/rasterizer/core/clip.h +++ b/src/gallium/drivers/swr/rasterizer/core/clip.h @@ -501,6 +501,10 @@ public: // execute the clipper stage void ExecuteStage(PA_STATE& pa, simdvector prim[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx) { + SWR_ASSERT(pa.pDC != nullptr); + + SWR_CONTEXT *pContext = pa.pDC->pContext; + // set up binner based on PA state PFN_PROCESS_PRIMS pfnBinner; switch (pa.binTopology) @@ -548,11 +552,11 @@ public: if (clipMask) { - RDTSC_START(FEGuardbandClip); + AR_BEGIN(FEGuardbandClip, pa.pDC->drawId); // we have to clip tris, execute the clipper, which will also // call the binner ClipSimd(vMask(primMask), vMask(clipMask), pa, primId, viewportIdx); - RDTSC_STOP(FEGuardbandClip, 1, 0); + AR_END(FEGuardbandClip, 1); } else if (validMask) { diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h index 6d63e087bc..a4dbbc5280 100644 --- a/src/gallium/drivers/swr/rasterizer/core/context.h +++ b/src/gallium/drivers/swr/rasterizer/core/context.h @@ -42,6 +42,7 @@ #include "common/simdintrin.h" #include "core/threads.h" #include "ringbuffer.h" +#include "archrast/archrast.h" // x.8 fixed point precision values #define FIXED_POINT_SHIFT 8 @@ -515,15 +516,30 @@ struct SWR_CONTEXT #define UPDATE_STAT_FE(name, count) if (GetApiState(pDC).enableStats) { pDC->dynState.statsFE.name += count; } // ArchRast instrumentation framework -#ifdef KNOB_ENABLE_AR -#define AR_WORKER_CTX pDC->pContext->pArContext[workerId] -#define AR_API_CTX pDC->pContext->pArContext[pContext->NumWorkerThreads] +#define AR_WORKER_CTX pContext->pArContext[workerId] +#define AR_API_CTX pContext->pArContext[pContext->NumWorkerThreads] -#define AR_BEGIN(ctx, type, id) ArchRast::dispatch(ctx, ArchRast::Start(ArchRast::type, id)) -#define AR_END(ctx, type, count) ArchRast::dispatch(ctx, ArchRast::End(ArchRast::type, count)) -#define AR_EVENT(ctx, event) ArchRast::dispatch(ctx, ArchRast::event) +#ifdef KNOB_ENABLE_AR + #define _AR_BEGIN(ctx, type, id) ArchRast::dispatch(ctx, ArchRast::Start(ArchRast::type, id)) + #define _AR_END(ctx, type, count) ArchRast::dispatch(ctx, ArchRast::End(ArchRast::type, count)) + #define _AR_EVENT(ctx, event) ArchRast::dispatch(ctx, ArchRast::event) #else -#define AR_BEGIN(ctx, type, id) -#define AR_END(ctx, type, id) -#define AR_EVENT(ctx, event) -#endif
\ No newline at end of file + #ifdef KNOB_ENABLE_RDTSC + #define _AR_BEGIN(ctx, type, id) (void)ctx; RDTSC_START(type) + #define _AR_END(ctx, type, id) RDTSC_STOP(type, id, 0) + #else + #define _AR_BEGIN(ctx, type, id) (void)ctx + #define _AR_END(ctx, type, id) + #endif + #define _AR_EVENT(ctx, event) +#endif + +// Use these macros for api thread. +#define AR_API_BEGIN(type, id) _AR_BEGIN(AR_API_CTX, type, id) +#define AR_API_END(type, count) _AR_END(AR_API_CTX, type, count) +#define AR_API_EVENT(event) _AR_EVENT(AR_API_CTX, event) + +// Use these macros for worker threads. +#define AR_BEGIN(type, id) _AR_BEGIN(AR_WORKER_CTX, type, id) +#define AR_END(type, count) _AR_END(AR_WORKER_CTX, type, count) +#define AR_EVENT(event) _AR_EVENT(AR_WORKER_CTX, event) diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp index db470784a5..decc161f1f 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp @@ -130,7 +130,7 @@ void ProcessStoreTiles( uint32_t workerId, void *pUserData) { - RDTSC_START(FEProcessStoreTiles); + AR_BEGIN(FEProcessStoreTiles, pDC->drawId); MacroTileMgr *pTileMgr = pDC->pTileMgr; STORE_TILES_DESC* pDesc = (STORE_TILES_DESC*)pUserData; @@ -155,7 +155,7 @@ void ProcessStoreTiles( } } - RDTSC_STOP(FEProcessStoreTiles, 0, pDC->drawId); + AR_END(FEProcessStoreTiles, 0); } ////////////////////////////////////////////////////////////////////////// @@ -171,7 +171,7 @@ void ProcessDiscardInvalidateTiles( uint32_t workerId, void *pUserData) { - RDTSC_START(FEProcessInvalidateTiles); + AR_BEGIN(FEProcessInvalidateTiles, pDC->drawId); DISCARD_INVALIDATE_TILES_DESC *pDesc = (DISCARD_INVALIDATE_TILES_DESC*)pUserData; MacroTileMgr *pTileMgr = pDC->pTileMgr; @@ -210,7 +210,7 @@ void ProcessDiscardInvalidateTiles( } } - RDTSC_STOP(FEProcessInvalidateTiles, 0, pDC->drawId); + AR_END(FEProcessInvalidateTiles, 0); } ////////////////////////////////////////////////////////////////////////// @@ -542,7 +542,9 @@ static void StreamOut( uint32_t* pPrimData, uint32_t streamIndex) { - RDTSC_START(FEStreamout); + SWR_CONTEXT *pContext = pDC->pContext; + + AR_BEGIN(FEStreamout, pDC->drawId); const API_STATE& state = GetApiState(pDC); const SWR_STREAMOUT_STATE &soState = state.soState; @@ -615,7 +617,7 @@ static void StreamOut( UPDATE_STAT_FE(SoPrimStorageNeeded[streamIndex], soContext.numPrimStorageNeeded); UPDATE_STAT_FE(SoNumPrimsWritten[streamIndex], soContext.numPrimsWritten); - RDTSC_STOP(FEStreamout, 1, 0); + AR_END(FEStreamout, 1); } ////////////////////////////////////////////////////////////////////////// @@ -698,7 +700,9 @@ static void GeometryShaderStage( uint32_t* pSoPrimData, simdscalari primID) { - RDTSC_START(FEGeometryShader); + SWR_CONTEXT *pContext = pDC->pContext; + + AR_BEGIN(FEGeometryShader, pDC->drawId); const API_STATE& state = GetApiState(pDC); const SWR_GS_STATE* pState = &state.gsState; @@ -895,7 +899,7 @@ static void GeometryShaderStage( UPDATE_STAT_FE(GsInvocations, numInputPrims * pState->instanceCount); UPDATE_STAT_FE(GsPrimitives, totalPrimsGenerated); - RDTSC_STOP(FEGeometryShader, 1, 0); + AR_END(FEGeometryShader, 1); } ////////////////////////////////////////////////////////////////////////// @@ -990,6 +994,7 @@ static void TessellationStages( uint32_t* pSoPrimData, simdscalari primID) { + SWR_CONTEXT *pContext = pDC->pContext; const API_STATE& state = GetApiState(pDC); const SWR_TS_STATE& tsState = state.tsState; @@ -1053,9 +1058,9 @@ static void TessellationStages( hsContext.mask = GenerateMask(numPrims); // Run the HS - RDTSC_START(FEHullShader); + AR_BEGIN(FEHullShader, pDC->drawId); state.pfnHsFunc(GetPrivateState(pDC), &hsContext); - RDTSC_STOP(FEHullShader, 0, 0); + AR_END(FEHullShader, 0); UPDATE_STAT_FE(HsInvocations, numPrims); @@ -1065,9 +1070,9 @@ static void TessellationStages( { // Run Tessellator SWR_TS_TESSELLATED_DATA tsData = { 0 }; - RDTSC_START(FETessellation); + AR_BEGIN(FETessellation, pDC->drawId); TSTessellate(tsCtx, hsContext.pCPout[p].tessFactors, tsData); - RDTSC_STOP(FETessellation, 0, 0); + AR_END(FETessellation, 0); if (tsData.NumPrimitives == 0) { @@ -1107,9 +1112,9 @@ static void TessellationStages( { dsContext.mask = GenerateMask(tsData.NumDomainPoints - dsInvocations); - RDTSC_START(FEDomainShader); + AR_BEGIN(FEDomainShader, pDC->drawId); state.pfnDsFunc(GetPrivateState(pDC), &dsContext); - RDTSC_STOP(FEDomainShader, 0, 0); + AR_END(FEDomainShader, 0); dsInvocations += KNOB_SIMD_WIDTH; } @@ -1142,12 +1147,12 @@ static void TessellationStages( if (HasRastT::value) { simdvector prim[3]; // Only deal with triangles, lines, or points - RDTSC_START(FEPAAssemble); + AR_BEGIN(FEPAAssemble, pDC->drawId); #if SWR_ENABLE_ASSERTS bool assemble = #endif tessPa.Assemble(VERTEX_POSITION_SLOT, prim); - RDTSC_STOP(FEPAAssemble, 1, 0); + AR_END(FEPAAssemble, 1); SWR_ASSERT(assemble); SWR_ASSERT(pfnClipFunc); @@ -1196,7 +1201,7 @@ void ProcessDraw( } #endif - RDTSC_START(FEProcessDraw); + AR_BEGIN(FEProcessDraw, pDC->drawId); DRAW_WORK& work = *(DRAW_WORK*)pUserData; const API_STATE& state = GetApiState(pDC); @@ -1334,9 +1339,9 @@ void ProcessDraw( { // 1. Execute FS/VS for a single SIMD. - RDTSC_START(FEFetchShader); + AR_BEGIN(FEFetchShader, pDC->drawId); state.pfnFetchFunc(fetchInfo, vin); - RDTSC_STOP(FEFetchShader, 0, 0); + AR_END(FEFetchShader, 0); // forward fetch generated vertex IDs to the vertex shader vsContext.VertexID = fetchInfo.VertexID; @@ -1356,9 +1361,9 @@ void ProcessDraw( if (!KNOB_TOSS_FETCH) #endif { - RDTSC_START(FEVertexShader); + AR_BEGIN(FEVertexShader, pDC->drawId); state.pfnVertexFunc(GetPrivateState(pDC), &vsContext); - RDTSC_STOP(FEVertexShader, 0, 0); + AR_END(FEVertexShader, 0); UPDATE_STAT_FE(VsInvocations, GetNumInvocations(i, endVertex)); } @@ -1369,9 +1374,9 @@ void ProcessDraw( { simdvector prim[MAX_NUM_VERTS_PER_PRIM]; // PaAssemble returns false if there is not enough verts to assemble. - RDTSC_START(FEPAAssemble); + AR_BEGIN(FEPAAssemble, pDC->drawId); bool assemble = pa.Assemble(VERTEX_POSITION_SLOT, prim); - RDTSC_STOP(FEPAAssemble, 1, 0); + AR_END(FEPAAssemble, 1); #if KNOB_ENABLE_TOSS_POINTS if (!KNOB_TOSS_FETCH) @@ -1428,7 +1433,7 @@ void ProcessDraw( pa.Reset(); } - RDTSC_STOP(FEProcessDraw, numPrims * work.numInstances, pDC->drawId); + AR_END(FEProcessDraw, numPrims * work.numInstances); } struct FEDrawChooser @@ -1787,7 +1792,9 @@ void BinTriangles( simdscalari primID, simdscalari viewportIdx) { - RDTSC_START(FEBinTriangles); + SWR_CONTEXT *pContext = pDC->pContext; + + AR_BEGIN(FEBinTriangles, pDC->drawId); const API_STATE& state = GetApiState(pDC); const SWR_RASTSTATE& rastState = state.rastState; @@ -2168,7 +2175,7 @@ void BinTriangles( } endBinTriangles: - RDTSC_STOP(FEBinTriangles, 1, 0); + AR_END(FEBinTriangles, 1); } struct FEBinTrianglesChooser @@ -2204,7 +2211,9 @@ void BinPoints( simdscalari primID, simdscalari viewportIdx) { - RDTSC_START(FEBinPoints); + SWR_CONTEXT *pContext = pDC->pContext; + + AR_BEGIN(FEBinPoints, pDC->drawId); simdvector& primVerts = prim[0]; @@ -2519,10 +2528,7 @@ void BinPoints( } } - - - - RDTSC_STOP(FEBinPoints, 1, 0); + AR_END(FEBinPoints, 1); } ////////////////////////////////////////////////////////////////////////// @@ -2542,7 +2548,9 @@ void BinLines( simdscalari primID, simdscalari viewportIdx) { - RDTSC_START(FEBinLines); + SWR_CONTEXT *pContext = pDC->pContext; + + AR_BEGIN(FEBinLines, pDC->drawId); const API_STATE& state = GetApiState(pDC); const SWR_RASTSTATE& rastState = state.rastState; @@ -2765,5 +2773,5 @@ void BinLines( endBinLines: - RDTSC_STOP(FEBinLines, 1, 0); + AR_END(FEBinLines, 1); } diff --git a/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp b/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp index c9380dac2f..6d4e50408f 100644 --- a/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp @@ -758,7 +758,7 @@ INLINE bool TrivialAcceptTest<std::false_type>(const int mask0, const int mask1, template <typename RT, typename ValidEdgeMaskT, typename InputCoverageT> struct GenerateSVInnerCoverage { - INLINE GenerateSVInnerCoverage(DRAW_CONTEXT*, EDGE*, double*, uint64_t &){}; + INLINE GenerateSVInnerCoverage(DRAW_CONTEXT*, uint32_t, EDGE*, double*, uint64_t &){}; }; ////////////////////////////////////////////////////////////////////////// @@ -768,8 +768,10 @@ struct GenerateSVInnerCoverage template <typename RT> struct GenerateSVInnerCoverage<RT, AllEdgesValidT, InnerConservativeCoverageT> { - INLINE GenerateSVInnerCoverage(DRAW_CONTEXT* pDC, EDGE* pRastEdges, double* pStartQuadEdges, uint64_t &innerCoverageMask) + INLINE GenerateSVInnerCoverage(DRAW_CONTEXT* pDC, uint32_t workerId, EDGE* pRastEdges, double* pStartQuadEdges, uint64_t &innerCoverageMask) { + SWR_CONTEXT *pContext = pDC->pContext; + double startQuadEdgesAdj[RT::NumEdgesT::value]; for(uint32_t e = 0; e < RT::NumEdgesT::value; ++e) { @@ -777,9 +779,9 @@ struct GenerateSVInnerCoverage<RT, AllEdgesValidT, InnerConservativeCoverageT> } // not trivial accept or reject, must rasterize full tile - RDTSC_START(BERasterizePartial); + AR_BEGIN(BERasterizePartial, pDC->drawId); innerCoverageMask = rasterizePartialTile<RT::NumEdgesT::value, typename RT::ValidEdgeMaskT>(pDC, startQuadEdgesAdj, pRastEdges); - RDTSC_STOP(BERasterizePartial, 0, 0); + AR_END(BERasterizePartial, 0); } }; @@ -835,6 +837,7 @@ struct UpdateEdgeMasksInnerConservative<RT, ValidEdgeMaskT, InnerConservativeCov template <typename RT> void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pDesc) { + SWR_CONTEXT *pContext = pDC->pContext; const TRIANGLE_WORK_DESC &workDesc = *((TRIANGLE_WORK_DESC*)pDesc); #if KNOB_ENABLE_TOSS_POINTS if (KNOB_TOSS_BIN_TRIS) @@ -842,9 +845,9 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, return; } #endif - RDTSC_START(BERasterizeTriangle); + AR_BEGIN(BERasterizeTriangle, pDC->drawId); + AR_BEGIN(BETriangleSetup, pDC->drawId); - RDTSC_START(BETriangleSetup); const API_STATE &state = GetApiState(pDC); const SWR_RASTSTATE &rastState = state.rastState; const BACKEND_FUNCS& backendFuncs = pDC->pState->backendFuncs; @@ -1009,7 +1012,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, SWR_ASSERT(intersect.xmin <= intersect.xmax && intersect.ymin <= intersect.ymax && intersect.xmin >= 0 && intersect.xmax >= 0 && intersect.ymin >= 0 && intersect.ymax >= 0); - RDTSC_STOP(BETriangleSetup, 0, pDC->drawId); + AR_END(BETriangleSetup, 0); // update triangle desc uint32_t minTileX = intersect.xmin >> (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT); @@ -1022,11 +1025,11 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, if (numTilesX == 0 || numTilesY == 0) { RDTSC_EVENT(BEEmptyTriangle, 1, 0); - RDTSC_STOP(BERasterizeTriangle, 1, 0); + AR_END(BERasterizeTriangle, 1); return; } - RDTSC_START(BEStepSetup); + AR_BEGIN(BEStepSetup, pDC->drawId); // Step to pixel center of top-left pixel of the triangle bbox // Align intersect bbox (top/left) to raster tile's (top/left). @@ -1134,7 +1137,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, } } - RDTSC_STOP(BEStepSetup, 0, pDC->drawId); + AR_END(BEStepSetup, 0); uint32_t tY = minTileY; uint32_t tX = minTileX; @@ -1226,14 +1229,14 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, } // not trivial accept or reject, must rasterize full tile - RDTSC_START(BERasterizePartial); + AR_BEGIN(BERasterizePartial, pDC->drawId); triDesc.coverageMask[sampleNum] = rasterizePartialTile<RT::NumEdgesT::value, typename RT::ValidEdgeMaskT>(pDC, startQuadEdges, rastEdges); - RDTSC_STOP(BERasterizePartial, 0, 0); + AR_END(BERasterizePartial, 0); triDesc.anyCoveredSamples |= triDesc.coverageMask[sampleNum]; // Output SV InnerCoverage, if needed - GenerateSVInnerCoverage<RT, typename RT::ValidEdgeMaskT, typename RT::InputCoverageT>(pDC, rastEdges, startQuadEdges, triDesc.innerCoverageMask); + GenerateSVInnerCoverage<RT, typename RT::ValidEdgeMaskT, typename RT::InputCoverageT>(pDC, workerId, rastEdges, startQuadEdges, triDesc.innerCoverageMask); } } else @@ -1264,9 +1267,9 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, UnrollerL<1, RT::MT::numSamples, 1>::step(copyCoverage); } - RDTSC_START(BEPixelBackend); + AR_BEGIN(BEPixelBackend, pDC->drawId); backendFuncs.pfnBackend(pDC, workerId, tileX << KNOB_TILE_X_DIM_SHIFT, tileY << KNOB_TILE_Y_DIM_SHIFT, triDesc, renderBuffers); - RDTSC_STOP(BEPixelBackend, 0, 0); + AR_END(BEPixelBackend, 0); } // step to the next tile in X @@ -1285,7 +1288,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, StepRasterTileY<RT>(state.psState.numRenderTargets, renderBuffers, currentRenderBufferRow); } - RDTSC_STOP(BERasterizeTriangle, 1, 0); + AR_END(BERasterizeTriangle, 1); } void RasterizeTriPoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void* pData) @@ -1420,6 +1423,8 @@ void RasterizeTriPoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void RasterizeSimplePoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void* pData) { + SWR_CONTEXT *pContext = pDC->pContext; + #if KNOB_ENABLE_TOSS_POINTS if (KNOB_TOSS_BIN_TRIS) { @@ -1475,9 +1480,9 @@ void RasterizeSimplePoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTi GetRenderHotTiles(pDC, macroTile, tileAlignedX >> KNOB_TILE_X_DIM_SHIFT , tileAlignedY >> KNOB_TILE_Y_DIM_SHIFT, renderBuffers, triDesc.triFlags.renderTargetArrayIndex); - RDTSC_START(BEPixelBackend); + AR_BEGIN(BEPixelBackend, pDC->drawId); backendFuncs.pfnBackend(pDC, workerId, tileAlignedX, tileAlignedY, triDesc, renderBuffers); - RDTSC_STOP(BEPixelBackend, 0, 0); + AR_END(BEPixelBackend, 0); } // Get pointers to hot tile memory for color RT, depth, stencil @@ -1561,6 +1566,7 @@ INLINE void StepRasterTileY(uint32_t NumRT, RenderOutputBuffers &buffers, Render void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData) { + SWR_CONTEXT *pContext = pDC->pContext; const TRIANGLE_WORK_DESC &workDesc = *((TRIANGLE_WORK_DESC*)pData); #if KNOB_ENABLE_TOSS_POINTS if (KNOB_TOSS_BIN_TRIS) @@ -1570,7 +1576,7 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi #endif // bloat line to two tris and call the triangle rasterizer twice - RDTSC_START(BERasterizeLine); + AR_BEGIN(BERasterizeLine, pDC->drawId); const API_STATE &state = GetApiState(pDC); const SWR_RASTSTATE &rastState = state.rastState; @@ -1763,7 +1769,7 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc); } - RDTSC_STOP(BERasterizeLine, 1, 0); + AR_END(BERasterizeLine, 1); } struct RasterizerChooser diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp index 24e7812308..446e795fb2 100644 --- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp @@ -501,7 +501,7 @@ void WorkOnFifoBE( { BE_WORK *pWork; - RDTSC_START(WorkerFoundWork); + AR_BEGIN(WorkerFoundWork, pDC->drawId); uint32_t numWorkItems = tile->getNumQueued(); SWR_ASSERT(numWorkItems); @@ -510,7 +510,7 @@ void WorkOnFifoBE( SWR_ASSERT(pWork); if (pWork->type == DRAW) { - pContext->pHotTileMgr->InitializeHotTiles(pContext, pDC, tileID); + pContext->pHotTileMgr->InitializeHotTiles(pContext, pDC, workerId, tileID); } while ((pWork = tile->peek()) != nullptr) @@ -518,7 +518,7 @@ void WorkOnFifoBE( pWork->pfnWork(pDC, workerId, tileID, &pWork->desc); tile->dequeue(); } - RDTSC_STOP(WorkerFoundWork, numWorkItems, pDC->drawId); + AR_END(WorkerFoundWork, numWorkItems); _ReadWriteBarrier(); @@ -735,12 +735,12 @@ DWORD workerThreadMain(LPVOID pData) break; } - RDTSC_START(WorkerWaitForThreadEvent); + AR_BEGIN(WorkerWaitForThreadEvent, 0); pContext->FifosNotEmpty.wait(lock); lock.unlock(); - RDTSC_STOP(WorkerWaitForThreadEvent, 0, 0); + AR_END(WorkerWaitForThreadEvent, 0); if (pContext->threadPool.inThreadShutdown) { @@ -750,9 +750,9 @@ DWORD workerThreadMain(LPVOID pData) if (IsBEThread) { - RDTSC_START(WorkerWorkOnFifoBE); + AR_BEGIN(WorkerWorkOnFifoBE, 0); WorkOnFifoBE(pContext, workerId, curDrawBE, lockedTiles, numaNode, numaMask); - RDTSC_STOP(WorkerWorkOnFifoBE, 0, 0); + AR_END(WorkerWorkOnFifoBE, 0); WorkOnCompute(pContext, workerId, curDrawBE); } diff --git a/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp b/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp index 1bd1805b52..bd189abb1a 100644 --- a/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp @@ -281,7 +281,7 @@ void HotTileMgr::ClearStencilHotTile(const HOTTILE* pHotTile) /// to avoid unnecessary setup every triangle /// @todo support deferred clear /// @param pCreateInfo - pointer to creation info. -void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID) +void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroID) { const API_STATE& state = GetApiState(pDC); @@ -301,19 +301,19 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, ui if (pHotTile->state == HOTTILE_INVALID) { - RDTSC_START(BELoadTiles); + AR_BEGIN(BELoadTiles, pDC->drawId); // invalid hottile before draw requires a load from surface before we can draw to it pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_COLOR_HOT_TILE_FORMAT, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer); pHotTile->state = HOTTILE_DIRTY; - RDTSC_STOP(BELoadTiles, 0, 0); + AR_END(BELoadTiles, 0); } else if (pHotTile->state == HOTTILE_CLEAR) { - RDTSC_START(BELoadTiles); + AR_BEGIN(BELoadTiles, pDC->drawId); // Clear the tile. ClearColorHotTile(pHotTile); pHotTile->state = HOTTILE_DIRTY; - RDTSC_STOP(BELoadTiles, 0, 0); + AR_END(BELoadTiles, 0); } colorHottileEnableMask &= ~(1 << rtSlot); } @@ -324,19 +324,19 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, ui HOTTILE* pHotTile = GetHotTile(pContext, pDC, macroID, SWR_ATTACHMENT_DEPTH, true, numSamples); if (pHotTile->state == HOTTILE_INVALID) { - RDTSC_START(BELoadTiles); + AR_BEGIN(BELoadTiles, pDC->drawId); // invalid hottile before draw requires a load from surface before we can draw to it pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_DEPTH_HOT_TILE_FORMAT, SWR_ATTACHMENT_DEPTH, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer); pHotTile->state = HOTTILE_DIRTY; - RDTSC_STOP(BELoadTiles, 0, 0); + AR_END(BELoadTiles, 0); } else if (pHotTile->state == HOTTILE_CLEAR) { - RDTSC_START(BELoadTiles); + AR_BEGIN(BELoadTiles, pDC->drawId); // Clear the tile. ClearDepthHotTile(pHotTile); pHotTile->state = HOTTILE_DIRTY; - RDTSC_STOP(BELoadTiles, 0, 0); + AR_END(BELoadTiles, 0); } } @@ -346,19 +346,19 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, ui HOTTILE* pHotTile = GetHotTile(pContext, pDC, macroID, SWR_ATTACHMENT_STENCIL, true, numSamples); if (pHotTile->state == HOTTILE_INVALID) { - RDTSC_START(BELoadTiles); + AR_BEGIN(BELoadTiles, pDC->drawId); // invalid hottile before draw requires a load from surface before we can draw to it pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_STENCIL_HOT_TILE_FORMAT, SWR_ATTACHMENT_STENCIL, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer); pHotTile->state = HOTTILE_DIRTY; - RDTSC_STOP(BELoadTiles, 0, 0); + AR_END(BELoadTiles, 0); } else if (pHotTile->state == HOTTILE_CLEAR) { - RDTSC_START(BELoadTiles); + AR_BEGIN(BELoadTiles, pDC->drawId); // Clear the tile. ClearStencilHotTile(pHotTile); pHotTile->state = HOTTILE_DIRTY; - RDTSC_STOP(BELoadTiles, 0, 0); + AR_END(BELoadTiles, 0); } } } diff --git a/src/gallium/drivers/swr/rasterizer/core/tilemgr.h b/src/gallium/drivers/swr/rasterizer/core/tilemgr.h index 4ec02838ab..2befe97e7c 100644 --- a/src/gallium/drivers/swr/rasterizer/core/tilemgr.h +++ b/src/gallium/drivers/swr/rasterizer/core/tilemgr.h @@ -291,7 +291,7 @@ public: } } - void InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID); + void InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroID); HOTTILE *GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples = 1, uint32_t renderTargetArrayIndex = 0); |