summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTim Rowley <timothy.o.rowley@intel.com>2016-09-12 13:08:12 -0500
committerTim Rowley <timothy.o.rowley@intel.com>2016-09-19 20:10:19 -0500
commit92ec820244710e1b13267d8e93f3a81d7114080e (patch)
treea74770d139b19a346c5169091ccc4360f50818fd
parentfdf28904235c1d2551cae60b46032f2e9a30e271 (diff)
swr: [rasterizer core] Better thread destruction
Signed-off-by: Tim Rowley <timothy.o.rowley@intel.com>
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/api.cpp88
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/backend.cpp10
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/backend.h1
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/context.h1
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/frontend.cpp30
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/frontend.h1
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/threads.cpp61
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/threads.h3
8 files changed, 126 insertions, 69 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp
index df87d14ca3..703f239cc0 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp
@@ -157,46 +157,6 @@ HANDLE SwrCreateContext(
return (HANDLE)pContext;
}
-void SwrDestroyContext(HANDLE hContext)
-{
- SWR_CONTEXT *pContext = GetContext(hContext);
- DestroyThreadPool(pContext, &pContext->threadPool);
-
- // free the fifos
- for (uint32_t i = 0; i < KNOB_MAX_DRAWS_IN_FLIGHT; ++i)
- {
- delete [] pContext->dcRing[i].dynState.pStats;
- delete pContext->dcRing[i].pArena;
- delete pContext->dsRing[i].pArena;
- pContext->pMacroTileManagerArray[i].~MacroTileMgr();
- pContext->pDispatchQueueArray[i].~DispatchQueue();
- }
-
- AlignedFree(pContext->pDispatchQueueArray);
- AlignedFree(pContext->pMacroTileManagerArray);
-
- // Free scratch space.
- for (uint32_t i = 0; i < pContext->NumWorkerThreads; ++i)
- {
-#if defined(_WIN32)
- VirtualFree(pContext->ppScratch[i], 0, MEM_RELEASE);
-#else
- AlignedFree(pContext->ppScratch[i]);
-#endif
-
- ArchRast::DestroyThreadContext(pContext->pArContext[i]);
- }
-
- delete [] pContext->ppScratch;
- delete [] pContext->pArContext;
- delete [] pContext->pStats;
-
- delete(pContext->pHotTileMgr);
-
- pContext->~SWR_CONTEXT();
- AlignedFree(GetContext(hContext));
-}
-
void CopyState(DRAW_STATE& dst, const DRAW_STATE& src)
{
memcpy(&dst.state, &src.state, sizeof(API_STATE));
@@ -382,6 +342,54 @@ API_STATE* GetDrawState(SWR_CONTEXT *pContext)
return &pDC->pState->state;
}
+void SwrDestroyContext(HANDLE hContext)
+{
+ SWR_CONTEXT *pContext = GetContext(hContext);
+ DRAW_CONTEXT* pDC = GetDrawContext(pContext);
+
+ pDC->FeWork.type = SHUTDOWN;
+ pDC->FeWork.pfnWork = ProcessShutdown;
+
+ // Enqueue the shutdown draw configured above (FE work = ProcessShutdown) before the thread pool is torn down.
+ QueueDraw(pContext);
+
+ DestroyThreadPool(pContext, &pContext->threadPool);
+
+ // Free the fifos: per-draw stats and arenas, then explicitly run the MacroTileMgr / DispatchQueue destructors (their arrays are released with AlignedFree below).
+ for (uint32_t i = 0; i < KNOB_MAX_DRAWS_IN_FLIGHT; ++i)
+ {
+ delete[] pContext->dcRing[i].dynState.pStats;
+ delete pContext->dcRing[i].pArena;
+ delete pContext->dsRing[i].pArena;
+ pContext->pMacroTileManagerArray[i].~MacroTileMgr();
+ pContext->pDispatchQueueArray[i].~DispatchQueue();
+ }
+
+ AlignedFree(pContext->pDispatchQueueArray);
+ AlignedFree(pContext->pMacroTileManagerArray);
+
+ // Free the per-worker scratch space and ArchRast thread contexts.
+ for (uint32_t i = 0; i < pContext->NumWorkerThreads; ++i)
+ {
+#if defined(_WIN32)
+ VirtualFree(pContext->ppScratch[i], 0, MEM_RELEASE);
+#else
+ AlignedFree(pContext->ppScratch[i]);
+#endif
+
+ ArchRast::DestroyThreadContext(pContext->pArContext[i]);
+ }
+
+ delete[] pContext->ppScratch;
+ delete[] pContext->pArContext;
+ delete[] pContext->pStats;
+
+ delete(pContext->pHotTileMgr);
+
+ pContext->~SWR_CONTEXT();
+ AlignedFree(GetContext(hContext));
+}
+
void SWR_API SwrSaveState(
HANDLE hContext,
void* pOutputStateBlock,
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.cpp b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
index d3d114ecdb..0a0001d077 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
@@ -78,6 +78,16 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroup
AR_END(BEDispatch, 1);
}
+//////////////////////////////////////////////////////////////////////////
+/// @brief BE handler attached to SHUTDOWN work items. Intentionally a no-op.
+/// @param pDC - pointer to draw context (unused).
+/// @param workerId - The unique worker ID that is assigned to this thread (unused).
+/// @param macroTile,pUserData - unused; present so the signature matches the other BE work handlers.
+void ProcessShutdownBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData)
+{
+ // Dummy function: there is nothing to execute for a SHUTDOWN work item.
+}
+
void ProcessSyncBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData)
{
uint32_t x, y;
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.h b/src/gallium/drivers/swr/rasterizer/core/backend.h
index 9d2f317f31..e19a53d6b0 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend.h
+++ b/src/gallium/drivers/swr/rasterizer/core/backend.h
@@ -38,6 +38,7 @@ void ProcessSyncBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi
void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData);
void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData);
void ProcessDiscardInvalidateTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData);
+void ProcessShutdownBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData);
void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers);
void InitClearTilesTable();
simdmask ComputeUserClipMask(uint8_t clipMask, float* pUserClipBuffer, simdscalar vI, simdscalar vJ);
diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h
index a4dbbc5280..dfcc1c0d39 100644
--- a/src/gallium/drivers/swr/rasterizer/core/context.h
+++ b/src/gallium/drivers/swr/rasterizer/core/context.h
@@ -158,6 +158,7 @@ enum WORK_TYPE
CLEAR,
DISCARDINVALIDATETILES,
STORETILES,
+ SHUTDOWN,
};
struct BE_WORK
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
index decc161f1f..5d549873f3 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
@@ -81,6 +81,36 @@ void ProcessSync(
}
//////////////////////////////////////////////////////////////////////////
+/// @brief FE handler for SwrDestroyContext. Enqueues SHUTDOWN BE work items on the draw's tile manager.
+/// @param pContext - pointer to SWR context.
+/// @param pDC - pointer to draw context.
+/// @param workerId - thread's worker id. Every thread has a unique id. Unused here.
+/// @param pUserData - Pointer to user data. Unused by this handler.
+void ProcessShutdown(
+ SWR_CONTEXT *pContext,
+ DRAW_CONTEXT *pDC,
+ uint32_t workerId,
+ void *pUserData)
+{
+ BE_WORK work;
+ work.type = SHUTDOWN;
+ work.pfnWork = ProcessShutdownBE;
+
+ MacroTileMgr *pTileMgr = pDC->pTileMgr;
+ // Enqueue at least 1 work item for each worker thread,
+ // accounting for the number of NUMA nodes: one item per (thread index, node index) pair.
+ uint32_t numNumaNodes = pContext->threadPool.numaMask + 1;
+
+ for (uint32_t i = 0; i < pContext->threadPool.numThreads; ++i)
+ {
+ for (uint32_t n = 0; n < numNumaNodes; ++n)
+ {
+ pTileMgr->enqueue(i, n, &work);
+ }
+ }
+}
+
+//////////////////////////////////////////////////////////////////////////
/// @brief FE handler for SwrClearRenderTarget.
/// @param pContext - pointer to SWR context.
/// @param pDC - pointer to draw context.
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.h b/src/gallium/drivers/swr/rasterizer/core/frontend.h
index 6316156bfd..46924947a7 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.h
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.h
@@ -304,6 +304,7 @@ void ProcessClear(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, v
void ProcessStoreTiles(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
void ProcessDiscardInvalidateTiles(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
void ProcessSync(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
+void ProcessShutdown(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
PFN_PROCESS_PRIMS GetBinTrianglesFunc(bool IsConservative);
diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
index 446e795fb2..b1a27f34c2 100644
--- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
@@ -428,7 +428,8 @@ INLINE bool FindFirstIncompleteDraw(SWR_CONTEXT* pContext, uint32_t& curDrawBE,
/// still have work pending in a previous draw. Additionally, lockedTiles is a
/// heuristic that can steer a worker back to the same macrotile that it had been
/// working on in a previous draw.
-void WorkOnFifoBE(
+/// @returns true if the worker thread should shut down (a SHUTDOWN work item was encountered)
+bool WorkOnFifoBE(
SWR_CONTEXT *pContext,
uint32_t workerId,
uint32_t &curDrawBE,
@@ -436,12 +437,14 @@ void WorkOnFifoBE(
uint32_t numaNode,
uint32_t numaMask)
{
+ bool bShutdown = false;
+
// Find the first incomplete draw that has pending work. If no such draw is found then
// return. FindFirstIncompleteDraw is responsible for incrementing the curDrawBE.
uint32_t drawEnqueued = 0;
if (FindFirstIncompleteDraw(pContext, curDrawBE, drawEnqueued) == false)
{
- return;
+ return false;
}
uint32_t lastRetiredDraw = pContext->dcRing[curDrawBE % KNOB_MAX_DRAWS_IN_FLIGHT].drawId - 1;
@@ -458,17 +461,17 @@ void WorkOnFifoBE(
{
DRAW_CONTEXT *pDC = &pContext->dcRing[i % KNOB_MAX_DRAWS_IN_FLIGHT];
- if (pDC->isCompute) return; // We don't look at compute work.
+ if (pDC->isCompute) return false; // We don't look at compute work.
// First wait for FE to be finished with this draw. This keeps threading model simple
// but if there are lots of bubbles between draws then serializing FE and BE may
// need to be revisited.
- if (!pDC->doneFE) return;
+ if (!pDC->doneFE) return false;
// If this draw is dependent on a previous draw then we need to bail.
if (CheckDependency(pContext, pDC, lastRetiredDraw))
{
- return;
+ return false;
}
// Grab the list of all dirty macrotiles. A tile is dirty if it has work queued to it.
@@ -512,6 +515,10 @@ void WorkOnFifoBE(
{
pContext->pHotTileMgr->InitializeHotTiles(pContext, pDC, workerId, tileID);
}
+ else if (pWork->type == SHUTDOWN)
+ {
+ bShutdown = true;
+ }
while ((pWork = tile->peek()) != nullptr)
{
@@ -526,7 +533,7 @@ void WorkOnFifoBE(
// Optimization: If the draw is complete and we're the last one to have worked on it then
// we can reset the locked list as we know that all previous draws before the next are guaranteed to be complete.
- if ((curDrawBE == i) && pDC->pTileMgr->isWorkComplete())
+ if ((curDrawBE == i) && (bShutdown || pDC->pTileMgr->isWorkComplete()))
{
// We can increment the current BE and safely move to next draw since we know this draw is complete.
curDrawBE++;
@@ -537,6 +544,11 @@ void WorkOnFifoBE(
lockedTiles.clear();
break;
}
+
+ if (bShutdown)
+ {
+ break;
+ }
}
else
{
@@ -545,6 +557,8 @@ void WorkOnFifoBE(
}
}
}
+
+ return bShutdown;
}
//////////////////////////////////////////////////////////////////////////
@@ -710,8 +724,15 @@ DWORD workerThreadMain(LPVOID pData)
uint32_t curDrawBE = 0;
uint32_t curDrawFE = 0;
- while (pContext->threadPool.inThreadShutdown == false)
+ bool bShutdown = false;
+
+ while (true)
{
+ if (bShutdown && !threadHasWork(curDrawBE))
+ {
+ break;
+ }
+
uint32_t loop = 0;
while (loop++ < KNOB_WORKER_SPIN_LOOP_COUNT && !threadHasWork(curDrawBE))
{
@@ -729,29 +750,18 @@ DWORD workerThreadMain(LPVOID pData)
continue;
}
- if (pContext->threadPool.inThreadShutdown)
- {
- lock.unlock();
- break;
- }
-
AR_BEGIN(WorkerWaitForThreadEvent, 0);
pContext->FifosNotEmpty.wait(lock);
lock.unlock();
AR_END(WorkerWaitForThreadEvent, 0);
-
- if (pContext->threadPool.inThreadShutdown)
- {
- break;
- }
}
if (IsBEThread)
{
AR_BEGIN(WorkerWorkOnFifoBE, 0);
- WorkOnFifoBE(pContext, workerId, curDrawBE, lockedTiles, numaNode, numaMask);
+ bShutdown |= WorkOnFifoBE(pContext, workerId, curDrawBE, lockedTiles, numaNode, numaMask);
AR_END(WorkerWorkOnFifoBE, 0);
WorkOnCompute(pContext, workerId, curDrawBE);
@@ -918,7 +928,6 @@ void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
pPool->numThreads = numThreads;
pContext->NumWorkerThreads = pPool->numThreads;
- pPool->inThreadShutdown = false;
pPool->pThreadData = (THREAD_DATA *)malloc(pPool->numThreads * sizeof(THREAD_DATA));
pPool->numaMask = 0;
@@ -1001,17 +1010,15 @@ void DestroyThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
{
if (!pContext->threadInfo.SINGLE_THREADED)
{
- // Inform threads to finish up
- std::unique_lock<std::mutex> lock(pContext->WaitLock);
- pPool->inThreadShutdown = true;
- _mm_mfence();
- pContext->FifosNotEmpty.notify_all();
- lock.unlock();
+ // Wait for all threads to finish
+ SwrWaitForIdle(pContext);
// Detach the worker threads and destroy their thread objects
for (uint32_t t = 0; t < pPool->numThreads; ++t)
{
- pPool->pThreads[t]->join();
+ // Detach from the thread. We cannot join() because (on Windows) code in some
+ // DllMain (DLL_THREAD_DETACH case) may block the thread until after this function returns.
+ pPool->pThreads[t]->detach();
delete(pPool->pThreads[t]);
}
diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.h b/src/gallium/drivers/swr/rasterizer/core/threads.h
index 05231c5a38..c802c576fc 100644
--- a/src/gallium/drivers/swr/rasterizer/core/threads.h
+++ b/src/gallium/drivers/swr/rasterizer/core/threads.h
@@ -54,7 +54,6 @@ struct THREAD_POOL
THREAD_PTR* pThreads;
uint32_t numThreads;
uint32_t numaMask;
- volatile bool inThreadShutdown;
THREAD_DATA *pThreadData;
};
@@ -65,6 +64,6 @@ void DestroyThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool);
// Expose FE and BE worker functions to the API thread if single threaded
void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint32_t &curDrawFE);
-void WorkOnFifoBE(SWR_CONTEXT *pContext, uint32_t workerId, uint32_t &curDrawBE, TileSet &usedTiles, uint32_t numaNode, uint32_t numaMask);
+bool WorkOnFifoBE(SWR_CONTEXT *pContext, uint32_t workerId, uint32_t &curDrawBE, TileSet &usedTiles, uint32_t numaNode, uint32_t numaMask);
void WorkOnCompute(SWR_CONTEXT *pContext, uint32_t workerId, uint32_t &curDrawBE);
int32_t CompleteDrawContext(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC);