summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: George Kyriazis <george.kyriazis@intel.com> 2018-05-01 19:33:38 -0500
committer: George Kyriazis <george.kyriazis@intel.com> 2018-05-11 11:26:35 -0500
commit: 4e52cb51b56eaae7153394ed712f49ce0ba63bcc (patch)
tree: 1c28698af74a76fb9d5d9d097d01dbae0653de88
parent: 8238c791dcd244c5d242b0e61cbc744ed64e5e23 (diff)
swr/rast: Thread locked tiles improvement
- Change tilemgr TILE_ID encoding to use Morton-order (Z-order).
- Change locked tiles set to bitset. Makes clear, set, get much faster.

Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/api.cpp 11
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/context.h 2
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/threads.cpp 5
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/threads.h 2
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp 31
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/tilemgr.h 20
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/tileset.h 105
7 files changed, 152 insertions, 24 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp
index 3458793fd8..47f3633d54 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp
@@ -42,6 +42,7 @@
#include "core/tilemgr.h"
#include "core/clip.h"
#include "core/utils.h"
+#include "core/tileset.h"
#include "common/os.h"
@@ -139,6 +140,11 @@ HANDLE SwrCreateContext(
BindApiThread(pContext, 0);
}
+ if (pContext->threadInfo.SINGLE_THREADED)
+ {
+ pContext->pSingleThreadLockedTiles = new TileSet();
+ }
+
pContext->ppScratch = new uint8_t*[pContext->NumWorkerThreads];
pContext->pStats = (SWR_STATS*)AlignedMalloc(sizeof(SWR_STATS) * pContext->NumWorkerThreads, 64);
@@ -245,7 +251,7 @@ void QueueWork(SWR_CONTEXT *pContext)
{
uint32_t curDraw[2] = { pContext->pCurDrawContext->drawId, pContext->pCurDrawContext->drawId };
WorkOnFifoFE(pContext, 0, curDraw[0]);
- WorkOnFifoBE(pContext, 0, curDraw[1], pContext->singleThreadLockedTiles, 0, 0);
+ WorkOnFifoBE(pContext, 0, curDraw[1], *pContext->pSingleThreadLockedTiles, 0, 0);
}
else
{
@@ -427,7 +433,8 @@ void SwrDestroyContext(HANDLE hContext)
delete[] pContext->ppScratch;
AlignedFree(pContext->pStats);
- delete(pContext->pHotTileMgr);
+ delete pContext->pHotTileMgr;
+ delete pContext->pSingleThreadLockedTiles;
pContext->~SWR_CONTEXT();
AlignedFree(GetContext(hContext));
diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h
index af8f4b8db4..2cd61e4abb 100644
--- a/src/gallium/drivers/swr/rasterizer/core/context.h
+++ b/src/gallium/drivers/swr/rasterizer/core/context.h
@@ -516,7 +516,7 @@ struct SWR_CONTEXT
uint32_t lastFrameChecked;
uint64_t lastDrawChecked;
- TileSet singleThreadLockedTiles;
+ TileSet* pSingleThreadLockedTiles;
// ArchRast thread contexts.
HANDLE* pArContext;
diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
index 9e16246c3f..f77ae22a80 100644
--- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
@@ -49,6 +49,7 @@
#include "rasterizer.h"
#include "rdtsc_core.h"
#include "tilemgr.h"
+#include "tileset.h"
@@ -587,7 +588,7 @@ bool WorkOnFifoBE(
}
// can only work on this draw if it's not in use by other threads
- if (lockedTiles.find(tileID) != lockedTiles.end())
+ if (lockedTiles.get(tileID))
{
continue;
}
@@ -645,7 +646,7 @@ bool WorkOnFifoBE(
else
{
// This tile is already locked. So let's add it to our locked tiles set. This way we don't try locking this one again.
- lockedTiles.insert(tileID);
+ lockedTiles.set(tileID);
}
}
}
diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.h b/src/gallium/drivers/swr/rasterizer/core/threads.h
index cb918ddb60..0489a3cc6c 100644
--- a/src/gallium/drivers/swr/rasterizer/core/threads.h
+++ b/src/gallium/drivers/swr/rasterizer/core/threads.h
@@ -62,7 +62,7 @@ struct THREAD_POOL
THREAD_DATA *pApiThreadData;
};
-typedef std::unordered_set<uint32_t> TileSet;
+struct TileSet;
void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool);
void StartThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool);
diff --git a/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp b/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp
index 28fa787711..1bdef4bd7d 100644
--- a/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp
@@ -33,8 +33,6 @@
#include "core/multisample.h"
#include "rdtsc_core.h"
-#define TILE_ID(x,y) ((x << 16 | y))
-
MacroTileMgr::MacroTileMgr(CachingArena& arena) : mArena(arena)
{
}
@@ -50,26 +48,35 @@ void MacroTileMgr::enqueue(uint32_t x, uint32_t y, BE_WORK *pWork)
return;
}
- uint32_t id = TILE_ID(x, y);
+ uint32_t id = getTileId(x, y);
+
+ if (id >= mTiles.size())
+ {
+ mTiles.resize((16 + id) * 2);
+ }
- MacroTileQueue &tile = mTiles[id];
- tile.mWorkItemsFE++;
- tile.mId = id;
+ MacroTileQueue *pTile = mTiles[id];
+ if (!pTile)
+ {
+ pTile = mTiles[id] = new MacroTileQueue();
+ }
+ pTile->mWorkItemsFE++;
+ pTile->mId = id;
- if (tile.mWorkItemsFE == 1)
+ if (pTile->mWorkItemsFE == 1)
{
- tile.clear(mArena);
- mDirtyTiles.push_back(&tile);
+ pTile->clear(mArena);
+ mDirtyTiles.push_back(pTile);
}
mWorkItemsProduced++;
- tile.enqueue_try_nosync(mArena, pWork);
+ pTile->enqueue_try_nosync(mArena, pWork);
}
void MacroTileMgr::markTileComplete(uint32_t id)
{
- SWR_ASSERT(mTiles.find(id) != mTiles.end());
- MacroTileQueue &tile = mTiles[id];
+ SWR_ASSERT(mTiles.size() > id);
+ MacroTileQueue &tile = *mTiles[id];
uint32_t numTiles = tile.mWorkItemsFE;
InterlockedExchangeAdd(&mWorkItemsConsumed, numTiles);
diff --git a/src/gallium/drivers/swr/rasterizer/core/tilemgr.h b/src/gallium/drivers/swr/rasterizer/core/tilemgr.h
index 2831010b12..8392db1b05 100644
--- a/src/gallium/drivers/swr/rasterizer/core/tilemgr.h
+++ b/src/gallium/drivers/swr/rasterizer/core/tilemgr.h
@@ -31,6 +31,7 @@
#include <set>
#include <unordered_map>
#include "common/formats.h"
+#include "common/intrin.h"
#include "fifo.hpp"
#include "context.h"
#include "format_traits.h"
@@ -41,7 +42,7 @@
struct MacroTileQueue
{
MacroTileQueue() { }
- ~MacroTileQueue() { }
+ ~MacroTileQueue() { destroy(); }
//////////////////////////////////////////////////////////////////////////
/// @brief Returns number of work items queued for this tile.
@@ -110,9 +111,9 @@ public:
MacroTileMgr(CachingArena& arena);
~MacroTileMgr()
{
- for (auto &tile : mTiles)
+ for (auto *pTile : mTiles)
{
- tile.second.destroy();
+ delete pTile;
}
}
@@ -136,13 +137,20 @@ public:
static INLINE void getTileIndices(uint32_t tileID, uint32_t &x, uint32_t &y)
{
- y = tileID & 0xffff;
- x = (tileID >> 16) & 0xffff;
+ // Morton / Z order of tiles
+ x = pext_u32(tileID, 0x55555555);
+ y = pext_u32(tileID, 0xAAAAAAAA);
+ }
+
+ static INLINE uint32_t getTileId(uint32_t x, uint32_t y) // Builds a 32-bit tile id from tile coordinates (x, y).
+ {
+ // Morton / Z order of tiles: x is spread over mask 0x55555555 (even bit positions), y over mask 0xAAAAAAAA (odd bit positions), and the two results are OR-ed.
+ return pdep_u32(x, 0x55555555) | pdep_u32(y, 0xAAAAAAAA); // presumably pdep_u32 deposits the low bits of its first argument into the set bits of the mask (16 per mask, so only the low 16 bits of x and y would survive) - TODO confirm against common/intrin.h
}
private:
CachingArena& mArena;
- std::unordered_map<uint32_t, MacroTileQueue> mTiles;
+ std::vector<MacroTileQueue*> mTiles;
// Any tile that has work queued to it is a dirty tile.
std::vector<MacroTileQueue*> mDirtyTiles;
diff --git a/src/gallium/drivers/swr/rasterizer/core/tileset.h b/src/gallium/drivers/swr/rasterizer/core/tileset.h
new file mode 100644
index 0000000000..3eb4c5d1f0
--- /dev/null
+++ b/src/gallium/drivers/swr/rasterizer/core/tileset.h
@@ -0,0 +1,105 @@
+/****************************************************************************
+* Copyright (C) 2018 Intel Corporation. All Rights Reserved.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice (including the next
+* paragraph) shall be included in all copies or substantial portions of the
+* Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+* IN THE SOFTWARE.
+*
+* @file tileset.h
+*
+* @brief Custom bitset class for managing locked tiles
+*
+******************************************************************************/
+#pragma once
+
+struct TileSet // Growable bitset over size_t indices, stored as an array of size_t words. NOTE(review): this header includes nothing itself; std::max, memset, memcpy, INLINE and AlignedMalloc/AlignedFree must already be visible where it is included.
+{
+ ~TileSet() // Releases the word array. NOTE(review): no copy constructor/assignment is declared, so a copied TileSet would share m_bits and free it twice.
+ {
+ if (m_bits) // still nullptr when set() was never called
+ {
+ AlignedFree(m_bits);
+ }
+ }
+ INLINE void set(size_t idx) // Sets bit idx, enlarging the storage first when idx >= m_size.
+ {
+ _grow(idx);
+ size_t& word = _get_word(idx);
+ word |= (size_t(1) << (idx & BITS_OFFSET)); // idx & BITS_OFFSET is the bit position inside the word
+ m_maxSet = std::max(m_maxSet, idx + 1); // remember one past the highest index set, so clear() can stop there
+ }
+ INLINE bool get(size_t idx) // Returns whether bit idx is set; never allocates.
+ {
+ if (idx >= m_size) // beyond current capacity: reported as not set
+ {
+ return false;
+ }
+ size_t word = _get_word(idx);
+ return 0 != (word & (size_t(1) << (idx & BITS_OFFSET)));
+ }
+
+ INLINE void clear() // Resets every bit; zeroes only the words up to m_maxSet and keeps the allocation.
+ {
+ if (m_maxSet) // nothing to do when no bit was set since the last clear()
+ {
+ size_t num_words = (m_maxSet + BITS_OFFSET) / BITS_PER_WORD; // m_maxSet rounded up to whole words
+ memset(m_bits, 0, sizeof(size_t) * num_words);
+ m_maxSet = 0;
+ }
+ }
+
+private:
+ static const size_t BITS_PER_WORD = sizeof(size_t) * 8; // bits held by one size_t word
+ static const size_t BITS_OFFSET = BITS_PER_WORD - 1; // mask for the bit position inside a word; also the addend used to round up to whole words
+
+ size_t m_size = 0; // capacity in bits; only ever 0 or a value rounded to a multiple of BITS_PER_WORD by _grow()
+ size_t m_maxSet = 0; // one past the highest index passed to set() since the last clear(); 0 means nothing set
+ size_t* m_bits = nullptr; // word array obtained from AlignedMalloc (alignment argument 64); nullptr until the first _grow()
+
+ INLINE size_t& _get_word(size_t idx) // Reference to the word holding bit idx; no bounds check, callers guarantee idx < m_size.
+ {
+ return m_bits[idx / BITS_PER_WORD];
+ }
+
+ void _grow(size_t idx) // Ensures capacity covers idx: allocates a larger array, copies the old words and zeroes the added ones.
+ {
+ if (idx < m_size) // already large enough
+ {
+ return;
+ }
+
+ size_t new_size = (1 + idx + BITS_OFFSET) & ~BITS_OFFSET; // idx + 1 rounded up to a whole number of words. NOTE(review): sized exactly, so every set() past the current capacity reallocates and copies.
+ size_t num_words = new_size / BITS_PER_WORD;
+ size_t* newBits = (size_t*)AlignedMalloc(sizeof(size_t) * num_words, 64); // NOTE(review): the result is used without a nullptr check - confirm AlignedMalloc cannot fail here
+ size_t copy_words = 0;
+
+ if (m_bits)
+ {
+ copy_words = (m_size + BITS_OFFSET) / BITS_PER_WORD; // number of words in the old array
+ num_words -= copy_words; // from here on num_words counts only the newly added words
+ memcpy(newBits, m_bits, copy_words * sizeof(size_t));
+
+ AlignedFree(m_bits);
+ }
+
+ m_bits = newBits;
+ m_size = new_size;
+
+ memset(&m_bits[copy_words], 0, sizeof(size_t) * num_words); // zero just the new tail; the copied words keep their bits
+ }
+};