summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Tim Rowley <timothy.o.rowley@intel.com> 2017-09-06 14:59:33 -0500
committer: Tim Rowley <timothy.o.rowley@intel.com> 2017-09-13 10:09:47 -0500
commit: ead0dfe31ec7a1b1928e4abbfa99d59e0e5e929a (patch)
tree: f7dd741cc766f764474b850c40d566990292b903
parent: 1ccf9ad280415536056095314b470156e29b057e (diff)
swr/rast: adjust linux cpu topology identification code
Make the code more robust so that it handles strange configurations, such as a VMware-exported 4-way NUMA x 1-core configuration. Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/threads.cpp 81
1 files changed, 38 insertions, 43 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
index b704d23f54..4bb395dec3 100644
--- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
@@ -169,37 +169,16 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThread
std::ifstream input("/proc/cpuinfo");
std::string line;
char* c;
- uint32_t threadId = uint32_t(-1);
+ uint32_t procId = uint32_t(-1);
uint32_t coreId = uint32_t(-1);
- uint32_t numaId = uint32_t(-1);
+ uint32_t physId = uint32_t(-1);
while (std::getline(input, line))
{
if (line.find("processor") != std::string::npos)
{
- if (threadId != uint32_t(-1))
- {
- // Save information.
- if (out_nodes.size() <= numaId)
- {
- out_nodes.resize(numaId + 1);
- }
-
- auto& numaNode = out_nodes[numaId];
- if (numaNode.cores.size() <= coreId)
- {
- numaNode.cores.resize(coreId + 1);
- }
-
- auto& core = numaNode.cores[coreId];
- core.procGroup = coreId;
- core.threadIds.push_back(threadId);
-
- out_numThreadsPerProcGroup++;
- }
-
auto data_start = line.find(": ") + 2;
- threadId = std::strtoul(&line.c_str()[data_start], &c, 10);
+ procId = std::strtoul(&line.c_str()[data_start], &c, 10);
continue;
}
if (line.find("core id") != std::string::npos)
@@ -211,29 +190,32 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThread
if (line.find("physical id") != std::string::npos)
{
auto data_start = line.find(": ") + 2;
- numaId = std::strtoul(&line.c_str()[data_start], &c, 10);
+ physId = std::strtoul(&line.c_str()[data_start], &c, 10);
continue;
}
+ if (line.length() == 0)
+ {
+ if (physId + 1 > out_nodes.size())
+ out_nodes.resize(physId + 1);
+ auto& numaNode = out_nodes[physId];
+ numaNode.numaId = physId;
+
+ if (coreId + 1 > numaNode.cores.size())
+ numaNode.cores.resize(coreId + 1);
+ auto& core = numaNode.cores[coreId];
+ core.procGroup = coreId;
+ core.threadIds.push_back(procId);
+ }
}
- if (threadId != uint32_t(-1))
+ out_numThreadsPerProcGroup = 0;
+ for (auto &node : out_nodes)
{
- // Save information.
- if (out_nodes.size() <= numaId)
+ for (auto &core : node.cores)
{
- out_nodes.resize(numaId + 1);
+ out_numThreadsPerProcGroup = std::max((size_t)out_numThreadsPerProcGroup,
+ core.threadIds.size());
}
- auto& numaNode = out_nodes[numaId];
- numaNode.numaId = numaId;
- if (numaNode.cores.size() <= coreId)
- {
- numaNode.cores.resize(coreId + 1);
- }
- auto& core = numaNode.cores[coreId];
-
- core.procGroup = coreId;
- core.threadIds.push_back(threadId);
- out_numThreadsPerProcGroup++;
}
#else
@@ -316,7 +298,11 @@ void bindThread(SWR_CONTEXT* pContext, uint32_t threadId, uint32_t procGroupId =
CPU_ZERO(&cpuset);
CPU_SET(threadId, &cpuset);
- pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset);
+ int err = pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset);
+ if (err != 0)
+ {
+ fprintf(stderr, "pthread_setaffinity_np failure for tid %u: %s\n", threadId, strerror(err));
+ }
#endif
}
@@ -1031,7 +1017,16 @@ void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
}
else
{
- pPool->numaMask = numNodes - 1; // Only works for 2**n numa nodes (1, 2, 4, etc.)
+ // numa distribution assumes workers on all nodes
+ bool useNuma = true;
+ if (numCoresPerNode * numHyperThreads == 1)
+ useNuma = false;
+
+ if (useNuma) {
+ pPool->numaMask = numNodes - 1; // Only works for 2**n numa nodes (1, 2, 4, etc.)
+ } else {
+ pPool->numaMask = 0;
+ }
uint32_t workerId = 0;
for (uint32_t n = 0; n < numNodes; ++n)
@@ -1064,7 +1059,7 @@ void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
pPool->pThreadData[workerId].workerId = workerId;
pPool->pThreadData[workerId].procGroupId = core.procGroup;
pPool->pThreadData[workerId].threadId = core.threadIds[t];
- pPool->pThreadData[workerId].numaId = node.numaId;
+ pPool->pThreadData[workerId].numaId = useNuma ? n : 0;
pPool->pThreadData[workerId].coreId = c;
pPool->pThreadData[workerId].htId = t;
pPool->pThreadData[workerId].pContext = pContext;