diff options
author | Jose Fonseca <jfonseca@vmware.com> | 2016-01-11 15:06:06 +0000 |
---|---|---|
committer | Jose Fonseca <jfonseca@vmware.com> | 2016-01-11 15:06:06 +0000 |
commit | 8b76e208f3ba77f26017122fe9661d2221028f7a (patch) | |
tree | f9b30f7eee489b4630a56ae6795fdc91ae1896a5 /retrace | |
parent | 71a33a59fe2c36c092effb35aceb2ba7930cf90b (diff) | |
parent | 51879aa5782b638ff38d730fb06d857b69598ed9 (diff) |
Merge branch 'metric_abstraction'
Diffstat (limited to 'retrace')
-rw-r--r-- | retrace/CMakeLists.txt | 5 | ||||
-rw-r--r-- | retrace/glretrace.hpp | 20 | ||||
-rwxr-xr-x | retrace/glretrace_main.cpp | 166 | ||||
-rw-r--r-- | retrace/glretrace_ws.cpp | 4 | ||||
-rw-r--r-- | retrace/metric_backend.hpp | 285 | ||||
-rw-r--r-- | retrace/metric_backend_amd_perfmon.cpp | 449 | ||||
-rw-r--r-- | retrace/metric_backend_amd_perfmon.hpp | 178 | ||||
-rw-r--r-- | retrace/metric_backend_intel_perfquery.cpp | 361 | ||||
-rw-r--r-- | retrace/metric_backend_intel_perfquery.hpp | 170 | ||||
-rw-r--r-- | retrace/metric_backend_opengl.cpp | 456 | ||||
-rw-r--r-- | retrace/metric_backend_opengl.hpp | 173 | ||||
-rw-r--r-- | retrace/metric_helper.cpp | 192 | ||||
-rw-r--r-- | retrace/metric_writer.cpp | 199 | ||||
-rw-r--r-- | retrace/metric_writer.hpp | 99 | ||||
-rw-r--r-- | retrace/mmap_allocator.hpp | 143 | ||||
-rw-r--r-- | retrace/retrace.hpp | 9 | ||||
-rw-r--r-- | retrace/retrace_main.cpp | 92 |
17 files changed, 2985 insertions, 16 deletions
diff --git a/retrace/CMakeLists.txt b/retrace/CMakeLists.txt index cce69e14..7f58d7e4 100644 --- a/retrace/CMakeLists.txt +++ b/retrace/CMakeLists.txt @@ -78,6 +78,11 @@ add_library (glretrace_common STATIC glstate_params.cpp glstate_shaders.cpp glws.cpp + metric_helper.cpp + metric_writer.cpp + metric_backend_amd_perfmon.cpp + metric_backend_intel_perfquery.cpp + metric_backend_opengl.cpp ) add_dependencies (glretrace_common glproc) target_link_libraries (glretrace_common diff --git a/retrace/glretrace.hpp b/retrace/glretrace.hpp index 7e26d6de..fc36c525 100644 --- a/retrace/glretrace.hpp +++ b/retrace/glretrace.hpp @@ -27,6 +27,8 @@ #include "glws.hpp" #include "retrace.hpp" +#include "metric_backend.hpp" +#include "metric_writer.hpp" #include "os_thread.hpp" @@ -71,6 +73,14 @@ struct Context { } }; +extern bool metricBackendsSetup; +extern bool profilingContextAcquired; +extern bool profilingBoundaries[QUERY_BOUNDARY_LIST_END]; +extern unsigned profilingBoundariesIndex[QUERY_BOUNDARY_LIST_END]; +extern std::vector<MetricBackend*> metricBackends; +extern MetricBackend* curMetricBackend; +extern MetricWriter profiler; + extern glprofile::Profile defaultProfile; extern bool supportsARBShaderObjects; @@ -122,6 +132,8 @@ extern const retrace::Entry egl_callbacks[]; void frame_complete(trace::Call &call); void initContext(); +void beforeContextSwitch(); +void afterContextSwitch(); void updateDrawable(int width, int height); @@ -130,6 +142,14 @@ void flushQueries(); void beginProfile(trace::Call &call, bool isDraw); void endProfile(trace::Call &call, bool isDraw); +MetricBackend* getBackend(std::string backendName); + +bool isLastPass(); + +void listMetricsCLI(); + +void enableMetricsFromCLI(const char* metrics, QueryBoundary pollingRule); + GLenum blockOnFence(trace::Call &call, GLsync sync, GLbitfield flags); diff --git a/retrace/glretrace_main.cpp b/retrace/glretrace_main.cpp index 525822ad..c3783cc9 100755 --- a/retrace/glretrace_main.cpp +++ 
b/retrace/glretrace_main.cpp @@ -234,6 +234,32 @@ flushQueries() { void beginProfile(trace::Call &call, bool isDraw) { + if (retrace::profilingWithBackends) { + if (profilingBoundaries[QUERY_BOUNDARY_CALL] || + profilingBoundaries[QUERY_BOUNDARY_DRAWCALL]) { + if (curMetricBackend) { + curMetricBackend->beginQuery(isDraw ? QUERY_BOUNDARY_DRAWCALL : QUERY_BOUNDARY_CALL); + } + if (isLastPass() && curMetricBackend) { + Context *currentContext = getCurrentContext(); + GLuint program = currentContext ? currentContext->activeProgram : 0; + unsigned eventId = profilingBoundariesIndex[QUERY_BOUNDARY_CALL]++; + ProfilerCall::data callData = {false, + call.no, + program, + call.sig->name}; + if (profilingBoundaries[QUERY_BOUNDARY_CALL]) { + profiler.addQuery(QUERY_BOUNDARY_CALL, eventId, &callData); + } + if (isDraw && profilingBoundaries[QUERY_BOUNDARY_DRAWCALL]) { + eventId = profilingBoundariesIndex[QUERY_BOUNDARY_DRAWCALL]++; + profiler.addQuery(QUERY_BOUNDARY_DRAWCALL, eventId, &callData); + } + } + } + return; + } + glretrace::Context *currentContext = glretrace::getCurrentContext(); /* Create call query */ @@ -277,6 +303,15 @@ beginProfile(trace::Call &call, bool isDraw) { void endProfile(trace::Call &call, bool isDraw) { + if (retrace::profilingWithBackends) { + if (profilingBoundaries[QUERY_BOUNDARY_CALL] || + profilingBoundaries[QUERY_BOUNDARY_DRAWCALL]) { + if (curMetricBackend) { + curMetricBackend->endQuery(isDraw ? 
QUERY_BOUNDARY_DRAWCALL : QUERY_BOUNDARY_CALL); + } + } + return; + } /* CPU profiling for all calls */ if (retrace::profilingCpuTimes) { @@ -438,7 +473,32 @@ initContext() { void frame_complete(trace::Call &call) { - if (retrace::profiling) { + if (retrace::profilingWithBackends) { + if (profilingBoundaries[QUERY_BOUNDARY_CALL] || + profilingBoundaries[QUERY_BOUNDARY_DRAWCALL]) + { + if (isLastPass() && curMetricBackend) { + // frame end indicator + ProfilerCall::data callData = {true, 0, 0, ""}; + if (profilingBoundaries[QUERY_BOUNDARY_CALL]) { + profiler.addQuery(QUERY_BOUNDARY_CALL, 0, &callData); + } + if (profilingBoundaries[QUERY_BOUNDARY_DRAWCALL]) { + profiler.addQuery(QUERY_BOUNDARY_DRAWCALL, 0, &callData); + } + } + } + if (curMetricBackend) { + curMetricBackend->endQuery(QUERY_BOUNDARY_FRAME); + } + if (profilingBoundaries[QUERY_BOUNDARY_FRAME]) { + if (isLastPass() && curMetricBackend) { + profiler.addQuery(QUERY_BOUNDARY_FRAME, + profilingBoundariesIndex[QUERY_BOUNDARY_FRAME]++); + } + } + } + else if (retrace::profiling) { /* Complete any remaining queries */ flushQueries(); @@ -460,6 +520,81 @@ frame_complete(trace::Call &call) { !currentDrawable->visible) { retrace::warning(call) << "could not infer drawable size (glViewport never called)\n"; } + + if (curMetricBackend) { + curMetricBackend->beginQuery(QUERY_BOUNDARY_FRAME); + } +} + +void +beforeContextSwitch() +{ + if (profilingContextAcquired && retrace::profilingWithBackends && + curMetricBackend) + { + curMetricBackend->pausePass(); + } +} + +void +afterContextSwitch() +{ + + if (retrace::profilingListMetrics) { + listMetricsCLI(); + exit(0); + } + + if (retrace::profilingWithBackends) { + if (!metricBackendsSetup) { + if (retrace::profilingCallsMetricsString) { + enableMetricsFromCLI(retrace::profilingCallsMetricsString, + QUERY_BOUNDARY_CALL); + } + if (retrace::profilingFramesMetricsString) { + enableMetricsFromCLI(retrace::profilingFramesMetricsString, + QUERY_BOUNDARY_FRAME); + } + if 
(retrace::profilingDrawCallsMetricsString) { + enableMetricsFromCLI(retrace::profilingDrawCallsMetricsString, + QUERY_BOUNDARY_DRAWCALL); + } + unsigned numPasses = 0; + for (auto &b : metricBackends) { + b->generatePasses(); + numPasses += b->getNumPasses(); + } + retrace::numPasses = numPasses > 0 ? numPasses : 1; + if (retrace::profilingNumPasses) { + std::cout << retrace::numPasses << std::endl; + exit(0); + } + metricBackendsSetup = true; + } + + if (!profilingContextAcquired) { + unsigned numPasses = 0; + for (auto &b : metricBackends) { + numPasses += b->getNumPasses(); + if (retrace::curPass < numPasses) { + curMetricBackend = b; + b->beginPass(); // begin pass + break; + } + } + + if (curMetricBackend) { + curMetricBackend->beginQuery(QUERY_BOUNDARY_FRAME); + } + + profilingContextAcquired = true; + return; + } + + if (curMetricBackend) { + curMetricBackend->continuePass(); + } + } } @@ -671,10 +806,39 @@ retrace::flushRendering(void) { void retrace::finishRendering(void) { + if (profilingWithBackends && glretrace::curMetricBackend) { + (glretrace::curMetricBackend)->endQuery(QUERY_BOUNDARY_FRAME); + } + if (glretrace::profilingBoundaries[QUERY_BOUNDARY_FRAME]) { + if (glretrace::isLastPass() && glretrace::curMetricBackend) { + glretrace::profiler.addQuery(QUERY_BOUNDARY_FRAME, + glretrace::profilingBoundariesIndex[QUERY_BOUNDARY_FRAME]++); + } + } + glretrace::Context *currentContext = glretrace::getCurrentContext(); if (currentContext) { glFinish(); } + + if (retrace::profilingWithBackends) { + if (glretrace::curMetricBackend) { + (glretrace::curMetricBackend)->endPass(); + glretrace::profilingContextAcquired = false; + } + + if (glretrace::isLastPass()) { + if (glretrace::profilingBoundaries[QUERY_BOUNDARY_FRAME]) { + glretrace::profiler.writeAll(QUERY_BOUNDARY_FRAME); + } + if (glretrace::profilingBoundaries[QUERY_BOUNDARY_CALL]) { + glretrace::profiler.writeAll(QUERY_BOUNDARY_CALL); + } + if (glretrace::profilingBoundaries[QUERY_BOUNDARY_DRAWCALL]) { 
+ glretrace::profiler.writeAll(QUERY_BOUNDARY_DRAWCALL); + } + } + } } void diff --git a/retrace/glretrace_ws.cpp b/retrace/glretrace_ws.cpp index cfac4f55..6ab2f18b 100644 --- a/retrace/glretrace_ws.cpp +++ b/retrace/glretrace_ws.cpp @@ -167,6 +167,8 @@ makeCurrent(trace::Call &call, glws::Drawable *drawable, Context *context) flushQueries(); + beforeContextSwitch(); + bool success = glws::makeCurrent(drawable, context ? context->wsContext : NULL); if (!success) { @@ -185,6 +187,8 @@ makeCurrent(trace::Call &call, glws::Drawable *drawable, Context *context) } } + afterContextSwitch(); + return true; } diff --git a/retrace/metric_backend.hpp b/retrace/metric_backend.hpp new file mode 100644 index 00000000..cf58e093 --- /dev/null +++ b/retrace/metric_backend.hpp @@ -0,0 +1,285 @@ +/************************************************************************** + * + * Copyright 2015 Alexander Trukhin + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + **************************************************************************/ + +#pragma once + +#include <memory> +#include <string> + +/** + * Profiling boundary. + */ +enum QueryBoundary { + QUERY_BOUNDARY_DRAWCALL = 0, /**< draw call boundary */ + QUERY_BOUNDARY_FRAME, /**< frame boundary */ + QUERY_BOUNDARY_CALL, /**< any call boundary */ + QUERY_BOUNDARY_LIST_END +}; + +/** + * Numeric type of the metric. + */ +enum MetricNumType { + CNT_NUM_UINT = 0, + CNT_NUM_FLOAT, + CNT_NUM_UINT64, + CNT_NUM_DOUBLE, + CNT_NUM_BOOL, + CNT_NUM_INT64 +}; + +/** + * Type of data metric represents. + */ +enum MetricType { + CNT_TYPE_GENERIC = 0, /**< generally a number, comparable type */ + CNT_TYPE_NUM, /**< number, not necessarily comparable (e.g. event number) */ + CNT_TYPE_DURATION, /**< duration */ + CNT_TYPE_PERCENT, /**< percentage */ + CNT_TYPE_TIMESTAMP, /**< timestamp (e.g. GL_TIMESTAMP in OpenGL) */ + CNT_TYPE_OTHER /**< not listed above */ +}; + + +/** + * Metric interface. + * + * Each metric can be uniquely identified by a group id and a metric id. + * Each backend is assumed to implement its own version of Metric. It is also + * supposed to provide metric groups and corresponding metrics. + * This interface is used to communicate with backend and can be used + * to store internal data in backend. + * + * It is generally a good idea to cache some parameters (e.g. numeric type). 
+ */ +class Metric +{ +public: + virtual ~Metric() {} + + /** + * Returns metric id + */ + virtual unsigned id() = 0; + + /** + * Returns metric group id + */ + virtual unsigned groupId() = 0; + + /** + * Returns metric name string + */ + virtual std::string name() = 0; + + /** + * Returns metric description string (or an empty string if not available) + */ + virtual std::string description() = 0; + + /** + * Returns metric numeric type + */ + virtual MetricNumType numType() = 0; + + /** + * Returns data type metric represents + */ + virtual MetricType type() = 0; +}; + +/** + * Callbacks for use in backend interface. + * int error : error code (0 - no error) + * void* userData : arbitrary pointer + */ +typedef void (*enumGroupsCallback)(unsigned group, int error, void* userData); +typedef void (*enumMetricsCallback)(Metric* metric, int error, void* userData); +typedef void (*enumDataCallback)(Metric* metric, int event, void* data, + int error, void* userData); + +/** + * Backend interface. + * + * Abstraction for metric-collection system. + * Such system is supposed to have its own version of MetricBackend. + * Backend can be used to query available metrics, to profile calls/frames and + * to collect metrics. + * Backend is responsible for storing metric data. + * + * Unfortunately, not all collection systems allow to collect all metrics + * at the same time. Therefore multiple passes are needed, this interface provides + * the mean to implement such behaviour. + * + * + * Typical workflow example: + * MetricBackend* backend; + * backend->enableMetric(...); + * ... + * backend->enableMetric(...); + * for (i=0; i < backend->generatePasses(); i++) { + * backend->beginPass(); + * + * backend->beginQuery(QUERY_BOUNDARY_FRAME); + * + * backend->beginQuery(QUERY_BOUNDARY_CALL or QUERY_BOUNDARY_DRAWCALL); + * ... profiled call ... + * backend->endQuery(QUERY_BOUNDARY_CALL or QUERY_BOUNDARY_DRAWCALL); + * + * ... 
+ * + * backend->beginQuery(QUERY_BOUNDARY_CALL or QUERY_BOUNDARY_DRAWCALL); + * ... profiled call ... + * backend->endQuery(QUERY_BOUNDARY_CALL or QUERY_BOUNDARY_DRAWCALL); + * + * backend->endQuery(QUERY_BOUNDARY_FRAME); + * + * ... following frames ... + * + * backend->endPass(); + * } + * // collect data + * + * + * It is generally a good idea to implement MetricBackend as a singleton. + */ +class MetricBackend +{ +public: + virtual ~MetricBackend() {} + + /** + * Returns true if MetricBackend is supported on current HW. + */ + virtual bool isSupported() = 0; + + /** + * Enumerates metric groups, calls callback for each group. + */ + virtual void enumGroups(enumGroupsCallback callback, + void* userData = nullptr) = 0; + + /** + * Enumerates metrics in specified group, calls callback for each metric. + * Callback receives pointer to the metric object among other params. + * Metric object is an object of class derived from Metric. + */ + virtual void enumMetrics(unsigned group, enumMetricsCallback callback, + void* userData = nullptr) = 0; + + /** + * Returns group name string (or an empty string if not available). + */ + virtual std::string getGroupName(unsigned group) = 0; + + /** + * Returns pointer to the metric object with given group id, metric id. + * Metric object is an object of class derived from Metric. + */ + virtual std::unique_ptr<Metric> getMetricById(unsigned groupId, unsigned metricId) = 0; + + /** + * Returns pointer to the metric object with given metric name string. + * Metric object is an object of class derived from Metric. + */ + virtual std::unique_ptr<Metric> getMetricByName(std::string metricName) = 0; + + /** + * Adds given metric object to the internal list of metrics + * to be profiled. + * pollingRule sets the boundary for collecting metric + * Returns error code (0 - no error). 
+ */ + virtual int enableMetric(Metric* metric, QueryBoundary pollingRule = QUERY_BOUNDARY_DRAWCALL) = 0; + + /** + * Generates passes based on enabled metrics. + * Returns number of generated passes. + */ + virtual unsigned generatePasses() = 0; + + /** + * Begins pass. Subsequent calls begin next passes. + * A pass needs to be ended before starting a new one. + */ + virtual void beginPass() = 0; + + /** + * Ends pass. + */ + virtual void endPass() = 0; + + /** + * Pause pass with all the queries in progress. + * Backend decides what to do with the data of interrupted + * query. + * Can be used before the context switch in OpenGl. + */ + virtual void pausePass() = 0; + + /** + * Continue profiling the pass after pausePass(). + * Backend decides whether to reprofile interrupted by pausePass() query. + * Can be used after the context switch in OpenGl. + */ + virtual void continuePass() = 0; + + /** + * Begins query (profiles unit, i.e. frames or calls). Subsequent calls + * begin next queries. + * Parameter boundary should be set to the type of boundary beginQuery/endQuery + * constructions enclose. + * A query needs to be ended before starting a new one. + */ + virtual void beginQuery(QueryBoundary boundary = QUERY_BOUNDARY_DRAWCALL) = 0; + + /** + * Ends query. + * Parameter boundary should be set to the type of boundary beginQuery/endQuery + * constructions enclose. + */ + virtual void endQuery(QueryBoundary boundary = QUERY_BOUNDARY_DRAWCALL) = 0; + + /** + * Enumerates collected metrics data for a given query id and given + * type of boundary. + * Query ids begin with 0 for first query. + * Metric data is passed to callback. + * + * The order in which metrics are returned can differ from the one in which + * metrics were enabled (via enableMetric(...)) . However, it should be + * guaranteed that order is the same for every query. 
+ */ + virtual void enumDataQueryId(unsigned id, enumDataCallback callback, + QueryBoundary boundary, + void* userData = nullptr) = 0; + + /** + * Returns number of passes generated by generatePasses(...). + * If generatePasses(...) was not called returns 1. + */ + virtual unsigned getNumPasses() = 0; + +}; diff --git a/retrace/metric_backend_amd_perfmon.cpp b/retrace/metric_backend_amd_perfmon.cpp new file mode 100644 index 00000000..77a870b8 --- /dev/null +++ b/retrace/metric_backend_amd_perfmon.cpp @@ -0,0 +1,449 @@ +/************************************************************************** + * + * Copyright 2015 Alexander Trukhin + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "metric_backend_amd_perfmon.hpp" + +void Metric_AMD_perfmon::precache() { + GLenum type; + int length; + std::string name; + + glGetPerfMonitorCounterInfoAMD(m_group, m_id, GL_COUNTER_TYPE_AMD, &type); + if (type == GL_UNSIGNED_INT) m_nType = CNT_NUM_UINT; + else if (type == GL_FLOAT || type == GL_PERCENTAGE_AMD) m_nType = CNT_NUM_FLOAT; + else if (type == GL_UNSIGNED_INT64_AMD) m_nType = CNT_NUM_UINT64; + else m_nType = CNT_NUM_UINT; + + glGetPerfMonitorCounterStringAMD(m_group, m_id, 0, &length, nullptr); + name.resize(length); + glGetPerfMonitorCounterStringAMD(m_group, m_id, length, 0, &name[0]); + m_name = name; + m_precached = true; +} + +unsigned Metric_AMD_perfmon::id() { + return m_id; +} + +unsigned Metric_AMD_perfmon::groupId() { + return m_group; +} + +std::string Metric_AMD_perfmon::name() { + if (!m_precached) precache(); + return m_name; +} + +std::string Metric_AMD_perfmon::description() { + return ""; // no description available +} + +GLenum Metric_AMD_perfmon::size() { + GLenum type; + glGetPerfMonitorCounterInfoAMD(m_group, m_id, GL_COUNTER_TYPE_AMD, &type); + if (type == GL_UNSIGNED_INT) return sizeof(GLuint); + else if (type == GL_FLOAT || type == GL_PERCENTAGE_AMD) return sizeof(GLfloat); + else if (type == GL_UNSIGNED_INT64_AMD) return sizeof(uint64_t); + else return sizeof(GLuint); +} + +MetricNumType Metric_AMD_perfmon::numType() { + if (!m_precached) precache(); + return m_nType; +} + +MetricType Metric_AMD_perfmon::type() { + GLenum type; + glGetPerfMonitorCounterInfoAMD(m_group, m_id, GL_COUNTER_TYPE_AMD, &type); + if ((type == GL_UNSIGNED_INT || type == GL_UNSIGNED_INT64_AMD) || + (type == GL_FLOAT)) return CNT_TYPE_GENERIC; + else if (type == GL_PERCENTAGE_AMD) return CNT_TYPE_PERCENT; + else return CNT_TYPE_OTHER; +} + + +MetricBackend_AMD_perfmon::DataCollector::~DataCollector() { + for (auto &t1 : data) { + for (auto &t2 : t1) { + 
alloc.deallocate(t2, 1); + } + } +} + +unsigned* +MetricBackend_AMD_perfmon::DataCollector::newDataBuffer(unsigned event, + size_t size) +{ + // in case there is no data for previous events fill with nullptr + data[curPass].resize(event, nullptr); + data[curPass].push_back(alloc.allocate(size)); + return data[curPass][event]; +} + +void MetricBackend_AMD_perfmon::DataCollector::endPass() { + curPass++; + data.push_back(mmapdeque<unsigned*>(alloc)); +} + +unsigned* +MetricBackend_AMD_perfmon::DataCollector::getDataBuffer(unsigned pass, + unsigned event) +{ + if (event < data[pass].size()) { + return data[pass][event]; + } else return nullptr; +} + + +MetricBackend_AMD_perfmon::MetricBackend_AMD_perfmon(glretrace::Context* context, + MmapAllocator<char> &alloc) + : numPasses(1), curPass(0), curEvent(0), collector(alloc) { + if (context->hasExtension("GL_AMD_performance_monitor")) { + supported = true; + } else { + supported = false; + } +} + +bool MetricBackend_AMD_perfmon::isSupported() { + return supported; +} + +void MetricBackend_AMD_perfmon::enumGroups(enumGroupsCallback callback, + void* userData) +{ + std::vector<unsigned> groups; + int num_groups; + glGetPerfMonitorGroupsAMD(&num_groups, 0, nullptr); + groups.resize(num_groups); + glGetPerfMonitorGroupsAMD(nullptr, num_groups, &groups[0]); + for(unsigned &g : groups) { + callback(g, 0, userData); + } +} + +void MetricBackend_AMD_perfmon::enumMetrics(unsigned group, + enumMetricsCallback callback, + void* userData) +{ + std::vector<unsigned> metrics; + int num_metrics; + Metric_AMD_perfmon metric(0,0); + glGetPerfMonitorCountersAMD(group, &num_metrics, nullptr, 0, nullptr); + metrics.resize(num_metrics); + glGetPerfMonitorCountersAMD(group, nullptr, nullptr, num_metrics, &metrics[0]); + for(unsigned &c : metrics) { + metric = Metric_AMD_perfmon(group, c); + callback(&metric, 0, userData); + } +} + +std::unique_ptr<Metric> +MetricBackend_AMD_perfmon::getMetricById(unsigned groupId, unsigned metricId) +{ + 
std::unique_ptr<Metric> p(new Metric_AMD_perfmon(groupId, metricId)); + return p; +} + +void MetricBackend_AMD_perfmon::populateLookupGroups(unsigned group, + int error, + void* userData) +{ + reinterpret_cast<MetricBackend_AMD_perfmon*>(userData)->enumMetrics(group, + populateLookupMetrics); +} + +void MetricBackend_AMD_perfmon::populateLookupMetrics(Metric* metric, + int error, + void* userData) +{ + nameLookup[metric->name()] = std::make_pair(metric->groupId(), + metric->id()); +} + +std::unique_ptr<Metric> +MetricBackend_AMD_perfmon::getMetricByName(std::string metricName) +{ + if (nameLookup.empty()) { + enumGroups(populateLookupGroups, this); + } + if (nameLookup.count(metricName) > 0) { + std::unique_ptr<Metric> p(new Metric_AMD_perfmon(nameLookup[metricName].first, + nameLookup[metricName].second)); + return p; + } + else return nullptr; +} + +std::string MetricBackend_AMD_perfmon::getGroupName(unsigned group) { + int length; + std::string name; + glGetPerfMonitorGroupStringAMD(group, 0, &length, nullptr); + name.resize(length); + glGetPerfMonitorGroupStringAMD(group, length, 0, &name[0]); + return name; +} + +int MetricBackend_AMD_perfmon::enableMetric(Metric* metric_, QueryBoundary pollingRule) { + unsigned id = metric_->id(); + unsigned gid = metric_->groupId(); + unsigned monitor; + + // profiling only draw calls + if (pollingRule == QUERY_BOUNDARY_CALL) return 1; + + // check that Metric is valid metric + glGenPerfMonitorsAMD(1, &monitor); + glGetError(); + glSelectPerfMonitorCountersAMD(monitor, 1, gid, 1, &id); + GLenum err = glGetError(); + glDeletePerfMonitorsAMD(1, &monitor); + if (err == GL_INVALID_VALUE) { + return 1; + } + + Metric_AMD_perfmon metric(gid, id); + metric.numType(); // triggers metric vars precache (in case context changes) + metrics[pollingRule].push_back(metric); + return 0; +} + +bool +MetricBackend_AMD_perfmon::testMetrics(std::vector<Metric_AMD_perfmon>* metrics) { + unsigned monitor; + unsigned id; + glGenPerfMonitorsAMD(1, 
&monitor); + for (Metric_AMD_perfmon &c : *metrics) { + id = c.id(); + glSelectPerfMonitorCountersAMD(monitor, 1, c.groupId(), 1, &id); + } + glGetError(); + glBeginPerfMonitorAMD(monitor); + GLenum err = glGetError(); + glEndPerfMonitorAMD(monitor); + glDeletePerfMonitorsAMD(1, &monitor); + if (err == GL_INVALID_OPERATION) { + return 0; + } + return 1; +} + +void MetricBackend_AMD_perfmon::generatePassesBoundary(QueryBoundary boundary) { + std::vector<Metric_AMD_perfmon> copyMetrics(metrics[boundary]); + std::vector<Metric_AMD_perfmon> newPass; + while (!copyMetrics.empty()) { + std::vector<Metric_AMD_perfmon>::iterator it = copyMetrics.begin(); + while (it != copyMetrics.end()) { + newPass.push_back(*it); + if (!testMetrics(&newPass)) { + newPass.pop_back(); + break; + } + it = copyMetrics.erase(it); + } + passes.push_back(newPass); + newPass.clear(); + } +} + +unsigned MetricBackend_AMD_perfmon::generatePasses() { + generatePassesBoundary(QUERY_BOUNDARY_FRAME); + numFramePasses = passes.size(); + generatePassesBoundary(QUERY_BOUNDARY_DRAWCALL); + nameLookup.clear(); // no need in it after all metrics are set up + numPasses = passes.size(); + return passes.size(); +} + +void MetricBackend_AMD_perfmon::beginPass() { + if (!numPasses) return; + /* First process per-frame passes, then per-call passes */ + if (curPass < numFramePasses) { + perFrame = true; + } else { + perFrame = false; + } + /* Generate monitor */ + glGenPerfMonitorsAMD(NUM_MONITORS, monitors); + for (Metric_AMD_perfmon &c : passes[curPass]) { + unsigned id = c.id(); + for (int k = 0; k < NUM_MONITORS; k++) { + glSelectPerfMonitorCountersAMD(monitors[k], 1, c.groupId(), 1, &id); + } + } + curMonitor = 0; + firstRound = true; + curEvent = 0; + supported = true; // can change if context is switched, so revert back +} + +void MetricBackend_AMD_perfmon::endPass() { + if (supported && numPasses) { + for (unsigned k = 0; k < curMonitor; k++) { + freeMonitor(k); + } + glDeletePerfMonitorsAMD(NUM_MONITORS, 
monitors); + } + curPass++; + collector.endPass(); +} + +void MetricBackend_AMD_perfmon::pausePass() { + if (!supported || !numPasses) return; + // clear all queries and monitors + // ignore data from the query in progress + if (queryInProgress) { + glEndPerfMonitorAMD(monitors[curMonitor]); + curEvent++; + queryInProgress = false; + } + for (unsigned k = 0; k < curMonitor; k++) { + freeMonitor(k); + } + glDeletePerfMonitorsAMD(NUM_MONITORS, monitors); +} + +void MetricBackend_AMD_perfmon::continuePass() { + // here new context might be used + // better to check if it supports AMD_perfmon extension + glretrace::Context* context = glretrace::getCurrentContext(); + if (context && context->hasExtension("GL_AMD_performance_monitor")) { + supported = true; + } else { + supported = false; + } + + if (supported && numPasses) { + // call begin pass and save/restore event id + unsigned tempId = curEvent; + beginPass(); + curEvent = tempId; + } +} + +void MetricBackend_AMD_perfmon::beginQuery(QueryBoundary boundary) { + if (!supported || !numPasses) return; + if (boundary == QUERY_BOUNDARY_CALL) return; + if ((boundary == QUERY_BOUNDARY_FRAME) && !perFrame) return; + if ((boundary == QUERY_BOUNDARY_DRAWCALL) && perFrame) return; + + curMonitor %= NUM_MONITORS; + if (!firstRound) freeMonitor(curMonitor); // get existing data + monitorEvent[curMonitor] = curEvent; // save monitored event + glBeginPerfMonitorAMD(monitors[curMonitor]); + queryInProgress = true; +} + +void MetricBackend_AMD_perfmon::endQuery(QueryBoundary boundary) { + if (!queryInProgress) return; + if (!supported || !numPasses) return; + if (boundary == QUERY_BOUNDARY_CALL) return; + if ((boundary == QUERY_BOUNDARY_FRAME) && !perFrame) return; + if ((boundary == QUERY_BOUNDARY_DRAWCALL) && perFrame) return; + + curEvent++; + glEndPerfMonitorAMD(monitors[curMonitor]); + curMonitor++; + if (curMonitor == NUM_MONITORS) firstRound = 0; + queryInProgress = false; +} + +void 
MetricBackend_AMD_perfmon::freeMonitor(unsigned monitorId) { + unsigned monitor = monitors[monitorId]; + GLuint dataAvail = 0; + GLuint size; + + glFlush(); + while (!dataAvail) { + glGetPerfMonitorCounterDataAMD(monitor, GL_PERFMON_RESULT_AVAILABLE_AMD, + sizeof(GLuint), &dataAvail, nullptr); + } + glGetPerfMonitorCounterDataAMD(monitor, GL_PERFMON_RESULT_SIZE_AMD, + sizeof(GLuint), &size, nullptr); + // collect data + unsigned* buf = collector.newDataBuffer(monitorEvent[monitorId], + size/sizeof(unsigned)); + glGetPerfMonitorCounterDataAMD(monitor, GL_PERFMON_RESULT_AMD, size, buf, nullptr); + + /* populate metricOffsets */ + if (metricOffsets.size() < curPass + 1) { + std::map<std::pair<unsigned, unsigned>, unsigned> pairOffsets; + unsigned offset = 0; + unsigned id, gid; + for (int k = 0; k < passes[curPass].size(); k++) { + gid = buf[offset++]; + id = buf[offset++]; + pairOffsets[std::make_pair(gid, id)] = offset; + Metric_AMD_perfmon metric(gid, id); + offset += metric.size() / sizeof(unsigned); + } + // translate to existing metrics in passes variable + std::map<Metric_AMD_perfmon*, unsigned> temp; + for (auto &m : passes[curPass]) { + id = m.id(); + gid = m.groupId(); + temp[&m] = pairOffsets[std::make_pair(gid, id)]; + } + metricOffsets.push_back(std::move(temp)); + } +} + +void +MetricBackend_AMD_perfmon::enumDataQueryId(unsigned id, + enumDataCallback callback, + QueryBoundary boundary, + void* userData) +{ + /* Determine passes to return depending on the boundary */ + if (boundary == QUERY_BOUNDARY_CALL) return; + unsigned j = 0; + unsigned nPasses = numFramePasses; + if (boundary == QUERY_BOUNDARY_DRAWCALL) { + j = numFramePasses; + nPasses = numPasses; + } + /* enum passes */ + for (; j < nPasses; j++) { + unsigned* buf = collector.getDataBuffer(j, id); + for (auto &m : passes[j]) { + void* data = (buf) ? 
&buf[metricOffsets[j][&m]] : nullptr; + callback(&m, id, data, 0, userData); + } + } +} + +unsigned MetricBackend_AMD_perfmon::getNumPasses() { + return numPasses; +} + +MetricBackend_AMD_perfmon& +MetricBackend_AMD_perfmon::getInstance(glretrace::Context* context, + MmapAllocator<char> &alloc) { + static MetricBackend_AMD_perfmon backend(context, alloc); + return backend; +} + + +std::map<std::string, std::pair<unsigned, unsigned>> MetricBackend_AMD_perfmon::nameLookup; diff --git a/retrace/metric_backend_amd_perfmon.hpp b/retrace/metric_backend_amd_perfmon.hpp new file mode 100644 index 00000000..39be7e9d --- /dev/null +++ b/retrace/metric_backend_amd_perfmon.hpp @@ -0,0 +1,178 @@ +/************************************************************************** + * + * Copyright 2015 Alexander Trukhin + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + **************************************************************************/ + +#pragma once + +#include <memory> +#include <vector> +#include <map> +#include <string> + +#include "glproc.hpp" +#include "metric_backend.hpp" +#include "glretrace.hpp" +#include "mmap_allocator.hpp" + +#define NUM_MONITORS 1 // number of max used AMD_perfmon monitors + +class Metric_AMD_perfmon : public Metric +{ +private: + unsigned m_group, m_id; + MetricNumType m_nType; + std::string m_name; + bool m_precached; + + void precache(); + +public: + Metric_AMD_perfmon(unsigned g, unsigned i) : m_group(g), m_id(i), + m_nType(CNT_NUM_UINT), + m_precached(false) {} + + GLenum size(); + + + unsigned id(); + + unsigned groupId(); + + std::string name(); + + std::string description(); + + MetricNumType numType(); + + MetricType type(); +}; + + +class MetricBackend_AMD_perfmon : public MetricBackend +{ +private: + class DataCollector + { + private: + MmapAllocator<unsigned> alloc; // allocator + // deque with custom allocator + template <class T> + using mmapdeque = std::deque<T, MmapAllocator<T>>; + // data storage + mmapdeque<mmapdeque<unsigned*>> data; + unsigned curPass; + + public: + DataCollector(MmapAllocator<char> &alloc) + : alloc(alloc), data(1, mmapdeque<unsigned*>(alloc), alloc), + curPass(0) {} + + ~DataCollector(); + + unsigned* newDataBuffer(unsigned event, size_t size); + + void endPass(); + + unsigned* getDataBuffer(unsigned pass, unsigned event); + }; + +private: + bool supported; // extension support (checked initially and w/ context switch) + bool firstRound; // first profiling round (no need to free monitors) + bool perFrame; // profiling frames? 
+ bool queryInProgress; + unsigned monitors[NUM_MONITORS]; // For cycling + unsigned curMonitor; + unsigned monitorEvent[NUM_MONITORS]; // Event saved in monitor + unsigned numPasses; // all passes + unsigned numFramePasses; // frame passes + unsigned curPass; + unsigned curEvent; // Currently evaluated event + // metrics selected for profiling boundaries (frames, draw calls) + std::vector<Metric_AMD_perfmon> metrics[2]; + // metric sets for each pass + std::vector<std::vector<Metric_AMD_perfmon>> passes; + // metric offsets in data for each pass + std::vector<std::map<Metric_AMD_perfmon*, unsigned>> metricOffsets; + DataCollector collector; // data storage + // lookup table (metric name -> (gid, id)) + static std::map<std::string, std::pair<unsigned, unsigned>> nameLookup; + + MetricBackend_AMD_perfmon(glretrace::Context* context, MmapAllocator<char> &alloc); + + MetricBackend_AMD_perfmon(MetricBackend_AMD_perfmon const&) = delete; + + void operator=(MetricBackend_AMD_perfmon const&) = delete; + + // test if given set of metrics can be sampled in one pass + bool testMetrics(std::vector<Metric_AMD_perfmon>* metrics); + + void freeMonitor(unsigned monitor); // collect metrics data from the monitor + + static void populateLookupGroups(unsigned group, int error, void* userData); + + static void populateLookupMetrics(Metric* metric, int error, void* userData); + + void generatePassesBoundary(QueryBoundary boundary); + +public: + bool isSupported(); + + void enumGroups(enumGroupsCallback callback, void* userData = nullptr); + + void enumMetrics(unsigned group, enumMetricsCallback callback, + void* userData = nullptr); + + std::unique_ptr<Metric> getMetricById(unsigned groupId, unsigned metricId); + + std::unique_ptr<Metric> getMetricByName(std::string metricName); + + std::string getGroupName(unsigned group); + + int enableMetric(Metric* metric, QueryBoundary pollingRule = QUERY_BOUNDARY_DRAWCALL); + + unsigned generatePasses(); + + void beginPass(); + + void endPass(); 
+ + void pausePass(); + + void continuePass(); + + void beginQuery(QueryBoundary boundary = QUERY_BOUNDARY_DRAWCALL); + + void endQuery(QueryBoundary boundary = QUERY_BOUNDARY_DRAWCALL); + + void enumDataQueryId(unsigned id, enumDataCallback callback, + QueryBoundary boundary, + void* userData = nullptr); + + unsigned getNumPasses(); + + static MetricBackend_AMD_perfmon& getInstance(glretrace::Context* context, + MmapAllocator<char> &alloc); +}; + diff --git a/retrace/metric_backend_intel_perfquery.cpp b/retrace/metric_backend_intel_perfquery.cpp new file mode 100644 index 00000000..a5d922cc --- /dev/null +++ b/retrace/metric_backend_intel_perfquery.cpp @@ -0,0 +1,361 @@ +/************************************************************************** + * + * Copyright 2015 Alexander Trukhin + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "metric_backend_intel_perfquery.hpp" + +void Metric_INTEL_perfquery::precache() { + unsigned offset; + glGetPerfCounterInfoINTEL(m_group, m_id, 0, nullptr, 0, nullptr, &offset, + nullptr, nullptr, nullptr, nullptr); + this->m_offset = offset; + + GLenum type; + glGetPerfCounterInfoINTEL(m_group, m_id, 0, nullptr, 0, nullptr, nullptr, + nullptr, nullptr, &type, nullptr); + if (type == GL_PERFQUERY_COUNTER_DATA_UINT32_INTEL) m_nType = CNT_NUM_UINT; + else if (type == GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL) m_nType = CNT_NUM_FLOAT; + else if (type == GL_PERFQUERY_COUNTER_DATA_DOUBLE_INTEL) m_nType = CNT_NUM_DOUBLE; + else if (type == GL_PERFQUERY_COUNTER_DATA_BOOL32_INTEL) m_nType = CNT_NUM_BOOL; + else if (type == GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL) m_nType = CNT_NUM_UINT64; + else m_nType = CNT_NUM_UINT; + + char name[INTEL_NAME_LENGTH]; + glGetPerfCounterInfoINTEL(m_group, m_id, INTEL_NAME_LENGTH, name, 0, nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr); + m_name = std::string(name); + + m_precached = true; +} + +unsigned Metric_INTEL_perfquery::id() { + return m_id; +} + +unsigned Metric_INTEL_perfquery::groupId() { + return m_group; +} + +std::string Metric_INTEL_perfquery::name() { + if (!m_precached) precache(); + return m_name; +} + +std::string Metric_INTEL_perfquery::description() { + char desc[INTEL_DESC_LENGTH]; + glGetPerfCounterInfoINTEL(m_group, m_id, 0, nullptr, INTEL_DESC_LENGTH, desc, + nullptr, nullptr, nullptr, nullptr, nullptr); + return std::string(desc); +} + +unsigned Metric_INTEL_perfquery::offset() { + if (!m_precached) precache(); + return m_offset; +} + +MetricNumType Metric_INTEL_perfquery::numType() { + if (!m_precached) precache(); + return m_nType; +} + +MetricType Metric_INTEL_perfquery::type() { + GLenum type; + glGetPerfCounterInfoINTEL(m_group, m_id, 0, nullptr, 0, nullptr, nullptr, + nullptr, &type, nullptr, nullptr); + if (type 
== GL_PERFQUERY_COUNTER_TIMESTAMP_INTEL) return CNT_TYPE_TIMESTAMP; + else if (type == GL_PERFQUERY_COUNTER_EVENT_INTEL) return CNT_TYPE_NUM; + else if (type == GL_PERFQUERY_COUNTER_DURATION_NORM_INTEL || + type == GL_PERFQUERY_COUNTER_DURATION_RAW_INTEL) return CNT_TYPE_DURATION; + else if (type == GL_PERFQUERY_COUNTER_RAW_INTEL) return CNT_TYPE_GENERIC; + else if (type == GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL) return CNT_TYPE_GENERIC; + else return CNT_TYPE_OTHER; +} + +MetricBackend_INTEL_perfquery::DataCollector::~DataCollector() { + for (auto &t1 : data) { + for (auto &t2 : t1) { + alloc.deallocate(t2, 1); + } + } +} + +unsigned char* +MetricBackend_INTEL_perfquery::DataCollector::newDataBuffer(unsigned event, + size_t size) +{ + // in case there is no data for previous events fill with nullptr + data[curPass].resize(event, nullptr); + data[curPass].push_back(alloc.allocate(size)); + return data[curPass][event]; +} + +void MetricBackend_INTEL_perfquery::DataCollector::endPass() { + curPass++; + data.push_back(mmapdeque<unsigned char*>(alloc)); +} + +unsigned char* +MetricBackend_INTEL_perfquery::DataCollector::getDataBuffer(unsigned pass, + unsigned event) +{ + if (event < data[pass].size()) { + return data[pass][event]; + } else return nullptr; +} + +MetricBackend_INTEL_perfquery::MetricBackend_INTEL_perfquery(glretrace::Context* context, + MmapAllocator<char> &alloc) + : numPasses(1), curPass(0), curEvent(0), collector(alloc) { + if (context->hasExtension("GL_INTEL_performance_query")) { + supported = true; + } else { + supported = false; + } +} + +bool MetricBackend_INTEL_perfquery::isSupported() { + return supported; +} + +void MetricBackend_INTEL_perfquery::enumGroups(enumGroupsCallback callback, + void* userData) +{ + unsigned gid; + glGetFirstPerfQueryIdINTEL(&gid); + while (gid) { + callback(gid, 0, userData); + glGetNextPerfQueryIdINTEL(gid, &gid); + } +} + +void +MetricBackend_INTEL_perfquery::enumMetrics(unsigned group, + enumMetricsCallback 
callback, + void* userData) +{ + unsigned numMetrics; + glGetPerfQueryInfoINTEL(group, 0, nullptr, nullptr, &numMetrics, nullptr, nullptr); + for (int i = 1; i <= numMetrics; i++) { + Metric_INTEL_perfquery metric = Metric_INTEL_perfquery(group, i); + callback(&metric, 0, userData); + } +} + +std::unique_ptr<Metric> +MetricBackend_INTEL_perfquery::getMetricById(unsigned groupId, unsigned metricId) +{ + std::unique_ptr<Metric> p(new Metric_INTEL_perfquery(groupId, metricId)); + return p; +} + +void MetricBackend_INTEL_perfquery::populateLookupGroups(unsigned group, + int error, + void* userData) +{ + reinterpret_cast<MetricBackend_INTEL_perfquery*>(userData)->enumMetrics(group, populateLookupMetrics); +} + +void MetricBackend_INTEL_perfquery::populateLookupMetrics(Metric* metric, + int error, + void* userData) +{ + nameLookup[metric->name()] = std::make_pair(metric->groupId(), + metric->id()); +} + +std::unique_ptr<Metric> +MetricBackend_INTEL_perfquery::getMetricByName(std::string metricName) +{ + if (nameLookup.empty()) { + enumGroups(populateLookupGroups, this); + } + if (nameLookup.count(metricName) > 0) { + std::unique_ptr<Metric> p(new Metric_INTEL_perfquery(nameLookup[metricName].first, + nameLookup[metricName].second)); + return p; + } + else return nullptr; +} + +std::string MetricBackend_INTEL_perfquery::getGroupName(unsigned group) { + char name[INTEL_NAME_LENGTH]; + glGetPerfQueryInfoINTEL(group, INTEL_NAME_LENGTH, name, nullptr, + nullptr, nullptr, nullptr); + return std::string(name); +} + +int MetricBackend_INTEL_perfquery::enableMetric(Metric* metric_, QueryBoundary pollingRule) { + if (pollingRule == QUERY_BOUNDARY_CALL) return 1; + unsigned id = metric_->id(); + unsigned gid = metric_->groupId(); + unsigned numCounters; + + /* check that counter id is in valid range and group exists */ + glGetError(); + glGetPerfQueryInfoINTEL(gid, 0, nullptr, nullptr, &numCounters, nullptr, nullptr); + GLenum err = glGetError(); + if (gid >= numCounters || err == 
GL_INVALID_VALUE) { + return 1; + } + + Metric_INTEL_perfquery metric(gid, id); + metric.offset(); // triggers metric vars precache (in case context changes) + passes[pollingRule][gid].push_back(metric); + return 0; +} + +unsigned MetricBackend_INTEL_perfquery::generatePasses() { + /* begin with passes that profile frames */ + perFrame = true; + curQueryMetrics = passes[QUERY_BOUNDARY_FRAME].begin(); + numFramePasses = passes[QUERY_BOUNDARY_FRAME].size(); + numPasses = numFramePasses + passes[QUERY_BOUNDARY_DRAWCALL].size(); + nameLookup.clear(); // no need in it after all metrics are set up + return numPasses; +} + +void MetricBackend_INTEL_perfquery::beginPass() { + if (!numPasses || curQueryMetrics == passes[QUERY_BOUNDARY_DRAWCALL].end()) return; + /* advance to draw calls after frames */ + if (curQueryMetrics == passes[QUERY_BOUNDARY_FRAME].end()) { + perFrame = false; + curQueryMetrics = passes[QUERY_BOUNDARY_DRAWCALL].begin(); + } + glCreatePerfQueryINTEL(curQueryMetrics->first, &curQuery); + curEvent = 0; + supported = true; // can change if context is switched, so revert back +} + +void MetricBackend_INTEL_perfquery::endPass() { + if (supported && numPasses) { + glDeletePerfQueryINTEL(curQuery); + } + curPass++; + curQueryMetrics++; + collector.endPass(); +} + +void MetricBackend_INTEL_perfquery::pausePass() { + if (!supported || !numPasses) return; + // end query + // ignore data from the query in progress + if (queryInProgress) { + glEndPerfQueryINTEL(curQuery); + curEvent++; + queryInProgress = false; + } + glDeletePerfQueryINTEL(curQuery); +} + +void MetricBackend_INTEL_perfquery::continuePass() { + // here new context might be used + // better to check if it supports INTEL_perfquery extension + glretrace::Context* context = glretrace::getCurrentContext(); + if (context && context->hasExtension("GL_INTEL_performance_query")) { + supported = true; + } else { + supported = false; + } + + if (supported && numPasses) { + // call begin pass and save/restore 
event id + unsigned tempId = curEvent; + beginPass(); + curEvent = tempId; + } +} + +void MetricBackend_INTEL_perfquery::beginQuery(QueryBoundary boundary) { + if (!supported || !numPasses) return; + if (boundary == QUERY_BOUNDARY_CALL) return; + if ((boundary == QUERY_BOUNDARY_FRAME) && !perFrame) return; + if ((boundary == QUERY_BOUNDARY_DRAWCALL) && perFrame) return; + glBeginPerfQueryINTEL(curQuery); + queryInProgress = true; +} + +void MetricBackend_INTEL_perfquery::endQuery(QueryBoundary boundary) { + if (!queryInProgress) return; + if (!supported || !numPasses) return; + if (boundary == QUERY_BOUNDARY_CALL) return; + if ((boundary == QUERY_BOUNDARY_FRAME) && !perFrame) return; + if ((boundary == QUERY_BOUNDARY_DRAWCALL) && perFrame) return; + glEndPerfQueryINTEL(curQuery); + freeQuery(curEvent++); + queryInProgress = false; +} + +void MetricBackend_INTEL_perfquery::freeQuery(unsigned event) { + GLuint size; + GLuint bWritten; + glGetPerfQueryInfoINTEL(curQueryMetrics->first, 0, nullptr, &size, + nullptr, nullptr, nullptr); + unsigned char* data = collector.newDataBuffer(event, size); + + glFlush(); + glGetPerfQueryDataINTEL(curQuery, GL_PERFQUERY_WAIT_INTEL, size, data, &bWritten); + // bWritten != size -> should generate error TODO +} + +void MetricBackend_INTEL_perfquery::enumDataQueryId(unsigned id, + enumDataCallback callback, + QueryBoundary boundary, + void* userData) +{ + /* Determine passes to return depending on the boundary */ + if (boundary == QUERY_BOUNDARY_CALL) return; + auto queryIt = passes[QUERY_BOUNDARY_FRAME].begin(); + unsigned j = 0; + unsigned nPasses = numFramePasses; + if (boundary == QUERY_BOUNDARY_DRAWCALL) { + queryIt = passes[QUERY_BOUNDARY_DRAWCALL].begin(); + j = numFramePasses; + nPasses = numPasses; + } + for (; j < nPasses; j++) { + unsigned char* buf = collector.getDataBuffer(j, id); + for (auto &k : queryIt->second) { + if (buf) { + callback(&k, id, &buf[k.offset()], 0, userData); + } else { // No data buffer (in case event 
#id is not a draw call) + callback(&k, id, nullptr, 0, userData); + } + } + queryIt++; + } +} + +unsigned MetricBackend_INTEL_perfquery::getNumPasses() { + return numPasses; +} + +MetricBackend_INTEL_perfquery& +MetricBackend_INTEL_perfquery::getInstance(glretrace::Context* context, + MmapAllocator<char> &alloc) { + static MetricBackend_INTEL_perfquery backend(context, alloc); + return backend; +} + + +std::map<std::string, std::pair<unsigned, unsigned>> MetricBackend_INTEL_perfquery::nameLookup; diff --git a/retrace/metric_backend_intel_perfquery.hpp b/retrace/metric_backend_intel_perfquery.hpp new file mode 100644 index 00000000..91be55b3 --- /dev/null +++ b/retrace/metric_backend_intel_perfquery.hpp @@ -0,0 +1,170 @@ +/************************************************************************** + * + * Copyright 2015 Alexander Trukhin + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + **************************************************************************/ + +#pragma once + +#include <memory> +#include <vector> +#include <map> +#include <string> + +#include "glproc.hpp" +#include "metric_backend.hpp" +#include "glretrace.hpp" + +#define INTEL_NAME_LENGTH 256 // metric name with max 256 chars +#define INTEL_DESC_LENGTH 1024 // description max 1024 chars + +class Metric_INTEL_perfquery : public Metric +{ +private: + unsigned m_group, m_id; + unsigned m_offset; + std:: string m_name; + MetricNumType m_nType; + bool m_precached; + + void precache(); + +public: + Metric_INTEL_perfquery(unsigned g, unsigned i) : m_group(g), m_id(i), + m_offset(0), + m_nType(CNT_NUM_UINT), + m_precached(false) {} + + unsigned offset(); + + + unsigned id(); + + unsigned groupId(); + + std::string name(); + + std::string description(); + + MetricNumType numType(); + + MetricType type(); +}; + + +class MetricBackend_INTEL_perfquery : public MetricBackend +{ +private: + class DataCollector + { + private: + MmapAllocator<unsigned char> alloc; + // deque with custom allocator + template <class T> + using mmapdeque = std::deque<T, MmapAllocator<T>>; + // data storage + mmapdeque<mmapdeque<unsigned char*>> data; + unsigned curPass; + + public: + DataCollector(MmapAllocator<char> &alloc) + : alloc(alloc), data(1, mmapdeque<unsigned char*>(alloc), alloc), + curPass(0) {} + + ~DataCollector(); + + unsigned char* newDataBuffer(unsigned event, size_t size); + + void endPass(); + + unsigned char* getDataBuffer(unsigned pass, unsigned event); + }; + +private: + // map from query id to its Metric list + std::map<unsigned, std::vector<Metric_INTEL_perfquery>> passes[2]; + /* curQueryMetrics -- iterator through passes */ + std::map<unsigned, std::vector<Metric_INTEL_perfquery>>::iterator curQueryMetrics; + unsigned curQuery; + bool supported; + bool perFrame; + bool queryInProgress; + int numPasses; + int numFramePasses; + int curPass; + unsigned curEvent; // Currently 
evaluated event + DataCollector collector; + /* nameLookup for querying metrics by name */ + static std::map<std::string, std::pair<unsigned, unsigned>> nameLookup; + + MetricBackend_INTEL_perfquery(glretrace::Context* context, MmapAllocator<char> &alloc); + + MetricBackend_INTEL_perfquery(MetricBackend_INTEL_perfquery const&) = delete; + + void operator=(MetricBackend_INTEL_perfquery const&) = delete; + + void freeQuery(unsigned event); // collect metrics data from the query + + static void populateLookupGroups(unsigned group, int error, void* userData); + + static void populateLookupMetrics(Metric* metric, int error, void* userData); + +public: + bool isSupported(); + + void enumGroups(enumGroupsCallback callback, void* userData = nullptr); + + void enumMetrics(unsigned group, enumMetricsCallback callback, + void* userData = nullptr); + + std::unique_ptr<Metric> getMetricById(unsigned groupId, unsigned metricId); + + std::unique_ptr<Metric> getMetricByName(std::string metricName); + + std::string getGroupName(unsigned group); + + int enableMetric(Metric* metric, QueryBoundary pollingRule = QUERY_BOUNDARY_DRAWCALL); + + unsigned generatePasses(); + + void beginPass(); + + void endPass(); + + void pausePass(); + + void continuePass(); + + void beginQuery(QueryBoundary boundary = QUERY_BOUNDARY_DRAWCALL); + + void endQuery(QueryBoundary boundary = QUERY_BOUNDARY_DRAWCALL); + + void enumDataQueryId(unsigned id, enumDataCallback callback, + QueryBoundary boundary, + void* userData = nullptr); + + unsigned getNumPasses(); + + static MetricBackend_INTEL_perfquery& getInstance(glretrace::Context* context, + MmapAllocator<char> &alloc); +}; + diff --git a/retrace/metric_backend_opengl.cpp b/retrace/metric_backend_opengl.cpp new file mode 100644 index 00000000..fec8dcde --- /dev/null +++ b/retrace/metric_backend_opengl.cpp @@ -0,0 +1,456 @@ +#include "metric_backend_opengl.hpp" +#include "os_time.hpp" +#include "os_memory.hpp" + +void 
+MetricBackend_opengl::Storage::addData(QueryBoundary boundary, int64_t data) { + this->data[boundary].push_back(data); +} + +int64_t* MetricBackend_opengl::Storage::getData(QueryBoundary boundary, + unsigned eventId) +{ + return &(data[boundary][eventId]); +} + +Metric_opengl::Metric_opengl(unsigned gId, unsigned id, const std::string &name, + const std::string &desc, MetricNumType nT, MetricType t) + : m_gId(gId), m_id(id), m_name(name), m_desc(desc), m_nType(nT), + m_type(t), available(false) +{ + for (int i = 0; i < QUERY_BOUNDARY_LIST_END; i++) { + profiled[i] = false; + enabled[i] = false; + } +} + +unsigned Metric_opengl::id() { + return m_id; +} + +unsigned Metric_opengl::groupId() { + return m_gId; +} + +std::string Metric_opengl::name() { + return m_name; +} + +std::string Metric_opengl::description() { + return m_desc; +} + +MetricNumType Metric_opengl::numType() { + return m_nType; +} + +MetricType Metric_opengl::type() { + return m_type; +} + +MetricBackend_opengl::MetricBackend_opengl(glretrace::Context* context, + MmapAllocator<char> &alloc) + : alloc(alloc) +{ + glprofile::Profile currentProfile = context->actualProfile(); + supportsTimestamp = currentProfile.versionGreaterOrEqual(glprofile::API_GL, 3, 3) || + context->hasExtension("GL_ARB_timer_query"); + supportsElapsed = context->hasExtension("GL_EXT_timer_query") || supportsTimestamp; + supportsOcclusion = currentProfile.versionGreaterOrEqual(glprofile::API_GL, 1, 5); + + #ifdef __APPLE__ + // GL_TIMESTAMP doesn't work on Apple. GL_TIME_ELAPSED still does however. 
+ // http://lists.apple.com/archives/mac-opengl/2014/Nov/threads.html#00001 + supportsTimestamp = false; + #endif + + // Add metrics below + metrics.emplace_back(0, 0, "CPU Start", "", CNT_NUM_INT64, CNT_TYPE_TIMESTAMP); + metrics.emplace_back(0, 1, "CPU Duration", "", CNT_NUM_INT64, CNT_TYPE_DURATION); + metrics.emplace_back(1, 0, "GPU Start", "", CNT_NUM_INT64, CNT_TYPE_TIMESTAMP); + metrics.emplace_back(1, 1, "GPU Duration", "", CNT_NUM_INT64, CNT_TYPE_DURATION); + metrics.emplace_back(1, 2, "Pixels Drawn", "", CNT_NUM_INT64, CNT_TYPE_GENERIC); + metrics.emplace_back(0, 2, "VSIZE Start", "", CNT_NUM_INT64, CNT_TYPE_GENERIC); + metrics.emplace_back(0, 3, "VSIZE Duration", "", CNT_NUM_INT64, CNT_TYPE_GENERIC); + metrics.emplace_back(0, 4, "RSS Start", "", CNT_NUM_INT64, CNT_TYPE_GENERIC); + metrics.emplace_back(0, 5, "RSS Duration", "", CNT_NUM_INT64, CNT_TYPE_GENERIC); + + metrics[METRIC_CPU_START].available = true; + metrics[METRIC_CPU_DURATION].available = true; + metrics[METRIC_CPU_VSIZE_START].available = true; + metrics[METRIC_CPU_VSIZE_DURATION].available = true; + metrics[METRIC_CPU_RSS_START].available = true; + metrics[METRIC_CPU_RSS_DURATION].available = true; + if (supportsTimestamp) metrics[METRIC_GPU_START].available = true; + if (supportsElapsed) { + GLint bits = 0; + glGetQueryiv(GL_TIME_ELAPSED, GL_QUERY_COUNTER_BITS, &bits); + if (bits) metrics[METRIC_GPU_DURATION].available = true; + } + if (supportsOcclusion) { + metrics[METRIC_GPU_PIXELS].available = true; + } + + // populate lookups + for (auto &m : metrics) { + idLookup[std::make_pair(m.groupId(), m.id())] = &m; + nameLookup[m.name()] = &m; + } +} + +int64_t MetricBackend_opengl::getCurrentTime(void) { + if (supportsTimestamp && cpugpuSync) { + /* Get the current GL time without stalling */ + GLint64 timestamp = 0; + glGetInteger64v(GL_TIMESTAMP, ×tamp); + return timestamp; + } else { + return os::getTime(); + } +} + +int64_t MetricBackend_opengl::getTimeFrequency(void) { + if 
(supportsTimestamp && cpugpuSync) { + return 1000000000; + } else { + return os::timeFrequency; + } +} + + +bool MetricBackend_opengl::isSupported() { + return true; + // though individual metrics might be not supported +} + +void MetricBackend_opengl::enumGroups(enumGroupsCallback callback, void* userData) { + callback(0, 0, userData); // cpu group + callback(1, 0, userData); // gpu group +} + +std::string MetricBackend_opengl::getGroupName(unsigned group) { + switch(group) { + case 0: + return "CPU"; + case 1: + return "GPU"; + default: + return ""; + } +} + +void MetricBackend_opengl::enumMetrics(unsigned group, enumMetricsCallback callback, void* userData) { + for (auto &m : metrics) { + if (m.groupId() == group && m.available) { + callback(&m, 0, userData); + } + } +} + +std::unique_ptr<Metric> +MetricBackend_opengl::getMetricById(unsigned groupId, unsigned metricId) { + auto entryToCopy = idLookup.find(std::make_pair(groupId, metricId)); + if (entryToCopy != idLookup.end()) { + return std::unique_ptr<Metric>(new Metric_opengl(*entryToCopy->second)); + } else { + return nullptr; + } +} + +std::unique_ptr<Metric> +MetricBackend_opengl::getMetricByName(std::string metricName) { + auto entryToCopy = nameLookup.find(metricName); + if (entryToCopy != nameLookup.end()) { + return std::unique_ptr<Metric>(new Metric_opengl(*entryToCopy->second)); + } else { + return nullptr; + } +} + + +int MetricBackend_opengl::enableMetric(Metric* metric, QueryBoundary pollingRule) { + // metric is not necessarily the same object as in metrics[] + auto entry = idLookup.find(std::make_pair(metric->groupId(), metric->id())); + if ((entry != idLookup.end()) && entry->second->available) { + entry->second->enabled[pollingRule] = true; + return 0; + } + return 1; +} + +unsigned MetricBackend_opengl::generatePasses() { + // draw calls profiling not needed if all calls are profiled + for (int i = 0; i < METRIC_LIST_END; i++) { + if (metrics[i].enabled[QUERY_BOUNDARY_CALL]) { + 
metrics[i].enabled[QUERY_BOUNDARY_DRAWCALL] = false; + } + } + // setup storage for profiled metrics + for (int i = 0; i < METRIC_LIST_END; i++) { + for (int j = 0; j < QUERY_BOUNDARY_LIST_END; j++) { + if (metrics[i].enabled[j]) { + data[i][j] = std::unique_ptr<Storage>(new Storage(alloc)); + } + } + } + // check if GL queries are needed + glQueriesNeededAnyBoundary = false; + for (int i = 0; i < QUERY_BOUNDARY_LIST_END; i++) { + if (metrics[METRIC_GPU_START].enabled[i] || + metrics[METRIC_GPU_DURATION].enabled[i] || + metrics[METRIC_GPU_PIXELS].enabled[i]) + { + glQueriesNeeded[i] = true; + glQueriesNeededAnyBoundary = true; + } else { + glQueriesNeeded[i] = false; + } + } + // check if CPU <-> GPU sync is required + // this is the case if any gpu time is requested + cpugpuSync = false; + for (int i = 0; i < QUERY_BOUNDARY_LIST_END; i++) { + if (metrics[METRIC_GPU_START].enabled[i] || + metrics[METRIC_GPU_DURATION].enabled[i]) + { + cpugpuSync = true; + break; + } + } + // check if two passes are needed + // GL_TIME_ELAPSED (gpu dur) and GL_SAMPLES_PASSED (pixels) cannot be nested + if (!supportsTimestamp && + metrics[METRIC_GPU_DURATION].enabled[QUERY_BOUNDARY_FRAME] && + (metrics[METRIC_GPU_DURATION].enabled[QUERY_BOUNDARY_CALL] || + metrics[METRIC_GPU_DURATION].enabled[QUERY_BOUNDARY_DRAWCALL])) + { + twoPasses = true; + } + if (metrics[METRIC_GPU_PIXELS].enabled[QUERY_BOUNDARY_FRAME] && + (metrics[METRIC_GPU_PIXELS].enabled[QUERY_BOUNDARY_CALL] || + metrics[METRIC_GPU_PIXELS].enabled[QUERY_BOUNDARY_DRAWCALL])) + { + twoPasses = true; + } + + curPass = 1; + return twoPasses ? 
2 : 1; +} + +void MetricBackend_opengl::beginPass() { + if (curPass == 1) { + for (int i = 0; i < QUERY_BOUNDARY_LIST_END; i++) { + for (auto &m : metrics) { + if (m.enabled[i]) m.profiled[i] = true; + } + } + // profile frames in first pass + if (twoPasses) { + if (!supportsTimestamp) { + metrics[METRIC_GPU_DURATION].profiled[QUERY_BOUNDARY_DRAWCALL] = false; + metrics[METRIC_GPU_DURATION].profiled[QUERY_BOUNDARY_CALL] = false; + } + metrics[METRIC_GPU_PIXELS].profiled[QUERY_BOUNDARY_DRAWCALL] = false; + metrics[METRIC_GPU_PIXELS].profiled[QUERY_BOUNDARY_CALL] = false; + } + } + else if (curPass == 2) { + for (int i = 0; i < QUERY_BOUNDARY_LIST_END; i++) { + for (auto &m : metrics) { + m.profiled[i] = false; + } + } + // profile calls/draw calls in second pass + if (!supportsTimestamp) { + if (metrics[METRIC_GPU_DURATION].enabled[QUERY_BOUNDARY_DRAWCALL]) { + metrics[METRIC_GPU_DURATION].profiled[QUERY_BOUNDARY_DRAWCALL] = true; + } + if (metrics[METRIC_GPU_DURATION].enabled[QUERY_BOUNDARY_CALL]) { + metrics[METRIC_GPU_DURATION].profiled[QUERY_BOUNDARY_CALL] = true; + } + } + if (metrics[METRIC_GPU_PIXELS].enabled[QUERY_BOUNDARY_DRAWCALL]) { + metrics[METRIC_GPU_PIXELS].profiled[QUERY_BOUNDARY_DRAWCALL] = true; + } + if (metrics[METRIC_GPU_PIXELS].enabled[QUERY_BOUNDARY_CALL]) { + metrics[METRIC_GPU_PIXELS].profiled[QUERY_BOUNDARY_CALL] = true; + } + } + // setup times + cpuTimeScale = 1.0E9 / getTimeFrequency(); + baseTime = getCurrentTime() * cpuTimeScale; +} + +void MetricBackend_opengl::processQueries() { + int64_t gpuStart, gpuEnd, pixels; + for (int i = 0; i < QUERY_BOUNDARY_LIST_END; i++) { + QueryBoundary boundary = static_cast<QueryBoundary>(i); + while (!queries[i].empty()) { + auto &query = queries[i].front(); + if (metrics[METRIC_GPU_START].profiled[i]) { + glGetQueryObjecti64v(query[QUERY_GPU_START], GL_QUERY_RESULT, + &gpuStart); + int64_t value = gpuStart - baseTime; + data[METRIC_GPU_START][i]->addData(boundary, value); + } + if 
(metrics[METRIC_GPU_DURATION].profiled[i]) { + if (supportsTimestamp) { + glGetQueryObjecti64v(query[QUERY_GPU_DURATION], GL_QUERY_RESULT, + &gpuEnd); + gpuEnd -= gpuStart; + } else { + glGetQueryObjecti64vEXT(query[QUERY_GPU_DURATION], GL_QUERY_RESULT, + &gpuEnd); + } + data[METRIC_GPU_DURATION][i]->addData(boundary, gpuEnd); + } + if (metrics[METRIC_GPU_PIXELS].profiled[i]) { + if (supportsTimestamp) { + glGetQueryObjecti64v(query[QUERY_OCCLUSION], GL_QUERY_RESULT, &pixels); + } else if (supportsElapsed) { + glGetQueryObjecti64vEXT(query[QUERY_OCCLUSION], GL_QUERY_RESULT, &pixels); + } else { + uint32_t pixels32; + glGetQueryObjectuiv(query[QUERY_OCCLUSION], GL_QUERY_RESULT, &pixels32); + pixels = static_cast<int64_t>(pixels32); + } + data[METRIC_GPU_PIXELS][i]->addData(boundary, pixels); + } + glDeleteQueries(QUERY_LIST_END, query.data()); + queries[i].pop(); + } + } +} + +void MetricBackend_opengl::endPass() { + // process rest of the queries (it can be the last frame) + processQueries(); + curPass++; +} + +void MetricBackend_opengl::pausePass() { + if (queryInProgress[QUERY_BOUNDARY_FRAME]) endQuery(QUERY_BOUNDARY_FRAME); + processQueries(); +} + +void MetricBackend_opengl::continuePass() { + // TODO if context switches check what it actually supports +} + +void MetricBackend_opengl::beginQuery(QueryBoundary boundary) { + // GPU related + if (glQueriesNeeded[boundary]) { + std::array<GLuint, QUERY_LIST_END> query; + glGenQueries(QUERY_LIST_END, query.data()); + + if (metrics[METRIC_GPU_START].profiled[boundary] || + (metrics[METRIC_GPU_DURATION].profiled[boundary] && supportsTimestamp)) + { + glQueryCounter(query[QUERY_GPU_START], GL_TIMESTAMP); + } + if (metrics[METRIC_GPU_DURATION].profiled[boundary] && !supportsTimestamp) { + glBeginQuery(GL_TIME_ELAPSED, query[QUERY_GPU_DURATION]); + } + if (metrics[METRIC_GPU_PIXELS].profiled[boundary]) { + glBeginQuery(GL_SAMPLES_PASSED, query[QUERY_OCCLUSION]); + } + queries[boundary].push(std::move(query)); + } + + + 
// CPU related + if (metrics[METRIC_CPU_START].profiled[boundary] || + metrics[METRIC_CPU_DURATION].profiled[boundary]) + { + cpuStart[boundary] = getCurrentTime(); + if (metrics[METRIC_CPU_START].profiled[boundary]) { + int64_t time = cpuStart[boundary] * cpuTimeScale - baseTime; + data[METRIC_CPU_START][boundary]->addData(boundary, time); + } + } + if (metrics[METRIC_CPU_VSIZE_START].profiled[boundary] || + metrics[METRIC_CPU_VSIZE_DURATION].profiled[boundary]) + { + vsizeStart[boundary] = os::getVsize(); + if (metrics[METRIC_CPU_VSIZE_START].profiled[boundary]) { + int64_t time = vsizeStart[boundary]; + data[METRIC_CPU_VSIZE_START][boundary]->addData(boundary, time); + } + } + if (metrics[METRIC_CPU_RSS_START].profiled[boundary] || + metrics[METRIC_CPU_RSS_DURATION].profiled[boundary]) + { + rssStart[boundary] = os::getRss(); + if (metrics[METRIC_CPU_RSS_START].profiled[boundary]) { + int64_t time = rssStart[boundary]; + data[METRIC_CPU_RSS_START][boundary]->addData(boundary, time); + } + } + queryInProgress[boundary] = true; + // DRAWCALL is a CALL + if (boundary == QUERY_BOUNDARY_DRAWCALL) beginQuery(QUERY_BOUNDARY_CALL); +} + +void MetricBackend_opengl::endQuery(QueryBoundary boundary) { + if (queryInProgress[boundary]) { + // CPU related + if (metrics[METRIC_CPU_DURATION].profiled[boundary]) + { + cpuEnd[boundary] = getCurrentTime(); + int64_t time = (cpuEnd[boundary] - cpuStart[boundary]) * cpuTimeScale; + data[METRIC_CPU_DURATION][boundary]->addData(boundary, time); + } + if (metrics[METRIC_CPU_VSIZE_DURATION].profiled[boundary]) + { + vsizeEnd[boundary] = os::getVsize(); + int64_t time = vsizeEnd[boundary] - vsizeStart[boundary]; + data[METRIC_CPU_VSIZE_DURATION][boundary]->addData(boundary, time); + } + if (metrics[METRIC_CPU_RSS_DURATION].profiled[boundary]) + { + rssEnd[boundary] = os::getRss(); + int64_t time = rssEnd[boundary] - rssStart[boundary]; + data[METRIC_CPU_RSS_DURATION][boundary]->addData(boundary, time); + } + // GPU related + if 
(glQueriesNeeded[boundary]) {
+            std::array<GLuint, QUERY_LIST_END> &query = queries[boundary].back();
+            if (metrics[METRIC_GPU_DURATION].profiled[boundary]) {
+                if (supportsTimestamp) {
+                    // GL_TIME_ELAPSED cannot be used in nested queries
+                    // so prefer this if timestamps are supported
+                    glQueryCounter(query[QUERY_GPU_DURATION], GL_TIMESTAMP);
+                } else {
+                    // close the GL_TIME_ELAPSED query that beginQuery()
+                    // opened; processQueries() reads its result later
+                    glEndQuery(GL_TIME_ELAPSED);
+                }
+            }
+            if (metrics[METRIC_GPU_PIXELS].profiled[boundary]) {
+                glEndQuery(GL_SAMPLES_PASSED);
+            }
+        }
+        queryInProgress[boundary] = false;
+    }
+    // DRAWCALL is a CALL
+    if (boundary == QUERY_BOUNDARY_DRAWCALL) endQuery(QUERY_BOUNDARY_CALL);
+    // clear queries after each frame
+    if (boundary == QUERY_BOUNDARY_FRAME && glQueriesNeededAnyBoundary) {
+        processQueries();
+    }
+}
+
+/**
+ * Report the stored values of one event: for every metric enabled on
+ * `boundary`, `callback` is invoked with the metric, the event id and a
+ * pointer to the value recorded for that id.
+ */
+void MetricBackend_opengl::enumDataQueryId(unsigned id, enumDataCallback callback,
+                                           QueryBoundary boundary, void* userData) {
+    for (int i = 0; i < METRIC_LIST_END; i++) {
+        Metric_opengl &metric = metrics[i];
+        if (metric.enabled[boundary]) {
+            callback(&metric, id, data[i][boundary]->getData(boundary, id), 0,
+                     userData);
+        }
+    }
+}
+
+unsigned MetricBackend_opengl::getNumPasses() {
+    return twoPasses ?
2 : 1; +} + +MetricBackend_opengl& +MetricBackend_opengl::getInstance(glretrace::Context* context, MmapAllocator<char> &alloc) { + static MetricBackend_opengl backend(context, alloc); + return backend; +} diff --git a/retrace/metric_backend_opengl.hpp b/retrace/metric_backend_opengl.hpp new file mode 100644 index 00000000..325d3234 --- /dev/null +++ b/retrace/metric_backend_opengl.hpp @@ -0,0 +1,173 @@ +#pragma once + +#include <vector> +#include <string> +#include <map> +#include <queue> +#include <array> + +#include "glproc.hpp" +#include "metric_backend.hpp" +#include "glretrace.hpp" +#include "mmap_allocator.hpp" + +class Metric_opengl : public Metric +{ +private: + unsigned m_gId, m_id; + std::string m_name, m_desc; + MetricNumType m_nType; + MetricType m_type; + +public: + Metric_opengl(unsigned gId, unsigned id, const std::string &name, + const std::string &desc, MetricNumType nT, MetricType t); + + unsigned id(); + + unsigned groupId(); + + std::string name(); + + std::string description(); + + MetricNumType numType(); + + MetricType type(); + + // should be set by backend + bool available; + bool profiled[QUERY_BOUNDARY_LIST_END]; // profiled in cur pass + bool enabled[QUERY_BOUNDARY_LIST_END]; // enabled for profiling +}; + +class MetricBackend_opengl : public MetricBackend +{ +private: + MmapAllocator<char> alloc; + // storage class + class Storage + { + private: + std::deque<int64_t, MmapAllocator<int64_t>> data[QUERY_BOUNDARY_LIST_END]; + + public: +#ifdef _WIN32 + Storage(MmapAllocator<char> &alloc) { + for (auto &d : data) { + d = std::deque<int64_t, MmapAllocator<int64_t>>(alloc); + } + } +#else + Storage(MmapAllocator<char> &alloc) + : data{ std::deque<int64_t, MmapAllocator<int64_t>>(alloc), + std::deque<int64_t, MmapAllocator<int64_t>>(alloc), + std::deque<int64_t, MmapAllocator<int64_t>>(alloc) } {}; +#endif + void addData(QueryBoundary boundary, int64_t data); + int64_t* getData(QueryBoundary boundary, unsigned eventId); + }; + + // indexes 
into metrics vector + enum { + METRIC_CPU_START = 0, + METRIC_CPU_DURATION, + METRIC_GPU_START, + METRIC_GPU_DURATION, + METRIC_GPU_PIXELS, + METRIC_CPU_VSIZE_START, + METRIC_CPU_VSIZE_DURATION, + METRIC_CPU_RSS_START, + METRIC_CPU_RSS_DURATION, + METRIC_LIST_END + }; + + // indexes into queries + enum { + QUERY_GPU_START = 0, + QUERY_GPU_DURATION, + QUERY_OCCLUSION, + QUERY_LIST_END, + }; + + // lookup tables + std::map<std::pair<unsigned,unsigned>, Metric_opengl*> idLookup; + std::map<std::string, Metric_opengl*> nameLookup; + + // bools + bool supportsTimestamp, supportsElapsed, supportsOcclusion; + bool glQueriesNeeded[QUERY_BOUNDARY_LIST_END]; + bool glQueriesNeededAnyBoundary; + bool cpugpuSync; + bool twoPasses; // profiling in two passes + bool queryInProgress[QUERY_BOUNDARY_LIST_END]; + + unsigned curPass; + + std::vector<Metric_opengl> metrics; + // storage for metrics + std::unique_ptr<Storage> data[METRIC_LIST_END][QUERY_BOUNDARY_LIST_END]; + + // Helper vars for metrics + std::queue<std::array<GLuint, QUERY_LIST_END>> queries[QUERY_BOUNDARY_LIST_END]; + GLint64 baseTime; + double cpuTimeScale; + int64_t cpuStart[QUERY_BOUNDARY_LIST_END]; + int64_t cpuEnd[QUERY_BOUNDARY_LIST_END]; + int64_t vsizeStart[QUERY_BOUNDARY_LIST_END]; + int64_t vsizeEnd[QUERY_BOUNDARY_LIST_END]; + int64_t rssStart[QUERY_BOUNDARY_LIST_END]; + int64_t rssEnd[QUERY_BOUNDARY_LIST_END]; + + MetricBackend_opengl(glretrace::Context* context, MmapAllocator<char> &alloc); + + MetricBackend_opengl(MetricBackend_opengl const&) = delete; + + void operator=(MetricBackend_opengl const&) = delete; + +public: + bool isSupported(); + + void enumGroups(enumGroupsCallback callback, void* userData = nullptr); + + void enumMetrics(unsigned group, enumMetricsCallback callback, void* userData = nullptr); + + std::unique_ptr<Metric> getMetricById(unsigned groupId, unsigned metricId); + + std::unique_ptr<Metric> getMetricByName(std::string metricName); + + std::string getGroupName(unsigned group); + + 
int enableMetric(Metric* metric, QueryBoundary pollingRule = QUERY_BOUNDARY_DRAWCALL); + + unsigned generatePasses(); + + void beginPass(); + + void endPass(); + + void pausePass(); + + void continuePass(); + + void beginQuery(QueryBoundary boundary = QUERY_BOUNDARY_DRAWCALL); + + void endQuery(QueryBoundary boundary = QUERY_BOUNDARY_DRAWCALL); + + void enumDataQueryId(unsigned id, enumDataCallback callback, + QueryBoundary boundary, void* userData = nullptr); + + unsigned getNumPasses(); + + static MetricBackend_opengl& getInstance(glretrace::Context* context, + MmapAllocator<char> &alloc); + + +private: + int64_t getCurrentTime(void); + + int64_t getTimeFrequency(void); + + void processQueries(); +}; + diff --git a/retrace/metric_helper.cpp b/retrace/metric_helper.cpp new file mode 100644 index 00000000..865faa7d --- /dev/null +++ b/retrace/metric_helper.cpp @@ -0,0 +1,192 @@ +#include <string> +#include <cstring> +#include <vector> +#include <set> +#include <iostream> + +#include "retrace.hpp" +#include "metric_backend.hpp" +#include "metric_writer.hpp" +#include "metric_backend_amd_perfmon.hpp" +#include "metric_backend_intel_perfquery.hpp" +#include "metric_backend_opengl.hpp" +#include "mmap_allocator.hpp" + +namespace glretrace { + +bool metricBackendsSetup = false; +bool profilingContextAcquired = false; +bool profilingBoundaries[QUERY_BOUNDARY_LIST_END] = {false}; +unsigned profilingBoundariesIndex[QUERY_BOUNDARY_LIST_END] = {0}; +std::vector<MetricBackend*> metricBackends; // to be populated in initContext() +MetricBackend* curMetricBackend = nullptr; // backend active in the current pass +MetricWriter profiler(metricBackends, MmapAllocator<char>()); + +MetricBackend* getBackend(std::string backendName) { + // allocator for metric storage + MmapAllocator<char> alloc; + // to be populated with backends + Context *currentContext = getCurrentContext(); + if (backendName == "GL_AMD_performance_monitor") return 
&MetricBackend_AMD_perfmon::getInstance(currentContext, alloc); + else if (backendName == "GL_INTEL_performance_query") return &MetricBackend_INTEL_perfquery::getInstance(currentContext, alloc); + else if (backendName == "opengl") return &MetricBackend_opengl::getInstance(currentContext, alloc); + else return nullptr; +} + +bool +isLastPass() { + return ( retrace::curPass + 1 >= retrace::numPasses ); +} + +/* Callbacks for listing metrics with --list-metrics */ +void listMetrics_metricCallback(Metric* c, int error, void* userData) { + static const std::string metricType[] = {"CNT_TYPE_GENERIC", "CNT_TYPE_NUM", + "CNT_TYPE_DURATION", "CNT_TYPE_PERCENT", + "CNT_TYPE_TIMESTAMP", "CNT_TYPE_OTHER"}; + static const std::string metricNumType[] = {"CNT_NUM_UINT", "CNT_NUM_FLOAT", + "CNT_NUM_UINT64", "CNT_NUM_DOUBLE", + "CNT_NUM_BOOL", "CNT_NUM_INT64"}; + + std::cout << " Metric #" << c->id() << ": " << c->name() + << " (type: " << metricType[c->type()] << ", num. type: " + << metricNumType[c->numType()] << ").\n"; + std::cout << " Description: " << c->description() << "\n"; +} + +void listMetrics_groupCallback(unsigned g, int error, void* userData) { + MetricBackend* b = reinterpret_cast<MetricBackend*>(userData); + std::cout << "\n Group #" << g << ": " << b->getGroupName(g) << ".\n"; + b->enumMetrics(g, listMetrics_metricCallback, userData); +} + +void listMetricsCLI() { + // backends is to be populated with backend names + std::string backends[] = {"GL_AMD_performance_monitor", + "GL_INTEL_performance_query", + "opengl"}; + std::cout << "Available metrics: \n"; + for (auto s : backends) { + auto b = getBackend(s); + if (!b->isSupported()) { + continue; + } + std::cout << "\nBackend " << s << ":\n"; + b->enumGroups(listMetrics_groupCallback, b); + std::cout << std::endl; + } +} + +void parseMetricsBlock(QueryBoundary pollingRule, const char* str, + std::size_t limit, MetricBackend* backend) +{ + const char* end; + bool lastItem = false; + + while (((end = 
reinterpret_cast<const char*>(std::memchr(str, ',', limit))) != nullptr)
+           || !lastItem)
+    {
+        std::unique_ptr<Metric> p;
+        std::string metricName;
+
+        // no further ',' inside the block: this is the final item
+        if (!end) {
+            lastItem = true;
+            end = str + limit;
+        }
+        // skip leading blanks of the item
+        std::size_t span = std::strspn(str, " ");
+        limit -= span;
+        str += span;
+        // parse [group, id]
+        if (*str == '[') {
+            // the ',' located by the loop condition is taken as the one
+            // between group and id; when none was found the item is
+            // malformed and the arithmetic below would step past the block
+            if (lastItem) {
+                std::cerr << "Warning: Malformed metric \""
+                          << std::string(str, 0, limit) << "\"." << std::endl;
+                return;
+            }
+            std::string groupStr = std::string(str, 1, end-str-1);
+            limit -= end + 1 - str;
+            str = end + 1;
+            end = reinterpret_cast<const char*>(std::memchr(str, ']', limit));
+            // no closing bracket: `end` is null and must not be used
+            if (!end) {
+                std::cerr << "Warning: Malformed metric \"[" << groupStr << ","
+                          << std::string(str, 0, limit) << "\"." << std::endl;
+                return;
+            }
+            std::string idStr = std::string(str, 0, end-str);
+            limit -= end + 1 - str;
+            str = end + 1;
+            // step over the ',' that separates this item from the next one
+            const char* next = reinterpret_cast<const char*>(std::memchr(str, ',', limit));
+            if (next) {
+                end = next;
+                limit -= end + 1 - str;
+                str = end + 1;
+            }
+#if defined(ANDROID)
+            // http://stackoverflow.com/questions/17950814/how-to-use-stdstoul-and-stdstoull-in-android
+            unsigned groupId = strtoul(groupStr.c_str(), nullptr, 10);
+            unsigned metricId = strtoul(idStr.c_str(), nullptr, 10);
+#else
+            unsigned groupId = std::stoul(groupStr);
+            unsigned metricId = std::stoul(idStr);
+#endif
+            p = backend->getMetricById(groupId, metricId);
+            metricName = "[" + groupStr + ", " + idStr + "]";
+        // parse metricName
+        } else {
+            if (end - str) {
+                metricName = std::string(str, 0, end-str);
+                p = backend->getMetricByName(metricName);
+            }
+            limit -= end + (lastItem ? 0 : 1) - str;
+            str = end + (lastItem ? 0 : 1);
+            // empty item (e.g. two consecutive commas): nothing to enable
+            if (metricName.empty()) continue;
+        }
+        if (!p) {
+            std::cerr << "Warning: No metric \"" << metricName
+                      << "\"." << std::endl;
+            continue;
+        }
+        int error = backend->enableMetric(p.get(), pollingRule);
+        if (error) {
+            std::cerr << "Warning: Metric " << metricName << " not enabled"
+                         " (error " << error << ")."
<< std::endl; + } else { + profilingBoundaries[pollingRule] = true; + } + } +} + +void parseBackendBlock(QueryBoundary pollingRule, const char* str, + std::size_t limit, std::set<MetricBackend*> &backendsHash) +{ + const char* delim = reinterpret_cast<const char*>(std::memchr(str, ':', limit)); + if (delim) { + std::size_t span = std::strspn(str, " "); + std::string backendName = std::string(str, span, delim-str-span); + MetricBackend* backend = getBackend(backendName); + if (!backend) { + std::cerr << "Warning: No backend \"" << backendName << "\"." + << std::endl; + return; + } + if (!backend->isSupported()) { + std::cerr << "Warning: Backend \"" << backendName + << "\" is not supported." << std::endl; + return; + } + + /** + * order in metricBackends is important for output + * also there should be no duplicates + */ + if (backendsHash.find(backend) == backendsHash.end()) { + metricBackends.push_back(backend); + backendsHash.insert(backend); + } + + limit -= (delim-str) + 1; + parseMetricsBlock(pollingRule, delim + 1, limit, backend); + } +} + +void enableMetricsFromCLI(const char* metrics, QueryBoundary pollingRule) { + static std::set<MetricBackend*> backendsHash; // for not allowing duplicates + const char* end; + + while ((end = std::strchr(metrics, ';')) != nullptr) { + parseBackendBlock(pollingRule, metrics, end-metrics, backendsHash); + metrics = end + 1; + } + parseBackendBlock(pollingRule, metrics, std::strlen(metrics), backendsHash); +} + +} /* namespace glretrace */ diff --git a/retrace/metric_writer.cpp b/retrace/metric_writer.cpp new file mode 100644 index 00000000..a2d8433c --- /dev/null +++ b/retrace/metric_writer.cpp @@ -0,0 +1,199 @@ +#include <iostream> + +#include "metric_writer.hpp" + +void ProfilerQuery::writeMetricHeaderCallback(Metric* metric, int event, void* data, int error, + void* userData) { + std::cout << "\t" << metric->name(); +} + +void ProfilerQuery::writeMetricEntryCallback(Metric* metric, int event, void* data, int error, + 
void* userData) {
+    if (error) {
+        std::cout << "\t" << "#ERR" << error;
+        return;
+    }
+    // no value recorded for this metric/event
+    if (!data) {
+        std::cout << "\t" << "-";
+        return;
+    }
+    // `data` is untyped; the metric's numeric type says how to read it
+    switch(metric->numType()) {
+        case CNT_NUM_UINT: std::cout << "\t" << *(reinterpret_cast<unsigned*>(data)); break;
+        case CNT_NUM_FLOAT: std::cout << "\t" << *(reinterpret_cast<float*>(data)); break;
+        case CNT_NUM_DOUBLE: std::cout << "\t" << *(reinterpret_cast<double*>(data)); break;
+        case CNT_NUM_BOOL: std::cout << "\t" << *(reinterpret_cast<bool*>(data)); break;
+        case CNT_NUM_UINT64: std::cout << "\t" << *(reinterpret_cast<uint64_t*>(data)); break;
+        case CNT_NUM_INT64: std::cout << "\t" << *(reinterpret_cast<int64_t*>(data)); break;
+    }
+}
+
+/** Print one tab-separated column title per metric, across all backends. */
+void ProfilerQuery::writeMetricHeader(QueryBoundary qb) const {
+    for (auto &a : *metricBackends) {
+        a->enumDataQueryId(eventId, &writeMetricHeaderCallback, qb);
+    }
+    std::cout << std::endl;
+}
+
+/** Print the values recorded for this event, across all backends. */
+void ProfilerQuery::writeMetricEntry(QueryBoundary qb) const {
+    for (auto &a : *metricBackends) {
+        a->enumDataQueryId(eventId, &writeMetricEntryCallback, qb);
+    }
+    std::cout << std::endl;
+}
+
+/** Return the id of `str`, adding it to the table on first use. */
+template<typename T>
+T ProfilerCall::StringTable<T>::getId(const std::string &str) {
+    auto res = stringLookupTable.find(str);
+    T index;
+    if (res == stringLookupTable.end()) {
+        index = static_cast<T>(strings.size());
+        strings.push_back(str);
+        stringLookupTable[str] = index;
+    } else {
+        index = res->second;
+    }
+    return index;
+}
+
+/** Return the string stored under `id` (no range check is performed). */
+template<typename T>
+std::string ProfilerCall::StringTable<T>::getString(T id) {
+    return strings[static_cast<typename decltype(stringLookupTable)::size_type>(id)];
+}
+
+
+/**
+ * Build the record of one profiled call.
+ *
+ * `queryData` may be null (it is the declared default); every member then
+ * gets a defined value instead of being left uninitialised.
+ */
+ProfilerCall::ProfilerCall(unsigned eventId, const data* queryData)
+    : ProfilerQuery(QUERY_BOUNDARY_CALL, eventId),
+      nameTableEntry(0), isFrameEnd(false), no(0), program(0)
+{
+    const char* name = "";
+    if (queryData) {
+        isFrameEnd = queryData->isFrameEnd;
+        no = queryData->no;
+        program = queryData->program;
+        // a null name must not reach std::string's constructor
+        if (queryData->name) name = queryData->name;
+    }
+    // always resolve to a real table slot so writeEntry() can look it up
+    nameTableEntry = nameTable.getId(name);
+}
+
+
+void ProfilerCall::writeHeader() const {
+    std::cout << "#\tcall no\tprogram\tname";
+
ProfilerQuery::writeMetricHeader(QUERY_BOUNDARY_CALL); +} + +void ProfilerCall::writeEntry() const { + if (isFrameEnd) { + std::cout << "frame_end" << std::endl; + } else { + std::cout << "call" + << "\t" << no + << "\t" << program + << "\t" << nameTable.getString(nameTableEntry); + ProfilerQuery::writeMetricEntry(QUERY_BOUNDARY_CALL); + } +} + + +void ProfilerDrawcall::writeHeader() const { + std::cout << "#\tcall no\tprogram\tname"; + ProfilerQuery::writeMetricHeader(QUERY_BOUNDARY_DRAWCALL); +} + +void ProfilerDrawcall::writeEntry() const { + if (isFrameEnd) { + std::cout << "frame_end" << std::endl; + } else { + std::cout << "call" + << "\t" << no + << "\t" << program + << "\t" << nameTable.getString(nameTableEntry); + ProfilerQuery::writeMetricEntry(QUERY_BOUNDARY_DRAWCALL); + } +} + + +void ProfilerFrame::writeHeader() const { + std::cout << "#"; + ProfilerQuery::writeMetricHeader(QUERY_BOUNDARY_FRAME); +} + +void ProfilerFrame::writeEntry() const { + std::cout << "frame"; + ProfilerQuery::writeMetricEntry(QUERY_BOUNDARY_FRAME); +} + + +MetricWriter::MetricWriter(std::vector<MetricBackend*> &metricBackends, + const MmapAllocator<char> &alloc) + : frameQueue(MmapAllocator<ProfilerCall>(alloc)), + callQueue(MmapAllocator<ProfilerCall>(alloc)), + drawcallQueue(MmapAllocator<ProfilerCall>(alloc)) +{ + ProfilerQuery::metricBackends = &metricBackends; +} + +void MetricWriter::addQuery(QueryBoundary boundary, unsigned eventId, + const void* queryData) +{ + switch (boundary) { + case QUERY_BOUNDARY_FRAME: + frameQueue.emplace_back(eventId); + break; + case QUERY_BOUNDARY_CALL: + callQueue.emplace_back(eventId, + reinterpret_cast<const ProfilerCall::data*>(queryData)); + break; + case QUERY_BOUNDARY_DRAWCALL: + drawcallQueue.emplace_back(eventId, + reinterpret_cast<const ProfilerCall::data*>(queryData)); + break; + default: + break; + } +} + +void MetricWriter::writeQuery(QueryBoundary boundary) { + switch (boundary) { + case QUERY_BOUNDARY_FRAME: + 
if (frameQueue.empty()) break; // front()/pop_front() need an element
+            frameQueue.front().writeEntry();
+            frameQueue.pop_front();
+            break;
+        case QUERY_BOUNDARY_CALL:
+            if (callQueue.empty()) break;
+            callQueue.front().writeEntry();
+            callQueue.pop_front();
+            break;
+        case QUERY_BOUNDARY_DRAWCALL:
+            if (drawcallQueue.empty()) break;
+            drawcallQueue.front().writeEntry();
+            drawcallQueue.pop_front();
+            break;
+        default:
+            break;
+    }
+}
+
+/**
+ * Write the header line followed by every queued entry of `boundary`,
+ * emptying that queue.  When nothing was queued only the closing blank
+ * line is written.
+ */
+void MetricWriter::writeAll(QueryBoundary boundary) {
+    switch (boundary) {
+        case QUERY_BOUNDARY_FRAME:
+            // the header is produced by the first entry, so one must exist
+            if (!frameQueue.empty()) frameQueue.front().writeHeader();
+            while (!frameQueue.empty()) {
+                writeQuery(boundary);
+            }
+            break;
+        case QUERY_BOUNDARY_CALL:
+            if (!callQueue.empty()) callQueue.front().writeHeader();
+            while (!callQueue.empty()) {
+                writeQuery(boundary);
+            }
+            break;
+        case QUERY_BOUNDARY_DRAWCALL:
+            if (!drawcallQueue.empty()) drawcallQueue.front().writeHeader();
+            while (!drawcallQueue.empty()) {
+                writeQuery(boundary);
+            }
+            break;
+        default:
+            break;
+    }
+    std::cout << std::endl;
+}
+
+std::vector<MetricBackend*>* ProfilerQuery::metricBackends = nullptr;
+
+ProfilerCall::StringTable<int16_t> ProfilerCall::nameTable;
diff --git a/retrace/metric_writer.hpp b/retrace/metric_writer.hpp
new file mode 100644
index 00000000..ddf9eaac
--- /dev/null
+++ b/retrace/metric_writer.hpp
@@ -0,0 +1,99 @@
+#pragma once
+
+#include <queue>
+#include <string>
+#include <unordered_map>
+
+#include "metric_backend.hpp"
+#include "mmap_allocator.hpp"
+
+// One profiled event; knows how to print its metric columns.
+class ProfilerQuery
+{
+private:
+    unsigned eventId;
+    static void writeMetricHeaderCallback(Metric* metric, int event, void* data, int error,
+                                          void* userData);
+    static void writeMetricEntryCallback(Metric* metric, int event, void* data, int error,
+                                         void* userData);
+
+public:
+    // backends whose data is written; set by MetricWriter's constructor
+    static std::vector<MetricBackend*>* metricBackends;
+
+    // `qb` is accepted but not stored
+    ProfilerQuery(QueryBoundary qb, unsigned eventId)
+        : eventId(eventId) {};
+    void writeMetricHeader(QueryBoundary qb) const;
+    void writeMetricEntry(QueryBoundary qb) const;
+};
+
+class ProfilerCall : public ProfilerQuery
+{
+public:
+    struct data {
+        bool isFrameEnd;
+        unsigned no;
+        unsigned program;
+        const char* name;
+    };
+
+protected:
+    template<typename
T> + class StringTable + { + private: + std::deque<std::string> strings; + std::unordered_map<std::string, T> stringLookupTable; + + public: + T getId(const std::string &str); + std::string getString(T id); + }; + + static StringTable<int16_t> nameTable; + + int16_t nameTableEntry; + bool isFrameEnd; + unsigned no; + unsigned program; + +public: + ProfilerCall(unsigned eventId, const data* queryData = nullptr); + void writeHeader() const; + void writeEntry() const; +}; + +class ProfilerDrawcall : public ProfilerCall +{ +public: + ProfilerDrawcall(unsigned eventId, const data* queryData) + : ProfilerCall( eventId, queryData) {}; + void writeHeader() const; + void writeEntry() const; +}; + +class ProfilerFrame : public ProfilerQuery +{ +public: + ProfilerFrame(unsigned eventId) + : ProfilerQuery(QUERY_BOUNDARY_FRAME, eventId) {}; + void writeHeader() const; + void writeEntry() const; +}; + +class MetricWriter +{ +private: + std::deque<ProfilerFrame, MmapAllocator<ProfilerFrame>> frameQueue; + std::deque<ProfilerCall, MmapAllocator<ProfilerCall>> callQueue; + std::deque<ProfilerDrawcall, MmapAllocator<ProfilerDrawcall>> drawcallQueue; + +public: + MetricWriter(std::vector<MetricBackend*> &metricBackends, + const MmapAllocator<char> &alloc); + + void addQuery(QueryBoundary boundary, unsigned eventId, + const void* queryData = nullptr); + + void writeQuery(QueryBoundary boundary); + + void writeAll(QueryBoundary boundary); +}; diff --git a/retrace/mmap_allocator.hpp b/retrace/mmap_allocator.hpp new file mode 100644 index 00000000..3826c65e --- /dev/null +++ b/retrace/mmap_allocator.hpp @@ -0,0 +1,143 @@ +#pragma once + +#include <cstddef> +#include <string> +#include <iostream> +#include <memory> +#include <list> + +#ifdef __unix__ + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <sys/mman.h> +#include <unistd.h> +#include <stdlib.h> + +#define ALLOC_CHUNK_SIZE 64 * 1024 * 1024L + +/* + * Allocator that backs up memory with mmaped file + * 
File is grown by ALLOC_CHUNK_SIZE, this new region is mmaped then
+ * Nothing is deallocated
+*/
+
+class MmapedFileBuffer
+{
+private:
+    int fd;
+    off_t curChunkSize;         // bytes already handed out of the current chunk
+    const size_t chunkSize;     // size of every mapping
+    std::list<void*> mmaps;     // all mappings, newest first
+    void* vptr;                 // start of the current chunk
+    std::string fileName;
+
+    MmapedFileBuffer(MmapedFileBuffer const&) = delete;
+
+    void operator=(MmapedFileBuffer const&) = delete;
+
+    // Report an unrecoverable failure of the backing store and stop: once
+    // the file cannot be created, grown or mapped there is no valid memory
+    // to hand out.
+    static void fail(const char* what) {
+        std::cerr << "error: MmapedFileBuffer: " << what << std::endl;
+        abort();
+    }
+
+    // Grow the file by one chunk and map the new region as current chunk.
+    void newMmap() {
+        if (ftruncate(fd, chunkSize * (mmaps.size() + 1)) != 0) {
+            fail("cannot grow backing file");
+        }
+        vptr = mmap(NULL, chunkSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
+                    chunkSize * mmaps.size());
+        // mmap reports failure with MAP_FAILED, not with a null pointer
+        if (vptr == MAP_FAILED) {
+            fail("cannot map backing file");
+        }
+        mmaps.push_front(vptr);
+        curChunkSize = 0;
+    }
+
+public:
+    MmapedFileBuffer()
+        : curChunkSize(0),
+          chunkSize(ALLOC_CHUNK_SIZE & ~(sysconf(_SC_PAGE_SIZE) - 1))
+    {
+        char templ[] = ".pbtmpXXXXXX";
+        fd = mkstemp(templ);
+        if (fd == -1) {
+            fail("cannot create temporary file");
+        }
+        fileName = templ;
+        newMmap();
+    }
+
+    ~MmapedFileBuffer() {
+        close(fd);
+        for (auto &m : mmaps) {
+            munmap(m, chunkSize);
+        }
+        unlink(fileName.c_str());
+    }
+
+    // Hand out `size` bytes from the current chunk, starting a new chunk
+    // when the request does not fit.  Memory is never returned.
+    void* allocate(size_t size) {
+        // round up so that every returned address stays suitably aligned,
+        // whatever mix of request sizes came before
+        const size_t align = alignof(std::max_align_t);
+        size = (size + align - 1) & ~(align - 1);
+        // a request larger than one chunk would run past the mapping
+        if (size > chunkSize) {
+            fail("allocation larger than one chunk");
+        }
+        if ((curChunkSize + size) > chunkSize) {
+            newMmap();
+        }
+        void* addr = static_cast<char*>(vptr) + curChunkSize;
+        curChunkSize += size;
+        return addr;
+    }
+};
+
+// Allocator front end: copies share one MmapedFileBuffer, a
+// default-constructed allocator creates a new one.
+template <class T>
+class MmapAllocator
+{
+private:
+    std::shared_ptr<MmapedFileBuffer> file;
+    void* vptr;     // only copied between allocators in this header
+
+    template<class U>
+    friend class MmapAllocator;
+
+public:
+    typedef T value_type;
+    typedef value_type* pointer;
+    typedef const value_type* const_pointer;
+    typedef value_type& reference;
+    typedef const value_type& const_reference;
+    typedef size_t size_type;
+
+    // vptr gets a defined value because the converting constructor reads it
+    MmapAllocator()
+        : file(std::make_shared<MmapedFileBuffer>()), vptr(nullptr)
+    {
+    };
+
+    ~MmapAllocator() {
+    };
+
+    template <class U> MmapAllocator(const MmapAllocator<U>& other)
+        : file(other.file), vptr(other.vptr) {
+    };
+
+    T* allocate(std::size_t n) {
+        return reinterpret_cast<T*>(file->allocate(n * sizeof(T)));
+    };
+
+    // intentionally empty: the buffer never releases memory
+    void deallocate(T* p, std::size_t n) {};
+
+    template <typename U, typename... Args>
+    void construct(U* p, Args&&...
args) {::new (static_cast<void*>(p) ) U (std::forward<Args> (args)...);} + + template <typename U> + void destroy(U* p) {p->~U();} + + size_type max_size() const { + return ALLOC_CHUNK_SIZE / sizeof(T); + } + + template<typename U> + struct rebind + { + typedef MmapAllocator<U> other; + }; +}; +template <class T, class U> +bool operator==(const MmapAllocator<T>&, const MmapAllocator<U>&) { + return true; +} +template <class T, class U> +bool operator!=(const MmapAllocator<T>& a, const MmapAllocator<U>& b) { + return !(a==b); +} + +#else +// default allocator +template<class T> +using MmapAllocator = std::allocator<T>; + +#endif diff --git a/retrace/retrace.hpp b/retrace/retrace.hpp index 1c9f0e61..082879f6 100644 --- a/retrace/retrace.hpp +++ b/retrace/retrace.hpp @@ -122,6 +122,15 @@ extern bool forceWindowed; /** * Add profiling data to the dump when retracing. */ +extern unsigned curPass; +extern unsigned numPasses; +extern bool profilingWithBackends; +extern char* profilingCallsMetricsString; +extern char* profilingFramesMetricsString; +extern char* profilingDrawCallsMetricsString; +extern bool profilingListMetrics; +extern bool profilingNumPasses; + extern bool profiling; extern bool profilingCpuTimes; extern bool profilingGpuTimes; diff --git a/retrace/retrace_main.cpp b/retrace/retrace_main.cpp index ec49a162..bc00ffe0 100644 --- a/retrace/retrace_main.cpp +++ b/retrace/retrace_main.cpp @@ -83,6 +83,15 @@ const char *driverModule = NULL; bool doubleBuffer = true; unsigned samples = 1; +unsigned curPass = 0; +unsigned numPasses = 1; +bool profilingWithBackends = false; +char* profilingCallsMetricsString; +char* profilingFramesMetricsString; +char* profilingDrawCallsMetricsString; +bool profilingListMetrics = false; +bool profilingNumPasses = false; + bool profiling = false; bool profilingGpuTimes = false; bool profilingCpuTimes = false; @@ -111,7 +120,6 @@ frameComplete(trace::Call &call) { } } - class DefaultDumper: public Dumper { public: @@ -599,6 +607,11 
@@ usage(const char *argv0) { " --pgpu gpu profiling (gpu times per draw call)\n" " --ppd pixels drawn profiling (pixels drawn per draw call)\n" " --pmem memory usage profiling (vsize rss per call)\n" + " --pcalls call profiling metrics selection\n" + " --pframes frame profiling metrics selection\n" + " --pdrawcalls draw call profiling metrics selection\n" + " --list-metrics list all available metrics for TRACE\n" + " --gen-passes generate profiling passes and output passes number\n" " --call-nos[=BOOL] use call numbers in snapshot filenames\n" " --core use core profile\n" " --db use a double buffer visual (default)\n" @@ -631,6 +644,11 @@ enum { PGPU_OPT, PPD_OPT, PMEM_OPT, + PCALLS_OPT, + PFRAMES_OPT, + PDRAWCALLS_OPT, + PLMETRICS_OPT, + GENPASS_OPT, SB_OPT, SNAPSHOT_FORMAT_OPT, LOOP_OPT, @@ -660,6 +678,11 @@ longOptions[] = { {"pgpu", no_argument, 0, PGPU_OPT}, {"ppd", no_argument, 0, PPD_OPT}, {"pmem", no_argument, 0, PMEM_OPT}, + {"pcalls", required_argument, 0, PCALLS_OPT}, + {"pframes", required_argument, 0, PFRAMES_OPT}, + {"pdrawcalls", required_argument, 0, PDRAWCALLS_OPT}, + {"list-metrics", no_argument, 0, PLMETRICS_OPT}, + {"gen-passes", no_argument, 0, GENPASS_OPT}, {"sb", no_argument, 0, SB_OPT}, {"snapshot-prefix", required_argument, 0, 's'}, {"snapshot-format", required_argument, 0, SNAPSHOT_FORMAT_OPT}, @@ -837,6 +860,41 @@ int main(int argc, char **argv) retrace::profilingMemoryUsage = true; break; + case PCALLS_OPT: + retrace::debug = 0; + retrace::profiling = true; + retrace::verbosity = -1; + retrace::profilingWithBackends = true; + retrace::profilingCallsMetricsString = optarg; + break; + case PFRAMES_OPT: + retrace::debug = 0; + retrace::profiling = true; + retrace::verbosity = -1; + retrace::profilingWithBackends = true; + retrace::profilingFramesMetricsString = optarg; + break; + case PDRAWCALLS_OPT: + retrace::debug = 0; + retrace::profiling = true; + retrace::verbosity = -1; + retrace::profilingWithBackends = true; + 
retrace::profilingDrawCallsMetricsString = optarg; + break; + case PLMETRICS_OPT: + retrace::debug = 0; + retrace::profiling = true; + retrace::verbosity = -1; + retrace::profilingWithBackends = true; + retrace::profilingListMetrics = true; + break; + case GENPASS_OPT: + retrace::debug = 0; + retrace::profiling = true; + retrace::verbosity = -1; + retrace::profilingWithBackends = true; + retrace::profilingNumPasses = true; + break; default: std::cerr << "error: unknown option " << opt << "\n"; usage(argv[0]); @@ -861,30 +919,34 @@ int main(int argc, char **argv) #endif retrace::setUp(); - if (retrace::profiling) { + if (retrace::profiling && !retrace::profilingWithBackends) { retrace::profiler.setup(retrace::profilingCpuTimes, retrace::profilingGpuTimes, retrace::profilingPixelsDrawn, retrace::profilingMemoryUsage); } os::setExceptionCallback(exceptionCallback); - for (i = optind; i < argc; ++i) { - parser = new trace::Parser; - if (loopCount) { - parser = lastFrameLoopParser(parser, loopCount); - } + for (retrace::curPass = 0; retrace::curPass < retrace::numPasses; + retrace::curPass++) + { + for (i = optind; i < argc; ++i) { + parser = new trace::Parser; + if (loopCount) { + parser = lastFrameLoopParser(parser, loopCount); + } - if (!parser->open(argv[i])) { - return 1; - } + if (!parser->open(argv[i])) { + return 1; + } - retrace::mainLoop(); + retrace::mainLoop(); - parser->close(); + parser->close(); - delete parser; - parser = NULL; + delete parser; + parser = NULL; + } } - + os::resetExceptionCallback(); // XXX: X often hangs on XCloseDisplay |