summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEvgeny Pinchuk <evgeny.pinchuk@amd.com>2014-07-29 13:59:07 +0300
committerOded Gabbay <oded.gabbay@amd.com>2014-07-29 13:59:07 +0300
commit2c865d510ff7c66e5d0ce3c67379dd4b0ede4859 (patch)
tree2aa7ed3836a9fa55ddbd61a030810413d31eff68
parenta4f446e8cd6de16d67cead169b2ea2ada67833c9 (diff)
Add pmc module
Signed-off-by: Evgeny Pinchuk <evgeny.pinchuk@amd.com> Signed-off-by: Oded Gabbay <oded.gabbay@amd.com>
-rw-r--r--src/perfctr.c222
1 files changed, 206 insertions, 16 deletions
diff --git a/src/perfctr.c b/src/perfctr.c
index 0f10f6a..e0d6ad0 100644
--- a/src/perfctr.c
+++ b/src/perfctr.c
@@ -25,8 +25,41 @@
#include <stdlib.h>
#include "libhsakmt.h"
+#include "pmc_table.h"
#include "linux/kfd_ioctl.h"
+#define BITS_PER_BYTE CHAR_BIT /* divisor that turns CounterSizeInBits into bytes; NOTE(review): CHAR_BIT lives in <limits.h> - confirm it is included */
+
+#define HSA_PERF_MAGIC4CC 0x54415348 /* tag written into perf_trace.magic4cc at registration; a trace id whose record lacks it is rejected as an invalid handle */
+
+enum perf_trace_state { /* state recorded in struct perf_trace */
+ PERF_TRACE_STATE__STOPPED = 0, /* assigned by hsaKmtPmcRegisterTrace and hsaKmtPmcStopTrace */
+ PERF_TRACE_STATE__STARTED /* assigned by hsaKmtPmcStartTrace; hsaKmtPmcUnregisterTrace stops a trace in this state before freeing it */
+};
+
+struct perf_trace { /* record allocated by hsaKmtPmcRegisterTrace; its address, converted with PORT_VPTR_TO_UINT64, is handed out as the trace id */
+ uint32_t magic4cc; /* HSA_PERF_MAGIC4CC for a record created by hsaKmtPmcRegisterTrace */
+ uint32_t gpu_id; /* gpu id produced by validate_nodeid() at registration; compared again on unregister */
+ enum perf_trace_state state; /* stopped/started flag */
+};
+
+static HsaCounterProperties *counter_props[MAX_NODES] = {NULL}; /* table indexed by NodeId; an entry is allocated by the first hsaKmtPmcGetCounterProperties call for that node and returned again on later calls */
+
+/*
+ * Translate an internal perf block id into the HSA UUID published for it.
+ *
+ * Writes *uuid and returns 0 for PERFCOUNTER_BLOCKID__SQ. Any other id
+ * leaves *uuid untouched and returns -1; reaching that path is a bug in
+ * the caller.
+ */
+static int blockid2uuid(enum perf_block_id block_id, HSA_UUID *uuid)
+{
+	if (block_id != PERFCOUNTER_BLOCKID__SQ)
+		return -1;
+
+	*uuid = HSA_PROFILEBLOCK_AMD_SQ;
+	return 0;
+}
+
HSAKMT_STATUS
HSAKMTAPI
hsaKmtPmcGetCounterProperties(
@@ -34,9 +67,71 @@ hsaKmtPmcGetCounterProperties(
HsaCounterProperties** CounterProperties //OUT
)
{
- CHECK_KFD_OPEN();
+	/*
+	 * Return the counter description table for NodeId through
+	 * *CounterProperties, building it on the first call for that node and
+	 * handing back the same cached pointer on every later call.
+	 *
+	 * Returns:
+	 *   HSAKMT_STATUS_INVALID_PARAMETER  CounterProperties is NULL
+	 *   HSAKMT_STATUS_INVALID_NODE_UNIT  validate_nodeid() rejected NodeId
+	 *   HSAKMT_STATUS_NO_MEMORY          the table could not be allocated
+	 *   (status of get_block_properties) when a block lookup fails
+	 *   HSAKMT_STATUS_SUCCESS            otherwise
+	 *
+	 * The table is assembled in a local pointer and stored in
+	 * counter_props[] only once it is complete, so a failure part-way
+	 * through never leaves a freed pointer in the cache.
+	 */
+	HSAKMT_STATUS rc = HSAKMT_STATUS_SUCCESS;
+	uint32_t gpu_id, i, block_id;
+	uint16_t dev_id;
+	uint32_t counter_props_size = 0;
+	uint32_t total_counters = 0;
+	uint32_t total_concurrent = 0;
+	uint32_t extra_counters;
+	struct perf_counter_block block = {0};
+	HsaCounterProperties *props;
+
+	if (CounterProperties == NULL)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
+	if (validate_nodeid(NodeId, &gpu_id) != 0)
+		return HSAKMT_STATUS_INVALID_NODE_UNIT;
+
+
- return HSAKMT_STATUS_NOT_SUPPORTED;
+	/* Already built for this node: reuse it. */
+	if (counter_props[NodeId] != NULL) {
+		*CounterProperties = counter_props[NodeId];
+		return HSAKMT_STATUS_SUCCESS;
+	}
+
+	dev_id = get_device_id_by_node(NodeId);
+
+	/* First pass: totals needed to size the allocation. */
+	for (i = 0; i < PERFCOUNTER_BLOCKID__MAX; i++) {
+		rc = get_block_properties(dev_id, i, &block);
+		if (rc != HSAKMT_STATUS_SUCCESS)
+			return rc;
+		total_concurrent += block.num_of_slots;
+		total_counters += block.num_of_counters;
+	}
+
+	/*
+	 * The size formula subtracts one block and one counter from the
+	 * trailing storage. Guard the counter term so that a device reporting
+	 * zero counters does not wrap the unsigned subtraction.
+	 * NOTE(review): the formula suggests the base structs already embed
+	 * one element each - confirm against the type definitions.
+	 */
+	extra_counters = (total_counters > 0) ? total_counters - 1 : 0;
+	counter_props_size = sizeof(HsaCounterProperties) +
+		sizeof(HsaCounterBlockProperties)*(PERFCOUNTER_BLOCKID__MAX-1) +
+		sizeof(HsaCounter)*extra_counters;
+
+	/*
+	 * calloc rather than malloc: only some fields are written below (one
+	 * flag bit, and BlockId only when blockid2uuid succeeds), and the
+	 * table is handed to the caller, so everything else must be zero.
+	 */
+	props = calloc(1, counter_props_size);
+	if (props == NULL)
+		return HSAKMT_STATUS_NO_MEMORY;
+
+	props->NumBlocks = PERFCOUNTER_BLOCKID__MAX;
+	props->NumConcurrent = total_concurrent;
+
+	/*
+	 * Second pass: fill in every block and its counters.
+	 * NOTE(review): Blocks[] and Counters[] are both indexed as trailing
+	 * arrays; with more than one block their storage may overlap - verify
+	 * before adding blocks beyond SQ.
+	 */
+	for (block_id = 0; block_id < PERFCOUNTER_BLOCKID__MAX; block_id++)
+	{
+		rc = get_block_properties(dev_id, block_id, &block);
+		if (rc != HSAKMT_STATUS_SUCCESS) {
+			/* Nothing was published yet, so the cache entry stays NULL. */
+			free(props);
+			return rc;
+		}
+
+		/* An id without a UUID mapping keeps the zeroed BlockId. */
+		blockid2uuid(block_id, &props->Blocks[block_id].BlockId);
+		props->Blocks[block_id].NumCounters = block.num_of_counters;
+		props->Blocks[block_id].NumConcurrent = block.num_of_slots;
+
+		for (i = 0; i < block.num_of_counters; i++) {
+			props->Blocks[block_id].Counters[i].BlockIndex = block_id;
+			props->Blocks[block_id].Counters[i].CounterId = block.counter_ids[i];
+			props->Blocks[block_id].Counters[i].CounterSizeInBits = block.counter_size_in_bits;
+			props->Blocks[block_id].Counters[i].CounterMask = block.counter_mask;
+			props->Blocks[block_id].Counters[i].Flags.ui32.Global = 1;
+			props->Blocks[block_id].Counters[i].Type = HSA_PROFILE_TYPE_NONPRIV_IMMEDIATE;
+		}
+	}
+
+	/* Publish only the fully initialised table. */
+	counter_props[NodeId] = props;
+	*CounterProperties = props;
+
+	return HSAKMT_STATUS_SUCCESS;
}
/**
@@ -52,9 +147,42 @@ hsaKmtPmcRegisterTrace(
HsaPmcTraceRoot* TraceRoot //OUT
)
{
- CHECK_KFD_OPEN();
+	/*
+	 * Validate a set of counters for NodeId, allocate the bookkeeping
+	 * record for a new trace and describe it through *TraceRoot
+	 * (NumberOfPasses, TraceBufferMinSizeBytes, TraceId).
+	 *
+	 * Returns:
+	 *   HSAKMT_STATUS_INVALID_PARAMETER  NULL Counters/TraceRoot, zero
+	 *                                    NumberOfCounters, a BlockIndex out
+	 *                                    of range, or more SQ counters than
+	 *                                    the SQ block has slots
+	 *   HSAKMT_STATUS_INVALID_NODE_UNIT  validate_nodeid() rejected NodeId
+	 *   HSAKMT_STATUS_NO_MEMORY          the record could not be allocated
+	 *   (status of hsaKmtPmcGetCounterProperties) when the node's counter
+	 *                                    table had to be built and that failed
+	 *   HSAKMT_STATUS_SUCCESS            otherwise
+	 */
+	uint32_t gpu_id, i;
+	uint64_t min_buf_size = 0;
+	uint32_t concurrent_counters[PERFCOUNTER_BLOCKID__MAX] = {0};
+	struct perf_trace *trace = NULL;
+	HsaCounterProperties *props = NULL;
+	HSAKMT_STATUS status;
+
+	if (Counters == NULL || TraceRoot == NULL || NumberOfCounters == 0)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
+	if (validate_nodeid(NodeId, &gpu_id) != 0)
+		return HSAKMT_STATUS_INVALID_NODE_UNIT;
+
+	/* Calculating the minimum buffer size */
+	for (i = 0; i < NumberOfCounters; i++) {
+		if (Counters[i].BlockIndex >= PERFCOUNTER_BLOCKID__MAX)
+			return HSAKMT_STATUS_INVALID_PARAMETER;
+		min_buf_size += Counters[i].CounterSizeInBits/BITS_PER_BYTE;
+		concurrent_counters[Counters[i].BlockIndex]++;
+	}
- return HSAKMT_STATUS_NOT_SUPPORTED;
+	/*
+	 * The per-node table is only filled in by
+	 * hsaKmtPmcGetCounterProperties(). Build it here if the caller has not
+	 * queried it yet, instead of dereferencing a NULL cache entry.
+	 */
+	props = counter_props[NodeId];
+	if (props == NULL) {
+		status = hsaKmtPmcGetCounterProperties(NodeId, &props);
+		if (status != HSAKMT_STATUS_SUCCESS)
+			return status;
+	}
+
+	/* Verifying that the number of counters per block is not larger than the amount of slots */
+	if (concurrent_counters[PERFCOUNTER_BLOCKID__SQ] > props->Blocks[PERFCOUNTER_BLOCKID__SQ].NumConcurrent)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
+	/* Size of the record itself; sizeof(trace) would be the pointer's size. */
+	trace = malloc(sizeof(*trace));
+	if (trace == NULL)
+		return HSAKMT_STATUS_NO_MEMORY;
+
+	trace->magic4cc = HSA_PERF_MAGIC4CC;
+	trace->gpu_id = gpu_id;
+	trace->state = PERF_TRACE_STATE__STOPPED;
+
+	TraceRoot->NumberOfPasses = 1;
+	TraceRoot->TraceBufferMinSizeBytes = PAGE_ALIGN_UP(min_buf_size);
+	/* The record's address doubles as the opaque trace id. */
+	TraceRoot->TraceId = PORT_VPTR_TO_UINT64(trace);
+
+	return HSAKMT_STATUS_SUCCESS;
}
/**
@@ -68,9 +196,33 @@ hsaKmtPmcUnregisterTrace(
HSATraceId TraceId //IN
)
{
- CHECK_KFD_OPEN();
+	/*
+	 * Release the record behind TraceId, stopping the trace first when it
+	 * is still marked as started.
+	 *
+	 * Checks run in this order: zero id (INVALID_PARAMETER), node
+	 * validation (INVALID_NODE_UNIT), magic tag (INVALID_HANDLE), gpu id
+	 * recorded at registration versus the node's gpu id
+	 * (INVALID_NODE_UNIT). A failing hsaKmtPmcStopTrace() status is
+	 * returned unchanged and the record is then kept.
+	 */
+	struct perf_trace *record;
+	uint32_t node_gpu_id;
+
+	if (!TraceId)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
+	if (validate_nodeid(NodeId, &node_gpu_id))
+		return HSAKMT_STATUS_INVALID_NODE_UNIT;
+
- return HSAKMT_STATUS_NOT_SUPPORTED;
+	record = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
+
+	if (record->magic4cc != HSA_PERF_MAGIC4CC)
+		return HSAKMT_STATUS_INVALID_HANDLE;
+
+	if (record->gpu_id != node_gpu_id)
+		return HSAKMT_STATUS_INVALID_NODE_UNIT;
+
+	/* A trace that is still running is stopped before it is freed. */
+	if (record->state == PERF_TRACE_STATE__STARTED) {
+		HSAKMT_STATUS stop_rc = hsaKmtPmcStopTrace(TraceId);
+
+		if (stop_rc != HSAKMT_STATUS_SUCCESS)
+			return stop_rc;
+	}
+
+	free(record);
+	return HSAKMT_STATUS_SUCCESS;
}
@@ -86,9 +238,17 @@ hsaKmtPmcAcquireTraceAccess(
HSATraceId TraceId //IN
)
{
- CHECK_KFD_OPEN();
+	/*
+	 * Only validates the handle: a zero id yields INVALID_PARAMETER, a
+	 * record without the magic tag yields INVALID_HANDLE, anything else
+	 * succeeds. No state is modified.
+	 */
+	const struct perf_trace *record;
+
+	if (!TraceId)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
- return HSAKMT_STATUS_NOT_SUPPORTED;
+	record = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
+
+	return (record->magic4cc == HSA_PERF_MAGIC4CC) ?
+		HSAKMT_STATUS_SUCCESS : HSAKMT_STATUS_INVALID_HANDLE;
}
@@ -104,9 +264,17 @@ hsaKmtPmcReleaseTraceAccess(
HSATraceId TraceId //IN
)
{
- CHECK_KFD_OPEN();
+	/*
+	 * Only validates the handle: a zero id yields INVALID_PARAMETER, a
+	 * record without the magic tag yields INVALID_HANDLE, anything else
+	 * succeeds. No state is modified.
+	 */
+	const struct perf_trace *record;
+
+	if (!TraceId)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
- return HSAKMT_STATUS_NOT_SUPPORTED;
+	record = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
+
+	return (record->magic4cc == HSA_PERF_MAGIC4CC) ?
+		HSAKMT_STATUS_SUCCESS : HSAKMT_STATUS_INVALID_HANDLE;
}
@@ -122,9 +290,17 @@ hsaKmtPmcStartTrace(
HSAuint64 TraceBufferSizeBytes //IN (page aligned)
)
{
- CHECK_KFD_OPEN();
+	/*
+	 * Mark the trace behind TraceId as started.
+	 *
+	 * A zero id, a NULL TraceBuffer or a zero TraceBufferSizeBytes yields
+	 * INVALID_PARAMETER; a record without the magic tag yields
+	 * INVALID_HANDLE. The buffer arguments are checked but not otherwise
+	 * used here.
+	 */
+	struct perf_trace *record;
+
+	if (!TraceId || !TraceBuffer || !TraceBufferSizeBytes)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
+	record = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
+	if (record->magic4cc != HSA_PERF_MAGIC4CC)
+		return HSAKMT_STATUS_INVALID_HANDLE;
- return HSAKMT_STATUS_NOT_SUPPORTED;
+
+	record->state = PERF_TRACE_STATE__STARTED;
+	return HSAKMT_STATUS_SUCCESS;
}
@@ -138,9 +314,15 @@ hsaKmtPmcQueryTrace(
HSATraceId TraceId //IN
)
{
- CHECK_KFD_OPEN();
+	/*
+	 * Only validates the handle: a zero id yields INVALID_PARAMETER, a
+	 * record without the magic tag yields INVALID_HANDLE, anything else
+	 * succeeds. No data is read back.
+	 */
+	const struct perf_trace *record;
+
+	if (!TraceId)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
+	record = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
- return HSAKMT_STATUS_NOT_SUPPORTED;
+
+	return (record->magic4cc == HSA_PERF_MAGIC4CC) ?
+		HSAKMT_STATUS_SUCCESS : HSAKMT_STATUS_INVALID_HANDLE;
}
@@ -154,7 +336,15 @@ hsaKmtPmcStopTrace(
HSATraceId TraceId //IN
)
{
- CHECK_KFD_OPEN();
+	/*
+	 * Mark the trace behind TraceId as stopped.
+	 *
+	 * A zero id yields INVALID_PARAMETER; a record without the magic tag
+	 * yields INVALID_HANDLE.
+	 */
+	struct perf_trace *record;
+
+	if (!TraceId)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
+	record = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
+	if (record->magic4cc != HSA_PERF_MAGIC4CC)
+		return HSAKMT_STATUS_INVALID_HANDLE;
+
- return HSAKMT_STATUS_NOT_SUPPORTED;
+	record->state = PERF_TRACE_STATE__STOPPED;
+	return HSAKMT_STATUS_SUCCESS;
}