diff options
author | Evgeny Pinchuk <evgeny.pinchuk@amd.com> | 2014-07-29 13:59:07 +0300 |
---|---|---|
committer | Oded Gabbay <oded.gabbay@amd.com> | 2014-07-29 13:59:07 +0300 |
commit | 2c865d510ff7c66e5d0ce3c67379dd4b0ede4859 (patch) | |
tree | 2aa7ed3836a9fa55ddbd61a030810413d31eff68 | |
parent | a4f446e8cd6de16d67cead169b2ea2ada67833c9 (diff) |
Add pmc module
Signed-off-by: Evgeny Pinchuk <evgeny.pinchuk@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@amd.com>
-rw-r--r-- | src/perfctr.c | 222 |
1 files changed, 206 insertions, 16 deletions
Patch summary: replaces the hsaKmtPmc* stubs in src/perfctr.c, which only
returned HSAKMT_STATUS_NOT_SUPPORTED, with a first implementation built on a
per-node cache of counter properties (counter_props[]) and heap-allocated
trace handles tagged with HSA_PERF_MAGIC4CC.

Review fixes folded into the added lines (hunk line counts are unchanged):
  * hsaKmtPmcRegisterTrace allocated sizeof(trace), i.e. the size of a
    pointer, which can be smaller than struct perf_trace; it now allocates
    sizeof(*trace).
  * hsaKmtPmcRegisterTrace dereferenced counter_props[NodeId] even when
    hsaKmtPmcGetCounterProperties had never populated it; a NULL entry is
    now rejected with HSAKMT_STATUS_INVALID_PARAMETER.

diff --git a/src/perfctr.c b/src/perfctr.c
index 0f10f6a..e0d6ad0 100644
--- a/src/perfctr.c
+++ b/src/perfctr.c
@@ -25,8 +25,41 @@
 
 #include <stdlib.h>
 #include "libhsakmt.h"
+#include "pmc_table.h"
 #include "linux/kfd_ioctl.h"
 
+#define BITS_PER_BYTE CHAR_BIT
+
+#define HSA_PERF_MAGIC4CC 0x54415348 /* bytes 'H','S','A','T' when stored little-endian */
+
+enum perf_trace_state {
+	PERF_TRACE_STATE__STOPPED = 0,
+	PERF_TRACE_STATE__STARTED
+};
+
+struct perf_trace {
+	uint32_t magic4cc; /* set to HSA_PERF_MAGIC4CC at registration, checked on every call */
+	uint32_t gpu_id; /* gpu_id resolved from the NodeId used at registration */
+	enum perf_trace_state state;
+};
+
+static HsaCounterProperties *counter_props[MAX_NODES] = {NULL}; /* filled lazily, one entry per node */
+
+static int blockid2uuid(enum perf_block_id block_id, HSA_UUID *uuid)
+{
+	int rc = 0;
+	switch (block_id) {
+	case PERFCOUNTER_BLOCKID__SQ:
+		*uuid = HSA_PROFILEBLOCK_AMD_SQ;
+		break;
+	default:
+		/* No UUID is defined for this block id; reaching this point is a bug */
+		rc = -1;
+	}
+
+	return rc;
+}
+
 HSAKMT_STATUS
 HSAKMTAPI
 hsaKmtPmcGetCounterProperties(
@@ -34,9 +67,71 @@ hsaKmtPmcGetCounterProperties(
     HsaCounterProperties** CounterProperties //OUT
     )
 {
-	CHECK_KFD_OPEN();
+	HSAKMT_STATUS rc = HSAKMT_STATUS_SUCCESS;
+	uint32_t gpu_id, i, block_id;
+	uint16_t dev_id;
+	uint32_t counter_props_size = 0;
+	uint32_t total_counters = 0;
+	uint32_t total_concurrent = 0;
+	struct perf_counter_block block = {0};
+
+	if (CounterProperties == NULL)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
+	if (validate_nodeid(NodeId, &gpu_id) != 0)
+		return HSAKMT_STATUS_INVALID_NODE_UNIT;
+
+
 
-	return HSAKMT_STATUS_NOT_SUPPORTED;
+	if (counter_props[NodeId] == NULL) {
+		dev_id = get_device_id_by_node(NodeId);
+		for (i = 0; i < PERFCOUNTER_BLOCKID__MAX; i++) {
+			rc = get_block_properties(dev_id, i, &block);
+			if (rc != HSAKMT_STATUS_SUCCESS)
+				return rc;
+			total_concurrent += block.num_of_slots;
+			total_counters += block.num_of_counters;
+		}
+
+		counter_props_size = sizeof(HsaCounterProperties) +
+			sizeof(HsaCounterBlockProperties)*(PERFCOUNTER_BLOCKID__MAX-1) +
+			sizeof(HsaCounter)*(total_counters-1);
+
+		counter_props[NodeId] = malloc(counter_props_size);
+
+		if (counter_props[NodeId] == NULL)
+			return HSAKMT_STATUS_NO_MEMORY;
+
+		counter_props[NodeId]->NumBlocks = PERFCOUNTER_BLOCKID__MAX;
+		counter_props[NodeId]->NumConcurrent = total_concurrent;
+
+		for (block_id = 0; block_id < PERFCOUNTER_BLOCKID__MAX; block_id++)
+		{
+			rc = get_block_properties(dev_id, block_id, &block);
+			if (rc != HSAKMT_STATUS_SUCCESS) {
+				free(counter_props[NodeId]);
+				return rc;
+			}
+
+			/* Filling the SQ block */
+			blockid2uuid(block_id, &counter_props[NodeId]->Blocks[block_id].BlockId);
+			counter_props[NodeId]->Blocks[block_id].NumCounters = block.num_of_counters;
+			counter_props[NodeId]->Blocks[block_id].NumConcurrent = block.num_of_slots;
+
+			for (i = 0; i < block.num_of_counters; i++) {
+				counter_props[NodeId]->Blocks[block_id].Counters[i].BlockIndex = block_id;
+				counter_props[NodeId]->Blocks[block_id].Counters[i].CounterId = block.counter_ids[i];
+				counter_props[NodeId]->Blocks[block_id].Counters[i].CounterSizeInBits = block.counter_size_in_bits;
+				counter_props[NodeId]->Blocks[block_id].Counters[i].CounterMask = block.counter_mask;
+				counter_props[NodeId]->Blocks[block_id].Counters[i].Flags.ui32.Global = 1;
+				counter_props[NodeId]->Blocks[block_id].Counters[i].Type = HSA_PROFILE_TYPE_NONPRIV_IMMEDIATE;
+			}
+		}
+	}
+
+	*CounterProperties = counter_props[NodeId];
+
+	return HSAKMT_STATUS_SUCCESS;
 }
 
 /**
@@ -52,9 +147,42 @@ hsaKmtPmcRegisterTrace(
     HsaPmcTraceRoot* TraceRoot //OUT
     )
 {
-	CHECK_KFD_OPEN();
+	uint32_t gpu_id, i;
+	uint64_t min_buf_size = 0;
+	uint32_t concurrent_counters[PERFCOUNTER_BLOCKID__MAX] = {0};
+	struct perf_trace *trace = NULL;
+
+	if (Counters == NULL || TraceRoot == NULL || NumberOfCounters == 0)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
+	if (validate_nodeid(NodeId, &gpu_id) != 0)
+		return HSAKMT_STATUS_INVALID_NODE_UNIT;
+
+	/* Calculating the minimum buffer size */
+	for (i = 0; i < NumberOfCounters; i++) {
+		if (Counters[i].BlockIndex >= PERFCOUNTER_BLOCKID__MAX)
+			return HSAKMT_STATUS_INVALID_PARAMETER;
+		min_buf_size += Counters[i].CounterSizeInBits/BITS_PER_BYTE;
+		concurrent_counters[Counters[i].BlockIndex]++;
+	}
 
-	return HSAKMT_STATUS_NOT_SUPPORTED;
+	/* The node's properties must already be cached (NULL until hsaKmtPmcGetCounterProperties fills them) and the SQ counters must fit in the SQ slots */
+	if (counter_props[NodeId] == NULL || concurrent_counters[PERFCOUNTER_BLOCKID__SQ] > counter_props[NodeId]->Blocks[PERFCOUNTER_BLOCKID__SQ].NumConcurrent)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
+	trace = malloc(sizeof(*trace)); /* size of the struct, not of the pointer */
+	if (trace == NULL)
+		return HSAKMT_STATUS_NO_MEMORY;
+
+	trace->magic4cc = HSA_PERF_MAGIC4CC;
+	trace->gpu_id = gpu_id;
+	trace->state = PERF_TRACE_STATE__STOPPED;
+
+	TraceRoot->NumberOfPasses = 1;
+	TraceRoot->TraceBufferMinSizeBytes = PAGE_ALIGN_UP(min_buf_size);
+	TraceRoot->TraceId = PORT_VPTR_TO_UINT64(trace);
+
+	return HSAKMT_STATUS_SUCCESS;
 }
 
 /**
@@ -68,9 +196,33 @@ hsaKmtPmcUnregisterTrace(
     HSATraceId TraceId //IN
     )
 {
-	CHECK_KFD_OPEN();
+	uint32_t gpu_id;
+	struct perf_trace *trace;
+
+	if (TraceId == 0)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
+	if (validate_nodeid(NodeId, &gpu_id) != 0)
+		return HSAKMT_STATUS_INVALID_NODE_UNIT;
+
+	trace = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
 
-	return HSAKMT_STATUS_NOT_SUPPORTED;
+	if (trace->magic4cc != HSA_PERF_MAGIC4CC)
+		return HSAKMT_STATUS_INVALID_HANDLE;
+
+	if (trace->gpu_id != gpu_id)
+		return HSAKMT_STATUS_INVALID_NODE_UNIT;
+
+	/* If the trace is in the running state, stop it */
+	if (trace->state == PERF_TRACE_STATE__STARTED) {
+		HSAKMT_STATUS status = hsaKmtPmcStopTrace(TraceId);
+		if (status != HSAKMT_STATUS_SUCCESS)
+			return status;
+	}
+
+	free(trace);
+
+	return HSAKMT_STATUS_SUCCESS;
 }
 
 
@@ -86,9 +238,17 @@ hsaKmtPmcAcquireTraceAccess(
     HSATraceId TraceId //IN
     )
 {
-	CHECK_KFD_OPEN();
+	struct perf_trace *trace;
+
+	if (TraceId == 0)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
+	trace = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
 
-	return HSAKMT_STATUS_NOT_SUPPORTED;
+	if (trace->magic4cc != HSA_PERF_MAGIC4CC)
+		return HSAKMT_STATUS_INVALID_HANDLE;
+
+	return HSAKMT_STATUS_SUCCESS;
 }
 
 
@@ -104,9 +264,17 @@ hsaKmtPmcReleaseTraceAccess(
     HSATraceId TraceId //IN
     )
 {
-	CHECK_KFD_OPEN();
+	struct perf_trace *trace;
+
+	if (TraceId == 0)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
+	trace = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
 
-	return HSAKMT_STATUS_NOT_SUPPORTED;
+	if (trace->magic4cc != HSA_PERF_MAGIC4CC)
+		return HSAKMT_STATUS_INVALID_HANDLE;
+
+	return HSAKMT_STATUS_SUCCESS;
 }
 
 
@@ -122,9 +290,17 @@ hsaKmtPmcStartTrace(
     HSAuint64 TraceBufferSizeBytes //IN (page aligned)
     )
 {
-	CHECK_KFD_OPEN();
+	struct perf_trace *trace = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
+
+	if (TraceId == 0 || TraceBuffer == NULL || TraceBufferSizeBytes == 0)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
+	if (trace->magic4cc != HSA_PERF_MAGIC4CC)
+		return HSAKMT_STATUS_INVALID_HANDLE;
 
-	return HSAKMT_STATUS_NOT_SUPPORTED;
+	trace->state = PERF_TRACE_STATE__STARTED;
+
+	return HSAKMT_STATUS_SUCCESS;
 }
 
 
@@ -138,9 +314,15 @@ hsaKmtPmcQueryTrace(
     HSATraceId TraceId //IN
     )
 {
-	CHECK_KFD_OPEN();
+	struct perf_trace *trace = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
+
+	if (TraceId == 0)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
+	if (trace->magic4cc != HSA_PERF_MAGIC4CC)
+		return HSAKMT_STATUS_INVALID_HANDLE;
 
-	return HSAKMT_STATUS_NOT_SUPPORTED;
+	return HSAKMT_STATUS_SUCCESS;
 }
 
 
@@ -154,7 +336,15 @@ hsaKmtPmcStopTrace(
     HSATraceId TraceId //IN
     )
 {
-	CHECK_KFD_OPEN();
+	struct perf_trace *trace = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
+
+	if (TraceId == 0)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
+	if (trace->magic4cc != HSA_PERF_MAGIC4CC)
+		return HSAKMT_STATUS_INVALID_HANDLE;
+
+	trace->state = PERF_TRACE_STATE__STOPPED;
 
-	return HSAKMT_STATUS_NOT_SUPPORTED;
+	return HSAKMT_STATUS_SUCCESS;
 }