summary | refs | log | tree | commit | diff
path: root/hsakmt
diff options
context:
space:
mode:
author: Oded Gabbay <oded.gabbay@gmail.com> 2015-09-30 11:42:21 +0300
committer: Oded Gabbay <oded.gabbay@gmail.com> 2015-09-30 11:43:59 +0300
commit: 27675a5f87f0c11ab8a59f119518f627598c4caf (patch)
tree: 62dbe253bbd7df7b2e85d84668b89ce7adda6e86 /hsakmt
parent: bbdfa9eeb6dd015f22479368d2440d62785a4bb8 (diff)
Move all source/header files to hsakmt subfolder
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Diffstat (limited to 'hsakmt')
-rw-r--r-- hsakmt/Makefile 53
-rw-r--r-- hsakmt/debug.c 249
-rw-r--r-- hsakmt/events.c 269
-rw-r--r-- hsakmt/fmm.c 486
-rw-r--r-- hsakmt/fmm.h 60
-rw-r--r-- hsakmt/globals.c 33
-rw-r--r-- hsakmt/hsakmt.h 577
-rw-r--r-- hsakmt/hsakmttypes.h 909
-rw-r--r-- hsakmt/kfd_ioctl.h 292
-rw-r--r-- hsakmt/libhsakmt.c 18
-rw-r--r-- hsakmt/libhsakmt.h 76
-rw-r--r-- hsakmt/libhsakmt.ver 46
-rw-r--r-- hsakmt/memory.c 204
-rw-r--r-- hsakmt/openclose.c 112
-rw-r--r-- hsakmt/perfctr.c 370
-rw-r--r-- hsakmt/pmc_table.c 134
-rw-r--r-- hsakmt/pmc_table.h 50
-rw-r--r-- hsakmt/queues.c 341
-rw-r--r-- hsakmt/time.c 61
-rw-r--r-- hsakmt/topology.c 991
-rw-r--r-- hsakmt/version.c 49
21 files changed, 5380 insertions, 0 deletions
diff --git a/hsakmt/Makefile b/hsakmt/Makefile
new file mode 100644
index 0000000..5608ab7
--- /dev/null
+++ b/hsakmt/Makefile
@@ -0,0 +1,53 @@
+# Builds the libhsakmt shared library.
+# Targets: "lnx" (compiled and linked with -m32), "lnx64a" (default flags),
+# "all" (both) and "clean".
+#
+# Include directories
+# NOTE(review): the diffstat of this commit lists hsakmt.h, hsakmttypes.h and
+# kfd_ioctl.h inside hsakmt/, while this Makefile still refers to ../include;
+# confirm that ../include exists in the tree.
+INCLUDES += ../include
+CFLAGS += $(foreach DIR,$(INCLUDES),-I$(DIR))
+
+# The library is linked as $(LIB_NAME).$(LIB_MAJOR_VER) (also its soname);
+# $(LIB_NAME) itself is a symlink to it.
+LIB_NAME = libhsakmt.so
+LIB_MAJOR_VER = 1
+
+# Compiler options
+CFLAGS += -fPIC # Position-independent code required to build shared library
+CFLAGS += -W -Wall -Wextra -Werror -Wno-unused-parameter
+CFLAGS += -Wformat-security -Wswitch-default -Wundef \
+ -Wshadow -Wpointer-arith -Wbad-function-cast -Wcast-qual \
+ -Wlogical-op -Wstrict-prototypes -Wmissing-prototypes \
+ -Wmissing-declarations -Wredundant-decls \
+ -Wunreachable-code
+CFLAGS += -std=gnu99 -ggdb -pthread -fvisibility=hidden -O2
+
+# Exported symbols are restricted by the libhsakmt.ver version script.
+LDFLAGS += -lrt -pthread -Wl,--version-script=libhsakmt.ver -Wl,-soname=$(LIB_NAME).$(LIB_MAJOR_VER)
+
+OBJS = debug.o globals.o memory.o perfctr.o time.o version.o \
+ events.o openclose.o queues.o topology.o fmm.o pmc_table.o \
+ libhsakmt.o
+
+.PHONY: all lnx lnx64a clean
+
+# Default target
+all: lnx lnx64a
+
+# Output goes to ../build/<goals given on the command line>.
+# NOTE(review): BUILDDIR is derived from MAKECMDGOALS, so a plain "make" or
+# "make all" gives lnx and lnx64a the same BUILDDIR - confirm this is intended.
+BUILD_ROOT = ../build
+BUILDDIR = $(BUILD_ROOT)/$(MAKECMDGOALS)
+
+# Despite the name, TARGET is the list of object files inside BUILDDIR.
+TARGET = $(addprefix $(BUILDDIR)/,$(OBJS))
+
+# Link the versioned shared object from all objects.
+$(BUILDDIR)/$(LIB_NAME).$(LIB_MAJOR_VER): $(TARGET)
+ gcc -shared $(LDFLAGS) -o $@ $^
+
+# Unversioned name: symlink to the versioned library.
+$(BUILDDIR)/$(LIB_NAME): $(BUILDDIR)/$(LIB_NAME).$(LIB_MAJOR_VER)
+ @ln -sf $(LIB_NAME).$(LIB_MAJOR_VER) $(BUILDDIR)/$(LIB_NAME)
+
+# 32-bit build: target-specific -m32 for both compile and link.
+lnx: CFLAGS += -m32
+lnx: LDFLAGS += -m32
+lnx: $(BUILDDIR)/$(LIB_NAME)
+
+# Build with the default (unmodified) CFLAGS/LDFLAGS.
+lnx64a: $(BUILDDIR)/$(LIB_NAME)
+
+# Removes the whole build root, i.e. the output of every target.
+clean:
+ rm -rf $(BUILD_ROOT)
+
+#Rule
+# Every object depends on its .c file and on the three public headers, so a
+# change to any of those headers rebuilds all objects.
+$(BUILDDIR)/%.o: %.c ../include/hsakmt.h ../include/hsakmttypes.h ../include/linux/kfd_ioctl.h
+ @echo Compiling $^
+ @mkdir -p $(dir $@)
+ gcc $(CFLAGS) -c $< -o $@
diff --git a/hsakmt/debug.c b/hsakmt/debug.c
new file mode 100644
index 0000000..46f72e7
--- /dev/null
+++ b/hsakmt/debug.c
@@ -0,0 +1,249 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including
+ * the next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "libhsakmt.h"
+#include "linux/kfd_ioctl.h"
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * Issues AMDKFD_IOC_DBG_REGISTER for the GPU that backs NodeId.
+ *
+ * Returns the status from validate_nodeid() if the node cannot be resolved,
+ * HSAKMT_STATUS_ERROR if the ioctl fails, HSAKMT_STATUS_SUCCESS otherwise.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtDbgRegister(
+    HSAuint32 NodeId    //IN
+    )
+{
+    uint32_t gpu_id;
+    HSAKMT_STATUS status;
+    struct kfd_ioctl_dbg_register_args ioctl_args;
+
+    CHECK_KFD_OPEN();
+
+    /* Translate the node id into the gpu_id used by the ioctl arguments */
+    status = validate_nodeid(NodeId, &gpu_id);
+    if (status != HSAKMT_STATUS_SUCCESS)
+        return status;
+
+    memset(&ioctl_args, 0, sizeof(ioctl_args));
+    ioctl_args.gpu_id = gpu_id;
+
+    if (kmtIoctl(kfd_fd, AMDKFD_IOC_DBG_REGISTER, &ioctl_args) != 0)
+        return HSAKMT_STATUS_ERROR;
+
+    return HSAKMT_STATUS_SUCCESS;
+}
+
+/*
+ * Issues AMDKFD_IOC_DBG_UNREGISTER for the GPU that backs NodeId.
+ *
+ * Returns the status from validate_nodeid() if the node cannot be resolved,
+ * HSAKMT_STATUS_ERROR if the ioctl fails, HSAKMT_STATUS_SUCCESS otherwise.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtDbgUnregister(
+    HSAuint32 NodeId    //IN
+    )
+{
+    uint32_t gpu_id;
+    HSAKMT_STATUS status;
+    struct kfd_ioctl_dbg_unregister_args ioctl_args;
+
+    CHECK_KFD_OPEN();
+
+    /* Translate the node id into the gpu_id used by the ioctl arguments */
+    status = validate_nodeid(NodeId, &gpu_id);
+    if (status != HSAKMT_STATUS_SUCCESS)
+        return status;
+
+    memset(&ioctl_args, 0, sizeof(ioctl_args));
+    ioctl_args.gpu_id = gpu_id;
+
+    if (kmtIoctl(kfd_fd, AMDKFD_IOC_DBG_UNREGISTER, &ioctl_args) != 0)
+        return HSAKMT_STATUS_ERROR;
+
+    return HSAKMT_STATUS_SUCCESS;
+}
+
+/*
+ * Packs a wavefront-control request and sends it with
+ * AMDKFD_IOC_DBG_WAVE_CONTROL.
+ *
+ * The ioctl buffer is the fixed kfd_ioctl_dbg_wave_control_args header
+ * followed, back to back, by: Operand, Mode, TrapId,
+ * DbgWaveMsgRing->DbgWaveMsg and DbgWaveMsgRing->MemoryVA.
+ *
+ * Returns HSAKMT_STATUS_INVALID_PARAMETER if DbgWaveMsgRing is NULL, the
+ * status from validate_nodeid() if the node cannot be resolved,
+ * HSAKMT_STATUS_ERROR on allocation or ioctl failure, and
+ * HSAKMT_STATUS_SUCCESS otherwise.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtDbgWavefrontControl(
+    HSAuint32 NodeId,   //IN
+    HSA_DBG_WAVEOP Operand, //IN
+    HSA_DBG_WAVEMODE Mode,  //IN
+    HSAuint32 TrapId,   //IN
+    HsaDbgWaveMessage* DbgWaveMsgRing   //IN (? - see thunk API doc!)
+    )
+{
+    HSAKMT_STATUS result;
+    uint32_t gpu_id;
+
+    struct kfd_ioctl_dbg_wave_control_args *args;
+
+    CHECK_KFD_OPEN();
+
+    /* The message ring is dereferenced below, so it must be present */
+    if (DbgWaveMsgRing == NULL)
+        return HSAKMT_STATUS_INVALID_PARAMETER;
+
+    result = validate_nodeid(NodeId, &gpu_id);
+    if (result != HSAKMT_STATUS_SUCCESS)
+        return result;
+
+    /* Determine Size of the ioctl buffer */
+    uint32_t buff_size = sizeof(Operand) +
+                sizeof(Mode) + sizeof(TrapId) +
+                sizeof(DbgWaveMsgRing->DbgWaveMsg) +
+                sizeof(DbgWaveMsgRing->MemoryVA) +
+                sizeof(*args);
+
+    /* calloc gives a zeroed buffer, replacing malloc + memset */
+    args = calloc(1, buff_size);
+    if (args == NULL)
+        return HSAKMT_STATUS_ERROR;
+
+    args->gpu_id = gpu_id;
+    args->buf_size_in_bytes = buff_size;
+
+    /* increment pointer to the start of the non fixed part */
+    unsigned char* run_ptr = (unsigned char*)args + sizeof(*args);
+
+    /* save variable content pointer for kfd; go through uintptr_t so the
+     * cast is also valid when pointers are 32 bits wide (-m32 build) */
+    args->content_ptr = (uint64_t)(uintptr_t)run_ptr;
+
+    /*
+     * insert items, and increment pointer accordingly.
+     * memcpy is used instead of storing through a casted pointer because
+     * the items are packed and therefore not necessarily aligned.
+     */
+    memcpy(run_ptr, &Operand, sizeof(Operand));
+    run_ptr += sizeof(Operand);
+
+    memcpy(run_ptr, &Mode, sizeof(Mode));
+    run_ptr += sizeof(Mode);
+
+    memcpy(run_ptr, &TrapId, sizeof(TrapId));
+    run_ptr += sizeof(TrapId);
+
+    memcpy(run_ptr, &DbgWaveMsgRing->DbgWaveMsg, sizeof(DbgWaveMsgRing->DbgWaveMsg));
+    run_ptr += sizeof(DbgWaveMsgRing->DbgWaveMsg);
+
+    memcpy(run_ptr, &DbgWaveMsgRing->MemoryVA, sizeof(DbgWaveMsgRing->MemoryVA));
+
+    /* send to kernel */
+    long err = kmtIoctl(kfd_fd, AMDKFD_IOC_DBG_WAVE_CONTROL, args);
+
+    free(args);
+
+    if (err == 0)
+        return HSAKMT_STATUS_SUCCESS;
+    else
+        return HSAKMT_STATUS_ERROR;
+}
+
+/*
+ * Packs an address-watch request and sends it with
+ * AMDKFD_IOC_DBG_ADDRESS_WATCH.
+ *
+ * The ioctl buffer is the fixed kfd_ioctl_dbg_address_watch_args header
+ * followed, back to back, by: NumWatchPoints, NumWatchPoints watch modes,
+ * NumWatchPoints watch addresses, the watch masks and the watch events.
+ *
+ * WatchMask is optional: when it is NULL, or its first entry is 0, a single
+ * mask slot is sent (0 for a NULL WatchMask); otherwise NumWatchPoints masks
+ * are sent. WatchEvent is optional: when NULL no event entries are sent.
+ *
+ * Returns the status from validate_nodeid() if the node cannot be resolved,
+ * HSAKMT_STATUS_INVALID_PARAMETER if NumWatchPoints exceeds
+ * MAX_ALLOWED_NUM_POINTS or a required array is NULL, HSAKMT_STATUS_ERROR on
+ * allocation or ioctl failure, and HSAKMT_STATUS_SUCCESS otherwise.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtDbgAddressWatch(
+    HSAuint32 NodeId,   //IN
+    HSAuint32 NumWatchPoints,   //IN
+    HSA_DBG_WATCH_MODE WatchMode[], //IN
+    void* WatchAddress[],   //IN
+    HSAuint64 WatchMask[],  //IN, optional
+    HsaEvent* WatchEvent[]  //IN, optional
+    )
+{
+    HSAKMT_STATUS result;
+    uint32_t gpu_id;
+    struct kfd_ioctl_dbg_address_watch_args *args;
+    uint32_t buff_size;
+    uint32_t watch_mask_items, watch_event_items;
+    HSAuint32 i;
+
+    CHECK_KFD_OPEN();
+
+    result = validate_nodeid(NodeId, &gpu_id);
+    if (result != HSAKMT_STATUS_SUCCESS)
+        return result;
+
+    if (NumWatchPoints > MAX_ALLOWED_NUM_POINTS)
+        return HSAKMT_STATUS_INVALID_PARAMETER;
+
+    /* The mode and address vectors are mandatory when there is anything to copy */
+    if (NumWatchPoints > 0 && (WatchMode == NULL || WatchAddress == NULL))
+        return HSAKMT_STATUS_INVALID_PARAMETER;
+
+    /*
+     * Determine the size of the watch mask and event buffers.
+     * WatchMask is only inspected when it is non-NULL; a missing mask vector
+     * is sent as one zeroed slot, the same as a vector whose first entry is 0.
+     */
+    watch_mask_items = (WatchMask != NULL && WatchMask[0] > 0) ? NumWatchPoints : 1;
+    watch_event_items = WatchEvent != NULL ? NumWatchPoints : 0;
+
+    /*
+     * Size and structure of the ioctl buffer is dynamic in this case
+     * Here we calculate the buff size.
+     */
+    buff_size = sizeof(NumWatchPoints) +
+        (sizeof(WatchMode[0]) + sizeof(WatchAddress[0])) *
+            NumWatchPoints +
+        watch_mask_items * sizeof(HSAuint64) +
+        watch_event_items * sizeof(HsaEvent*) +
+        sizeof(*args);
+
+    /* calloc gives a zeroed buffer, replacing malloc + memset */
+    args = calloc(1, buff_size);
+    if (args == NULL)
+        return HSAKMT_STATUS_ERROR;
+
+    args->gpu_id = gpu_id;
+    args->buf_size_in_bytes = buff_size;
+
+    /* increment pointer to the start of the non fixed part */
+    unsigned char* run_ptr = (unsigned char*)args + sizeof(*args);
+
+    /* save variable content pointer for kfd; go through uintptr_t so the
+     * cast is also valid when pointers are 32 bits wide (-m32 build) */
+    args->content_ptr = (uint64_t)(uintptr_t)run_ptr;
+
+    /*
+     * insert items, and increment pointer accordingly.
+     * memcpy is used instead of storing through a casted pointer because
+     * the items are packed and therefore not necessarily aligned.
+     */
+    memcpy(run_ptr, &NumWatchPoints, sizeof(NumWatchPoints));
+    run_ptr += sizeof(NumWatchPoints);
+
+    for (i = 0 ; i < NumWatchPoints ; i++) {
+        memcpy(run_ptr, &WatchMode[i], sizeof(WatchMode[i]));
+        run_ptr += sizeof(WatchMode[i]);
+    }
+
+    for (i = 0 ; i < NumWatchPoints ; i++) {
+        memcpy(run_ptr, &WatchAddress[i], sizeof(WatchAddress[i]));
+        run_ptr += sizeof(WatchAddress[i]);
+    }
+
+    for (i = 0 ; i < watch_mask_items ; i++) {
+        /* With a NULL WatchMask the slot keeps the zero written by calloc */
+        if (WatchMask != NULL)
+            memcpy(run_ptr, &WatchMask[i], sizeof(HSAuint64));
+        run_ptr += sizeof(HSAuint64);
+    }
+
+    for (i = 0 ; i < watch_event_items ; i++) {
+        memcpy(run_ptr, &WatchEvent[i], sizeof(WatchEvent[i]));
+        run_ptr += sizeof(WatchEvent[i]);
+    }
+
+    /* send to kernel */
+    long err = kmtIoctl(kfd_fd, AMDKFD_IOC_DBG_ADDRESS_WATCH, args);
+
+    free(args);
+
+    if (err != 0)
+        return HSAKMT_STATUS_ERROR;
+
+    return HSAKMT_STATUS_SUCCESS;
+}
diff --git a/hsakmt/events.c b/hsakmt/events.c
new file mode 100644
index 0000000..5d6835e
--- /dev/null
+++ b/hsakmt/events.c
@@ -0,0 +1,269 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including
+ * the next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "libhsakmt.h"
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include "linux/kfd_ioctl.h"
+
+/* Mapping of the event page, created on demand by hsaKmtCreateEvent(). */
+static HSAuint64 *events_page = NULL;
+
+/*
+ * Returns true for every event type except HSA_EVENTTYPE_SIGNAL and
+ * HSA_EVENTTYPE_DEBUG_EVENT (debug events behave as signal events).
+ */
+static bool IsSystemEventType(HSA_EVENTTYPE type)
+{
+    bool user_signalable = (type == HSA_EVENTTYPE_SIGNAL ||
+                            type == HSA_EVENTTYPE_DEBUG_EVENT);
+
+    return !user_signalable;
+}
+
+/*
+ * Creates a KFD event with AMDKFD_IOC_CREATE_EVENT and returns a heap
+ * allocated HsaEvent describing it in *Event.
+ *
+ * EventDesc   - requested event type.
+ * ManualReset - when false the event is created with auto_reset set.
+ * IsSignaled  - when true and the type is not a system event type, the event
+ *               is set right after creation (best effort, result ignored).
+ * Event       - receives the new event; set to NULL on every failure.
+ *
+ * Returns HSAKMT_STATUS_INVALID_PARAMETER for NULL arguments or an event type
+ * at or above HSA_EVENTTYPE_MAXID, HSAKMT_STATUS_ERROR on allocation, ioctl
+ * or mmap failure, HSAKMT_STATUS_SUCCESS otherwise. The event is released
+ * with hsaKmtDestroyEvent().
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtCreateEvent(
+    HsaEventDescriptor* EventDesc,  //IN
+    bool ManualReset,   //IN
+    bool IsSignaled,    //IN
+    HsaEvent** Event    //OUT
+    )
+{
+    CHECK_KFD_OPEN();
+
+    if (EventDesc == NULL || Event == NULL)
+        return HSAKMT_STATUS_INVALID_PARAMETER;
+
+    /* Never leave the caller with an indeterminate handle on failure */
+    *Event = NULL;
+
+    if (EventDesc->EventType >= HSA_EVENTTYPE_MAXID)
+    {
+        return HSAKMT_STATUS_INVALID_PARAMETER;
+    }
+
+    HsaEvent* e = calloc(1, sizeof(*e));
+    if (e == NULL)
+    {
+        return HSAKMT_STATUS_ERROR;
+    }
+
+    struct kfd_ioctl_create_event_args args;
+    memset(&args, 0, sizeof(args));
+
+    args.event_type = EventDesc->EventType;
+    args.auto_reset = !ManualReset;
+
+    if (kmtIoctl(kfd_fd, AMDKFD_IOC_CREATE_EVENT, &args) != 0) {
+        free(e);
+        return HSAKMT_STATUS_ERROR;
+    }
+
+    /* Record the id first: the failure path below destroys the event by id */
+    e->EventId = args.event_id;
+
+    if (events_page == NULL && args.event_page_offset > 0) {
+        void* page = mmap(NULL, 4096, PROT_WRITE | PROT_READ,
+                          MAP_SHARED, kfd_fd, args.event_page_offset);
+        /* mmap reports failure with MAP_FAILED, not NULL */
+        if (page == MAP_FAILED) {
+            /* hsaKmtDestroyEvent frees e only when it succeeds */
+            if (hsaKmtDestroyEvent(e) != HSAKMT_STATUS_SUCCESS)
+                free(e);
+            return HSAKMT_STATUS_ERROR;
+        }
+        events_page = page;
+    }
+
+    if (args.event_page_offset > 0 && args.event_slot_index < KFD_SIGNAL_EVENT_LIMIT)
+        e->EventData.HWData2 = (HSAuint64)(uintptr_t)&events_page[args.event_slot_index];
+
+    e->EventData.EventType = EventDesc->EventType;
+    e->EventData.HWData1 = args.event_id;
+
+    e->EventData.HWData3 = args.event_trigger_data;
+
+    if (IsSignaled && !IsSystemEventType(e->EventData.EventType)) {
+        struct kfd_ioctl_set_event_args set_args;
+        memset(&set_args, 0, sizeof(set_args));
+        set_args.event_id = args.event_id;
+
+        /* Best effort: a failure to pre-signal does not fail the creation */
+        kmtIoctl(kfd_fd, AMDKFD_IOC_SET_EVENT, &set_args);
+    }
+
+    *Event = e;
+
+    return HSAKMT_STATUS_SUCCESS;
+}
+
+/*
+ * Destroys the KFD event identified by Event->EventId and frees Event.
+ *
+ * Returns HSAKMT_STATUS_INVALID_HANDLE for a NULL Event and
+ * HSAKMT_STATUS_ERROR if AMDKFD_IOC_DESTROY_EVENT fails; in the latter case
+ * Event is not freed.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtDestroyEvent(
+    HsaEvent* Event     //IN
+    )
+{
+    struct kfd_ioctl_destroy_event_args destroy_args;
+
+    CHECK_KFD_OPEN();
+
+    if (Event == NULL)
+        return HSAKMT_STATUS_INVALID_HANDLE;
+
+    memset(&destroy_args, 0, sizeof(destroy_args));
+    destroy_args.event_id = Event->EventId;
+
+    if (kmtIoctl(kfd_fd, AMDKFD_IOC_DESTROY_EVENT, &destroy_args) == 0) {
+        free(Event);
+        return HSAKMT_STATUS_SUCCESS;
+    }
+
+    return HSAKMT_STATUS_ERROR;
+}
+
+/*
+ * Signals Event through AMDKFD_IOC_SET_EVENT.
+ *
+ * Returns HSAKMT_STATUS_INVALID_HANDLE for a NULL Event and
+ * HSAKMT_STATUS_ERROR for system event types or when the ioctl returns -1.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtSetEvent(
+    HsaEvent* Event     //IN
+    )
+{
+    struct kfd_ioctl_set_event_args set_args;
+
+    CHECK_KFD_OPEN();
+
+    if (Event == NULL)
+        return HSAKMT_STATUS_INVALID_HANDLE;
+
+    /* Although the spec doesn't say so, system-defined events may not be signaled. */
+    if (IsSystemEventType(Event->EventData.EventType))
+        return HSAKMT_STATUS_ERROR;
+
+    memset(&set_args, 0, sizeof(set_args));
+    set_args.event_id = Event->EventId;
+
+    return (kmtIoctl(kfd_fd, AMDKFD_IOC_SET_EVENT, &set_args) == -1)
+            ? HSAKMT_STATUS_ERROR
+            : HSAKMT_STATUS_SUCCESS;
+}
+
+/*
+ * Resets Event through AMDKFD_IOC_RESET_EVENT.
+ *
+ * Returns HSAKMT_STATUS_INVALID_HANDLE for a NULL Event and
+ * HSAKMT_STATUS_ERROR for system event types or when the ioctl returns -1.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtResetEvent(
+    HsaEvent* Event     //IN
+    )
+{
+    struct kfd_ioctl_reset_event_args reset_args;
+
+    CHECK_KFD_OPEN();
+
+    if (Event == NULL)
+        return HSAKMT_STATUS_INVALID_HANDLE;
+
+    /* Although the spec doesn't say so, system-defined events may not be reset. */
+    if (IsSystemEventType(Event->EventData.EventType))
+        return HSAKMT_STATUS_ERROR;
+
+    memset(&reset_args, 0, sizeof(reset_args));
+    reset_args.event_id = Event->EventId;
+
+    return (kmtIoctl(kfd_fd, AMDKFD_IOC_RESET_EVENT, &reset_args) == -1)
+            ? HSAKMT_STATUS_ERROR
+            : HSAKMT_STATUS_SUCCESS;
+}
+
+/*
+ * Only validates the handle: returns HSAKMT_STATUS_INVALID_HANDLE for a NULL
+ * Event and HSAKMT_STATUS_SUCCESS otherwise. No event state is queried.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtQueryEventState(
+    HsaEvent* Event     //IN
+    )
+{
+    CHECK_KFD_OPEN();
+
+    return (Event == NULL) ? HSAKMT_STATUS_INVALID_HANDLE
+                           : HSAKMT_STATUS_SUCCESS;
+}
+
+/*
+ * Waits on a single event by delegating to hsaKmtWaitOnMultipleEvents()
+ * with a one-element list and WaitOnAll set to true.
+ *
+ * Returns HSAKMT_STATUS_INVALID_HANDLE for a NULL Event, otherwise the
+ * result of hsaKmtWaitOnMultipleEvents().
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtWaitOnEvent(
+    HsaEvent* Event,    //IN
+    HSAuint32 Milliseconds  //IN
+    )
+{
+    HsaEvent* single_event[1];
+
+    if (Event == NULL)
+        return HSAKMT_STATUS_INVALID_HANDLE;
+
+    single_event[0] = Event;
+
+    return hsaKmtWaitOnMultipleEvents(single_event, 1, true, Milliseconds);
+}
+
+/*
+ * Waits on NumEvents events through AMDKFD_IOC_WAIT_EVENTS.
+ *
+ * Events       - array of NumEvents event handles; none may be NULL.
+ * WaitOnAll    - passed to the ioctl as wait_for_all.
+ * Milliseconds - passed to the ioctl as timeout.
+ *
+ * On a successful wait, every event of type HSA_EVENTTYPE_MEMORY has its
+ * MemoryAccessFault data filled in from the data returned by the kernel.
+ *
+ * Returns HSAKMT_STATUS_INVALID_HANDLE if Events or one of its entries is
+ * NULL, HSAKMT_STATUS_ERROR on allocation or ioctl failure,
+ * HSAKMT_STATUS_WAIT_TIMEOUT on timeout, the gpuid_to_nodeid() status if a
+ * gpu id cannot be translated, and HSAKMT_STATUS_SUCCESS otherwise.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtWaitOnMultipleEvents(
+    HsaEvent* Events[], //IN
+    HSAuint32 NumEvents,    //IN
+    bool WaitOnAll, //IN
+    HSAuint32 Milliseconds  //IN
+    )
+{
+    CHECK_KFD_OPEN();
+
+    if (!Events)
+        return HSAKMT_STATUS_INVALID_HANDLE;
+
+    /* Every entry is dereferenced below, so reject NULL handles up front */
+    for (HSAuint32 i = 0; i < NumEvents; i++) {
+        if (Events[i] == NULL)
+            return HSAKMT_STATUS_INVALID_HANDLE;
+    }
+
+    /* calloc checks NumEvents * size for overflow and zeroes unused fields */
+    struct kfd_event_data *event_data = calloc(NumEvents, sizeof(*event_data));
+    /* calloc(0, ...) may legitimately return NULL, so only fail for NumEvents > 0 */
+    if (event_data == NULL && NumEvents > 0)
+        return HSAKMT_STATUS_ERROR;
+
+    for (HSAuint32 i = 0; i < NumEvents; i++) {
+        event_data[i].event_id = Events[i]->EventId;
+        event_data[i].kfd_event_data_ext = (uint64_t)(uintptr_t)NULL;
+    }
+
+    struct kfd_ioctl_wait_events_args args;
+    memset(&args, 0, sizeof(args));
+
+    args.wait_for_all = WaitOnAll;
+    args.timeout = Milliseconds;
+    args.num_events = NumEvents;
+    args.events_ptr = (uint64_t)(uintptr_t)event_data;
+
+    HSAKMT_STATUS result;
+
+    if (kmtIoctl(kfd_fd, AMDKFD_IOC_WAIT_EVENTS, &args) == -1) {
+        result = HSAKMT_STATUS_ERROR;
+    }
+    else if (args.wait_result == KFD_IOC_WAIT_RESULT_TIMEOUT) {
+        result = HSAKMT_STATUS_WAIT_TIMEOUT;
+    }
+    else {
+        result = HSAKMT_STATUS_SUCCESS;
+        for (HSAuint32 i = 0; i < NumEvents; i++) {
+            if (Events[i]->EventData.EventType == HSA_EVENTTYPE_MEMORY) {
+                /* Copy the fault description returned by the kernel into the event */
+                Events[i]->EventData.EventData.MemoryAccessFault.VirtualAddress = event_data[i].memory_exception_data.va;
+                result = gpuid_to_nodeid(event_data[i].memory_exception_data.gpu_id, &Events[i]->EventData.EventData.MemoryAccessFault.NodeId);
+                if (result != HSAKMT_STATUS_SUCCESS)
+                    goto out;
+                Events[i]->EventData.EventData.MemoryAccessFault.Failure.NotPresent = event_data[i].memory_exception_data.failure.NotPresent;
+                Events[i]->EventData.EventData.MemoryAccessFault.Failure.ReadOnly = event_data[i].memory_exception_data.failure.ReadOnly;
+                Events[i]->EventData.EventData.MemoryAccessFault.Failure.NoExecute = event_data[i].memory_exception_data.failure.NoExecute;
+                Events[i]->EventData.EventData.MemoryAccessFault.Flags = HSA_EVENTID_MEMORY_FATAL_PROCESS;
+            }
+        }
+    }
+out:
+    free(event_data);
+
+    return result;
+}
diff --git a/hsakmt/fmm.c b/hsakmt/fmm.c
new file mode 100644
index 0000000..a90fb95
--- /dev/null
+++ b/hsakmt/fmm.c
@@ -0,0 +1,486 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including
+ * the next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "fmm.h"
+#include "linux/kfd_ioctl.h"
+#include "libhsakmt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <inttypes.h>
+#include <sys/mman.h>
+
+/* gpu_id value that marks an unused gpu_mem slot */
+#define NON_VALID_GPU_ID 0
+/* Number of elements of a real array (not valid for pointers) */
+#define ARRAY_LEN(array) (sizeof(array) / sizeof((array)[0]))
+/* Initializer for aperture_t; arguments are parenthesized so expressions are cast as a whole */
+#define INIT_APERTURE(base_value, limit_value) {.base = (void*)(base_value), .limit = (void*)(limit_value) }
+/* Initializer for manageble_aperture_t: empty range/object lists and a statically initialized mutex */
+#define INIT_MANAGEBLE_APERTURE(base_value, limit_value) {.base = (void*)(base_value), .limit = (void*)(limit_value), .vm_ranges = NULL, .vm_objects = NULL, .fmm_mutex = PTHREAD_MUTEX_INITIALIZER}
+/* Initializer for one gpu_mem_t slot: no GPU assigned, all apertures empty */
+#define INIT_GPU_MEM \
+{ .gpu_id = NON_VALID_GPU_ID,\
+    .lds_aperture = INIT_APERTURE(0, 0), \
+    .scratch_aperture = INIT_MANAGEBLE_APERTURE(0, 0),\
+    .gpuvm_aperture = INIT_MANAGEBLE_APERTURE(0, 0)\
+}
+
+/* Initializer for the whole gpu_mem table (GNU range designator) */
+#define INIT_GPUs_MEM {[0 ... (NUM_OF_SUPPORTED_GPUS-1)] = INIT_GPU_MEM}
+/*
+ * One allocation handed out from a managed aperture. Objects form a doubly
+ * linked list headed by manageble_aperture_t.vm_objects.
+ */
+struct vm_object{
+ void* start; // first address of the allocation
+ HSAuint64 size; // size of the allocation in bytes
+ HSAuint64 handle; // opaque
+ struct vm_object* next;
+ struct vm_object* prev;
+};
+typedef struct vm_object vm_object_t;
+
+/*
+ * One contiguous in-use address range [start, end] (end is inclusive).
+ * Areas form a doubly linked list headed by manageble_aperture_t.vm_ranges.
+ */
+struct vm_area{
+ void* start;
+ void* end;
+ struct vm_area* next;
+ struct vm_area* prev;
+};
+typedef struct vm_area vm_area_t;
+
+/*
+ * An aperture whose address space is handed out by this file: bounds plus
+ * the lists of used ranges and allocated objects. fmm_mutex is taken by the
+ * fmm_* entry points around aperture_allocate()/aperture_release().
+ */
+typedef struct {
+ void* base;
+ void* limit;
+ vm_area_t* vm_ranges;
+ vm_object_t* vm_objects;
+ pthread_mutex_t fmm_mutex;
+} manageble_aperture_t;
+
+/* An aperture described by its bounds only; no allocation tracking. */
+typedef struct {
+ void* base;
+ void* limit;
+} aperture_t;
+
+/* Per-GPU aperture set; gpu_id == NON_VALID_GPU_ID marks an unused slot. */
+typedef struct{
+ HSAuint32 gpu_id;
+ aperture_t lds_aperture;
+ manageble_aperture_t scratch_aperture;
+ manageble_aperture_t gpuvm_aperture;
+}gpu_mem_t;
+
+/* Table of NUM_OF_SUPPORTED_GPUS slots, filled by fmm_init_process_apertures(). */
+static gpu_mem_t gpu_mem[] = INIT_GPUs_MEM;
+
+/*
+ * Allocates an unlinked area covering [start, end].
+ * Returns NULL if the allocation fails.
+ */
+static vm_area_t* vm_create_and_init_area(void* start, void* end){
+    vm_area_t* node = (vm_area_t*)malloc(sizeof(vm_area_t)); // TODO: Memory pool ???
+
+    if (node == NULL)
+        return NULL;
+
+    node->start = start;
+    node->end = end;
+    node->prev = NULL;
+    node->next = NULL;
+
+    return node;
+}
+
+/*
+ * Allocates an unlinked object record for an allocation of the given
+ * start address, size and handle. Returns NULL if the allocation fails.
+ */
+static vm_object_t* vm_create_and_init_object(void* start, uint64_t size, uint64_t handle){
+    vm_object_t* node = (vm_object_t*)malloc(sizeof(vm_object_t)); // TODO: Memory pool ???
+
+    if (node == NULL)
+        return NULL;
+
+    node->start = start;
+    node->size = size;
+    node->handle = handle;
+    node->prev = NULL;
+    node->next = NULL;
+
+    return node;
+}
+
+
+/*
+ * Unlinks area from the aperture's range list (updating the list head when
+ * area is the first element) and frees it.
+ */
+static void vm_remove_area(manageble_aperture_t* app, vm_area_t* area){
+    vm_area_t* successor = area->next;
+    vm_area_t* predecessor = area->prev;
+
+    if (predecessor != NULL)
+        predecessor->next = successor;
+    else
+        app->vm_ranges = successor; // area was the head of the list
+
+    if (successor != NULL)
+        successor->prev = predecessor;
+
+    free(area);
+}
+
+/*
+ * Unlinks object from the aperture's object list (updating the list head
+ * when object is the first element) and frees it.
+ */
+static void vm_remove_object(manageble_aperture_t* app, vm_object_t* object){
+    vm_object_t* successor = object->next;
+    vm_object_t* predecessor = object->prev;
+
+    if (predecessor != NULL)
+        predecessor->next = successor;
+    else
+        app->vm_objects = successor; // object was the head of the list
+
+    if (successor != NULL)
+        successor->prev = predecessor;
+
+    free(object);
+}
+
+
+
+/* Links new_area into the list directly behind after_this. */
+static void vm_add_area_after(vm_area_t* after_this, vm_area_t* new_area){
+    vm_area_t* old_next = after_this->next;
+
+    new_area->prev = after_this;
+    new_area->next = old_next;
+
+    after_this->next = new_area;
+    if (old_next != NULL)
+        old_next->prev = new_area;
+}
+
+/*
+ * Links new_object into the list directly in front of before_this.
+ * The caller updates the list head if before_this was the head.
+ */
+static void vm_add_object_before(vm_object_t* before_this, vm_object_t* new_object){
+    vm_object_t* old_prev = before_this->prev;
+
+    new_object->next = before_this;
+    new_object->prev = old_prev;
+
+    before_this->prev = new_object;
+    if (old_prev != NULL)
+        old_prev->next = new_object;
+}
+
+/*
+ * Cuts the block [address, address + MemorySizeInBytes - 1] out of the
+ * middle of area. The existing area is split into
+ * [area->start, address - 1] and [address + MemorySizeInBytes, area->end].
+ *
+ * If the node for the upper part cannot be allocated, area is left
+ * untouched: the block stays marked as used, but the list stays consistent
+ * instead of a NULL node being linked in. The function returns void, so the
+ * caller cannot observe this.
+ */
+static void vm_split_area(manageble_aperture_t* app, vm_area_t* area, void* address, uint64_t MemorySizeInBytes){
+
+    vm_area_t* new_area = vm_create_and_init_area(VOID_PTR_ADD(address,MemorySizeInBytes), area->end);
+
+    // Out of memory: do not shrink, do not dereference the NULL node
+    if (new_area == NULL)
+        return;
+
+    // Shrink the existing area
+    area->end = VOID_PTR_SUB(address,1);
+
+    vm_add_area_after(area, new_area);
+
+}
+
+/*
+ * Returns the object whose start address and size both match exactly,
+ * or NULL if the aperture has no such object.
+ */
+static vm_object_t* vm_find_object_by_address(manageble_aperture_t* app, void* address, uint64_t size){
+    vm_object_t* candidate;
+
+    for (candidate = app->vm_objects; candidate != NULL; candidate = candidate->next){
+        if (candidate->start == address && candidate->size == size)
+            return candidate;
+    }
+
+    return NULL; // not found
+}
+
+/*
+ * Returns the area whose [start, end] range contains address,
+ * or NULL if no area of the aperture contains it.
+ */
+static vm_area_t* vm_find(manageble_aperture_t* app, void* address){
+    vm_area_t* candidate;
+
+    for (candidate = app->vm_ranges; candidate != NULL; candidate = candidate->next){
+        if (candidate->start <= address && candidate->end >= address)
+            return candidate;
+    }
+
+    return NULL; // not found
+}
+
+/* An aperture is usable when both bounds are non-NULL and base lies below limit. */
+static bool aperture_is_valid(void* app_base, void* app_limit){
+    return (app_base != NULL) && (app_limit != NULL) && (app_base < app_limit);
+}
+
+/*
+ * Releases the block [address, address + MemorySizeInBytes - 1] from app.
+ *
+ * The block must match an object exactly (same start and size) and address
+ * must lie inside a tracked area; otherwise nothing is changed and -1 is
+ * returned. On a match the object is removed and the containing area is
+ * shrunk, split or removed. Returns 0 on success, -1 on failure.
+ *
+ * Assumes that fmm_mutex is locked on entry.
+ */
+static int aperture_release(manageble_aperture_t* app, void* address, uint64_t MemorySizeInBytes){
+ int rc = -1;
+ vm_area_t* area;
+
+ area = vm_find(app, address);
+ vm_object_t* object = vm_find_object_by_address(app, address, MemorySizeInBytes);
+ if (object && area){
+ // The object is removed before the area is inspected, so it is gone even on the -1 path below
+ vm_remove_object(app, object);
+ // area->end is inclusive, hence the "+ 1" when computing the area size
+ if (VOID_PTRS_SUB(area->end, area->start) + 1 > MemorySizeInBytes){ // the size of the released block is less than the size of area
+ if (area->start == address){ // shrink from the start
+ area->start = VOID_PTR_ADD(area->start,MemorySizeInBytes);
+ } else if (VOID_PTRS_SUB(area->end, address) + 1 == MemorySizeInBytes){ // shrink from the end
+ area->end = VOID_PTR_SUB(area->end, MemorySizeInBytes);
+ } else { // split the area
+ vm_split_area(app, area, address, MemorySizeInBytes);
+ }
+ rc = 0;
+ } else if (VOID_PTRS_SUB(area->end, area->start) + 1 == MemorySizeInBytes){ // the size of the released block is exactly the same as the size of area
+ vm_remove_area(app, area);
+ rc = 0;
+ } else {
+ //Inconsistent data. Fail it?
+ // (the released block is larger than the area that contains its start)
+ rc = -1;
+ }
+ }
+
+ return rc;
+}
+
+/*
+ * Reserves MemorySizeInBytes bytes of address space inside app and records
+ * the allocation as an object. Returns the allocated address or NULL.
+ * Assumes that fmm_mutex is locked on entry.
+ *
+ * Areas are inclusive ranges [start, end]. If the range list is empty, the
+ * block is placed at app->base. Otherwise the first area that is followed by
+ * a large enough hole (or the last area) is extended upwards.
+ *
+ * NULL is returned for a zero size, when the block would not fit below
+ * app->limit (inclusive), or when a bookkeeping allocation fails; in the
+ * failure cases the range list is left as it was on entry.
+ */
+static void* aperture_allocate(manageble_aperture_t* app, uint64_t MemorySizeInBytes){
+    vm_area_t *cur, *next, *new_area = NULL;
+    vm_object_t* new_object;
+    void* new_address = NULL;
+    void* saved_end = NULL;
+
+    // A zero-sized block would create an area whose end lies below its start
+    if (MemorySizeInBytes == 0)
+        return NULL;
+
+    cur = app->vm_ranges;
+    if (cur){ // not empty
+
+        // Look up the appropriate address space "hole" or end of the list
+        for (next = cur->next; next; cur = next, next = cur->next){
+            // The free bytes between two areas are cur->end + 1 .. next->start - 1,
+            // i.e. (next->start - cur->end - 1) bytes, so the difference must
+            // be strictly greater than the requested size.
+            if (VOID_PTRS_SUB(next->start, cur->end) > MemorySizeInBytes)
+                break;
+        }
+
+        // cur points to the last inspected element: the tail of the list or the found "hole".
+        // The new end (cur->end + size) must not pass the inclusive limit.
+        if (VOID_PTRS_SUB(app->limit, cur->end) >= MemorySizeInBytes){
+            // Just extend the existing region
+            saved_end = cur->end;
+            new_address = VOID_PTR_ADD(cur->end, 1);
+            cur->end = VOID_PTR_ADD(cur->end, MemorySizeInBytes);
+        }
+
+    } else { // empty - create the first area
+        // base + size - 1 must not pass the inclusive limit
+        if (VOID_PTRS_SUB(app->limit, app->base) >= MemorySizeInBytes - 1){
+            new_area = vm_create_and_init_area(app->base, VOID_PTR_ADD(app->base, (MemorySizeInBytes - 1)));
+            if (new_area){
+                app->vm_ranges = new_area;
+                new_address = new_area->start;
+            }
+        }
+    }
+
+    // Allocate new object
+    if (new_address){
+        new_object = vm_create_and_init_object(new_address, MemorySizeInBytes, 0);
+        if (new_object){
+            // Insert at the head of the object list
+            if (app->vm_objects != NULL)
+                vm_add_object_before(app->vm_objects, new_object);
+            app->vm_objects = new_object;
+        } else {
+            // Failed to allocate object: undo the range change made above.
+            // aperture_release() cannot be used here because it only
+            // releases ranges that have a matching object.
+            if (new_area){
+                app->vm_ranges = NULL;
+                free(new_area);
+            } else {
+                cur->end = saved_end;
+            }
+            new_address = NULL;
+        }
+    }
+
+    return new_address;
+
+}
+
+
+
+/*
+ * Returns the index of the gpu_mem slot whose gpu_id matches,
+ * or -1 if no slot matches.
+ */
+static int32_t gpu_mem_find_by_gpu_id(uint32_t gpu_id){
+    int32_t slot = 0;
+
+    while (slot < NUM_OF_SUPPORTED_GPUS){
+        if (gpu_mem[slot].gpu_id == gpu_id)
+            return slot;
+        slot++;
+    }
+
+    return -1;
+}
+
+/*
+ * Returns true if address lies inside the LDS, GPUVM or scratch aperture
+ * (bounds inclusive) of any gpu_mem slot that has a valid gpu_id.
+ */
+bool fmm_is_inside_some_aperture(void* address){
+
+    int32_t slot;
+
+    for (slot = 0; slot < NUM_OF_SUPPORTED_GPUS; slot++){
+        // Skip slots that no GPU has been assigned to
+        if (gpu_mem[slot].gpu_id == NON_VALID_GPU_ID)
+            continue;
+
+        if ((address >= gpu_mem[slot].lds_aperture.base) && (address <= gpu_mem[slot].lds_aperture.limit))
+            return true;
+
+        if ((address >= gpu_mem[slot].gpuvm_aperture.base) && (address <= gpu_mem[slot].gpuvm_aperture.limit))
+            return true;
+
+        if ((address >= gpu_mem[slot].scratch_aperture.base) && (address <= gpu_mem[slot].scratch_aperture.limit))
+            return true;
+    }
+
+    return false;
+}
+
+#ifdef DEBUG_PRINT_APERTURE
+/* Prints the bounds of an unmanaged aperture. */
+static void aperture_print(aperture_t* app){
+    printf("\t Base: %p\n", app->base);
+    printf("\t Limit: %p\n", app->limit);
+}
+
+/* Prints the bounds, the used ranges and the allocated objects of a managed aperture. */
+static void manageble_aperture_print(manageble_aperture_t* app){
+    vm_area_t* range;
+    vm_object_t* obj;
+
+    printf("\t Base: %p\n", app->base);
+    printf("\t Limit: %p\n", app->limit);
+
+    printf("\t Ranges: \n");
+    for (range = app->vm_ranges; range != NULL; range = range->next)
+        printf("\t\t Range [%p - %p] \n", range->start, range->end);
+
+    printf("\t Objects: \n");
+    for (obj = app->vm_objects; obj != NULL; obj = obj->next)
+        printf("\t\t Object [%p - %" PRIu64 "] \n", obj->start, obj->size);
+}
+
+/* Dumps all apertures of the given GPU to stdout; does nothing for an unknown gpu_id. */
+void fmm_print(uint32_t gpu_id){
+    int32_t slot = gpu_mem_find_by_gpu_id(gpu_id);
+
+    if (slot < 0) // Not found
+        return;
+
+    printf("LDS aperture: \n");
+    aperture_print(&gpu_mem[slot].lds_aperture);
+    printf("GPUVM aperture: \n");
+    manageble_aperture_print(&gpu_mem[slot].gpuvm_aperture);
+    printf("Scratch aperture: \n");
+    manageble_aperture_print(&gpu_mem[slot].scratch_aperture);
+}
+#else
+/* Aperture dumping is compiled out unless DEBUG_PRINT_APERTURE is defined. */
+void fmm_print(uint32_t gpu_id){
+
+}
+#endif
+
+
+/*
+ * Allocates MemorySizeInBytes from the scratch aperture of the given GPU.
+ * Returns NULL if the GPU is unknown, its scratch aperture is not valid,
+ * or the allocation fails. Takes the aperture's fmm_mutex while allocating.
+ */
+void* fmm_allocate_scratch(uint32_t gpu_id, uint64_t MemorySizeInBytes){
+
+    manageble_aperture_t* aperture;
+    void* allocated;
+    int32_t slot = gpu_mem_find_by_gpu_id(gpu_id);
+
+    // Unknown GPU
+    if (slot < 0)
+        return NULL;
+
+    // Aperture isn't properly initialized/supported
+    aperture = &gpu_mem[slot].scratch_aperture;
+    if (!aperture_is_valid(aperture->base, aperture->limit))
+        return NULL;
+
+    pthread_mutex_lock(&aperture->fmm_mutex);
+    allocated = aperture_allocate(aperture, MemorySizeInBytes);
+    pthread_mutex_unlock(&aperture->fmm_mutex);
+
+    return allocated;
+}
+
+/*
+ * Allocates MemorySizeInBytes from the GPUVM aperture of the given GPU.
+ * Returns NULL if the GPU is unknown, its GPUVM aperture is not valid,
+ * or the allocation fails. Takes the aperture's fmm_mutex while allocating.
+ */
+void* fmm_allocate_device(uint32_t gpu_id, uint64_t MemorySizeInBytes){
+
+    manageble_aperture_t* aperture;
+    void* allocated;
+    int32_t slot = gpu_mem_find_by_gpu_id(gpu_id);
+
+    // Unknown GPU
+    if (slot < 0)
+        return NULL;
+
+    // Aperture isn't properly initialized/supported
+    aperture = &gpu_mem[slot].gpuvm_aperture;
+    if (!aperture_is_valid(aperture->base, aperture->limit))
+        return NULL;
+
+    pthread_mutex_lock(&aperture->fmm_mutex);
+    allocated = aperture_allocate(aperture, MemorySizeInBytes);
+    pthread_mutex_unlock(&aperture->fmm_mutex);
+
+    return allocated;
+}
+
+
+/*
+ * Releases a block previously returned by fmm_allocate_device() or
+ * fmm_allocate_scratch().
+ *
+ * Every GPU slot with a valid gpu_id is checked; the block is released from
+ * the GPUVM or scratch aperture that contains address, with that aperture's
+ * fmm_mutex held. Returns 0 once a release succeeds, -1 if no aperture
+ * contains the address or no release succeeded.
+ */
+int fmm_release(void* address, uint64_t MemorySizeInBytes){
+
+    uint32_t i;
+    int32_t rc = -1;
+
+    for(i = 0; i < NUM_OF_SUPPORTED_GPUS; i++){
+        if(gpu_mem[i].gpu_id == NON_VALID_GPU_ID)
+            continue;
+
+        if (address >= gpu_mem[i].gpuvm_aperture.base && address <= gpu_mem[i].gpuvm_aperture.limit){
+            pthread_mutex_lock(&gpu_mem[i].gpuvm_aperture.fmm_mutex);
+            rc = aperture_release(&gpu_mem[i].gpuvm_aperture, address, MemorySizeInBytes);
+            pthread_mutex_unlock(&gpu_mem[i].gpuvm_aperture.fmm_mutex);
+            fmm_print(gpu_mem[i].gpu_id);
+        } else if (address >= gpu_mem[i].scratch_aperture.base && address <= gpu_mem[i].scratch_aperture.limit){
+            // The braces matter: without them only the lock was conditional,
+            // so the release and the unlock ran for every valid GPU.
+            pthread_mutex_lock(&gpu_mem[i].scratch_aperture.fmm_mutex);
+            rc = aperture_release(&gpu_mem[i].scratch_aperture, address, MemorySizeInBytes);
+            pthread_mutex_unlock(&gpu_mem[i].scratch_aperture.fmm_mutex);
+        }
+
+        // Stop at the first successful release so that a later slot whose
+        // aperture also contains the address cannot turn success into -1.
+        if (rc == 0)
+            break;
+    }
+
+    return rc;
+}
+
+/*
+ * Queries the process apertures with AMDKFD_IOC_GET_PROCESS_APERTURES and
+ * stores gpu_id plus the LDS, GPUVM and scratch bounds of each reported node
+ * in gpu_mem.
+ *
+ * At most NUM_OF_SUPPORTED_GPUS nodes are stored, because that is the size
+ * of gpu_mem. Returns HSAKMT_STATUS_SUCCESS, or HSAKMT_STATUS_ERROR if the
+ * ioctl fails.
+ */
+HSAKMT_STATUS fmm_init_process_apertures(void){
+    struct kfd_ioctl_get_process_apertures_args args;
+    uint32_t node_id;
+    uint32_t num_of_nodes;
+
+    if (0 != kmtIoctl(kfd_fd, AMDKFD_IOC_GET_PROCESS_APERTURES, (void*)&args))
+        return HSAKMT_STATUS_ERROR;
+
+    // Never trust the reported count beyond the capacity of gpu_mem
+    num_of_nodes = args.num_of_nodes;
+    if (num_of_nodes > NUM_OF_SUPPORTED_GPUS)
+        num_of_nodes = NUM_OF_SUPPORTED_GPUS;
+
+    for(node_id = 0; node_id < num_of_nodes; node_id++){
+        gpu_mem[node_id].gpu_id = args.process_apertures[node_id].gpu_id;
+        gpu_mem[node_id].lds_aperture.base = PORT_UINT64_TO_VPTR(args.process_apertures[node_id].lds_base);
+        gpu_mem[node_id].lds_aperture.limit = PORT_UINT64_TO_VPTR(args.process_apertures[node_id].lds_limit);
+        gpu_mem[node_id].gpuvm_aperture.base = PORT_UINT64_TO_VPTR(args.process_apertures[node_id].gpuvm_base);
+        gpu_mem[node_id].gpuvm_aperture.limit = PORT_UINT64_TO_VPTR(args.process_apertures[node_id].gpuvm_limit);
+        gpu_mem[node_id].scratch_aperture.base = PORT_UINT64_TO_VPTR(args.process_apertures[node_id].scratch_base);
+        gpu_mem[node_id].scratch_aperture.limit = PORT_UINT64_TO_VPTR(args.process_apertures[node_id].scratch_limit);
+    }
+
+    return HSAKMT_STATUS_SUCCESS;
+
+}
+
+/*
+ * Returns the base address of the requested aperture of the given GPU.
+ *
+ * Returns 0 if the GPU is unknown, the aperture type is not recognized, or
+ * the aperture is not valid. The return type is an address, so a status
+ * code must not be returned here.
+ */
+HSAuint64 fmm_get_aperture_base(aperture_type_e aperture_type, HSAuint32 gpu_id){
+    int32_t slot = gpu_mem_find_by_gpu_id(gpu_id);
+    if (slot<0)
+        return 0;
+
+    switch(aperture_type){
+    case FMM_GPUVM:
+        return aperture_is_valid(gpu_mem[slot].gpuvm_aperture.base, gpu_mem[slot].gpuvm_aperture.limit) ? PORT_VPTR_TO_UINT64(gpu_mem[slot].gpuvm_aperture.base) : 0;
+    case FMM_SCRATCH:
+        return aperture_is_valid(gpu_mem[slot].scratch_aperture.base, gpu_mem[slot].scratch_aperture.limit) ? PORT_VPTR_TO_UINT64(gpu_mem[slot].scratch_aperture.base) : 0;
+    case FMM_LDS:
+        return aperture_is_valid(gpu_mem[slot].lds_aperture.base, gpu_mem[slot].lds_aperture.limit) ? PORT_VPTR_TO_UINT64(gpu_mem[slot].lds_aperture.base) : 0;
+    default:
+        return 0;
+    }
+
+}
diff --git a/hsakmt/fmm.h b/hsakmt/fmm.h
new file mode 100644
index 0000000..5924247
--- /dev/null
+++ b/hsakmt/fmm.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including
+ * the next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef FMM_H_
+#define FMM_H_
+
+#include "hsakmttypes.h"
+#include <stddef.h>
+
+/*
+ * Aperture kinds that can be requested from fmm_get_aperture_base().
+ * FMM_FIRST_APERTURE_TYPE is an alias for the first real kind (FMM_GPUVM,
+ * value 0) and FMM_LAST_APERTURE_TYPE is one past the last real kind; neither
+ * names an aperture of its own.
+ */
+typedef enum {
+ FMM_FIRST_APERTURE_TYPE = 0,
+ FMM_GPUVM = FMM_FIRST_APERTURE_TYPE,
+ FMM_LDS,
+ FMM_SCRATCH,
+ FMM_LAST_APERTURE_TYPE
+} aperture_type_e;
+
+/*
+ * Description of a single aperture: its kind, its size and where it starts.
+ * NOTE(review): size is presumably in bytes and start_address a process
+ * virtual address - confirm against the users of this type.
+ */
+typedef struct {
+ aperture_type_e app_type;
+ uint64_t size;
+ void* start_address;
+} aperture_properties_t;
+
+/*
+ * Fetches the apertures of the calling process from KFD and caches them per GPU.
+ * Returns HSAKMT_STATUS_SUCCESS if the KFD ioctl succeeds and
+ * HSAKMT_STATUS_ERROR otherwise.
+ */
+HSAKMT_STATUS fmm_init_process_apertures(void);
+/*
+ * Memory interface
+ */
+void* fmm_allocate_scratch(uint32_t gpu_id, uint64_t MemorySizeInBytes);
+void* fmm_allocate_device(uint32_t gpu_id, uint64_t MemorySizeInBytes);
+void fmm_print(uint32_t node);
+bool fmm_is_inside_some_aperture(void* address);
+int fmm_release(void* address, HSAuint64 MemorySizeInBytes);
+
+/* Topology interface*/
+HSAKMT_STATUS fmm_node_added(HSAuint32 gpu_id);
+HSAKMT_STATUS fmm_node_removed(HSAuint32 gpu_id);
+/*
+ * Returns the base address of the requested aperture of gpu_id, or 0 when that
+ * aperture is not valid or aperture_type is not a known kind.
+ */
+HSAuint64 fmm_get_aperture_base(aperture_type_e aperture_type, HSAuint32 gpu_id);
+#endif /* FMM_H_ */
diff --git a/hsakmt/globals.c b/hsakmt/globals.c
new file mode 100644
index 0000000..cad6b1f
--- /dev/null
+++ b/hsakmt/globals.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including
+ * the next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "libhsakmt.h"
+
+// HSAKMT global data
+
+// File descriptor the library hands to kmtIoctl() when talking to KFD.
+int kfd_fd;
+// NOTE(review): presumably the number of outstanding hsaKmtOpenKFD() calls - confirm in openclose.c
+unsigned long kfd_open_count;
+// NOTE(review): presumably the number of outstanding hsaKmtAcquireSystemProperties() snapshots - confirm in topology.c
+unsigned long system_properties_count;
+// Library-wide mutex, statically initialized. NOTE(review): what it protects is not visible from this file.
+pthread_mutex_t hsakmt_mutex = PTHREAD_MUTEX_INITIALIZER;
diff --git a/hsakmt/hsakmt.h b/hsakmt/hsakmt.h
new file mode 100644
index 0000000..c87b3f8
--- /dev/null
+++ b/hsakmt/hsakmt.h
@@ -0,0 +1,577 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including
+ * the next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _HSAKMT_H_
+#define _HSAKMT_H_
+
+#include "hsakmttypes.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/**
+ "Opens" the HSA kernel driver for user-kernel mode communication.
+
+ On Windows, this function gets a handle to the KFD's AMDKFDIO device object that
+ is responsible for user-kernel communication, this handle is used internally by
+ the thunk library to send device I/O control to the HSA kernel driver.
+ No other thunk library function may be called unless the user-kernel communication
+ channel is opened first.
+
+ On Linux this call opens the "/dev/kfd" device file to establish a communication
+ path to the kernel.
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtOpenKFD( void );
+
+/**
+ "Closes" the user-kernel communication path.
+
+ On Windows, the handle obtained by the hsaKmtOpenKFD() function is closed;
+ no other communication with the kernel driver is possible after the successful
+ execution of the hsaKmtCloseKFD() function. Depending on the failure reason,
+ the user-kernel communication path may or may not be still active.
+
+ On Linux the function closes the "/dev/kfd" device file.
+ No further communication to the kernel driver is allowed until hsaKmtOpenKFD()
+ function is called again.
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtCloseKFD( void );
+
+
+/**
+ Returns the user-kernel interface version supported by KFD.
+ Higher major numbers usually add new features to KFD and may break user-kernel
+ compatibility; higher minor numbers define additional functionality associated
+ within a major number.
+ The calling software should validate that it meets the minimum interface version
+ as described in the API specification.
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtGetVersion(
+ HsaVersionInfo* VersionInfo //OUT
+ );
+
+/**
+ The function takes a "snapshot" of the topology information within the KFD
+ to avoid any changes during the enumeration process.
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtAcquireSystemProperties(
+ HsaSystemProperties* SystemProperties //OUT
+ );
+
+/**
+ Releases the topology "snapshot" taken by hsaKmtAcquireSystemProperties()
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtReleaseSystemProperties( void ) ;
+
+/**
+ Retrieves the discoverable sub-properties for a given HSA
+ node. The parameters returned allow the application or runtime to size the
+ management structures necessary to store the information.
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtGetNodeProperties(
+ HSAuint32 NodeId, //IN
+ HsaNodeProperties* NodeProperties //OUT
+ );
+
+/**
+ Retrieves the memory properties of a specific HSA node.
+ the memory pointer passed as MemoryProperties is sized as
+ NumBanks * sizeof(HsaMemoryProperties). NumBanks is retrieved with the
+ hsaKmtGetNodeProperties() call.
+
+ Some of the data returned is optional. Not all implementations may return all
+ parameters in the hsaMemoryProperties.
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtGetNodeMemoryProperties(
+ HSAuint32 NodeId, //IN
+ HSAuint32 NumBanks, //IN
+ HsaMemoryProperties* MemoryProperties //OUT
+ );
+
+/**
+ Retrieves the cache properties of a specific HSA node and processor ID.
+ ProcessorID refers to either a CPU core or a SIMD unit as enumerated earlier
+ via the hsaKmtGetNodeProperties() call.
+ The memory pointer passed as CacheProperties is sized as
+ NumCaches * sizeof(HsaCacheProperties). NumCaches is retrieved with the
+ hsaKmtGetNodeProperties() call.
+
+ The data returned is optional. Not all implementations may return all
+ parameters in the CacheProperties.
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtGetNodeCacheProperties(
+ HSAuint32 NodeId, //IN
+ HSAuint32 ProcessorId, //IN
+ HSAuint32 NumCaches, //IN
+ HsaCacheProperties* CacheProperties //OUT
+ );
+
+/**
+ Retrieves the HSA IO affinity properties of a specific HSA node.
+ the memory pointer passed as Properties is sized as
+ NumIoLinks * sizeof(HsaIoLinkProperties). NumIoLinks is retrieved with the
+ hsaKmtGetNodeProperties() call.
+
+ The data returned is optional. Not all implementations may return all
+ parameters in the IoLinkProperties.
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtGetNodeIoLinkProperties(
+ HSAuint32 NodeId, //IN
+ HSAuint32 NumIoLinks, //IN
+ HsaIoLinkProperties* IoLinkProperties //OUT
+ );
+
+
+
+/**
+ Creates an operating system event associated with a HSA event ID
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtCreateEvent(
+ HsaEventDescriptor* EventDesc, //IN
+ bool ManualReset, //IN
+ bool IsSignaled, //IN
+ HsaEvent** Event //OUT
+ );
+
+/**
+ Destroys an operating system event associated with a HSA event ID
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtDestroyEvent(
+ HsaEvent* Event //IN
+ );
+
+/**
+ Sets the specified event object to the signaled state
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtSetEvent(
+ HsaEvent* Event //IN
+ );
+
+/**
+ Sets the specified event object to the non-signaled state
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtResetEvent(
+ HsaEvent* Event //IN
+ );
+
+/**
+ Queries the state of the specified event object
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtQueryEventState(
+ HsaEvent* Event //IN
+ );
+
+/**
+ Checks the current state of the event object. If the object's state is
+ nonsignaled, the calling thread enters the wait state.
+
+ The function returns when one of the following occurs:
+- The specified event object is in the signaled state.
+- The time-out interval elapses.
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtWaitOnEvent(
+ HsaEvent* Event, //IN
+ HSAuint32 Milliseconds //IN
+ );
+
+/**
+ Checks the current state of multiple event objects.
+
+ The function returns when one of the following occurs:
+- Either any one or all of the specified objects are in the signaled state
+ - if "WaitOnAll" is "true" the function returns when the state of all
+ objects in array is signaled
+ - if "WaitOnAll" is "false" the function returns when the state of any
+ one of the objects is set to signaled
+- The time-out interval elapses.
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtWaitOnMultipleEvents(
+ HsaEvent* Events[], //IN
+ HSAuint32 NumEvents, //IN
+ bool WaitOnAll, //IN
+ HSAuint32 Milliseconds //IN
+ );
+
+/**
+ new TEMPORARY function definition - to be used only on "Trinity + Southern Islands" platform
+ If used on other platforms the function will return HSAKMT_STATUS_ERROR
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtReportQueue(
+ HSA_QUEUEID QueueId, //IN
+ HsaQueueReport* QueueReport //OUT
+ );
+
+/**
+ Creates a GPU queue with user-mode access rights
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtCreateQueue(
+ HSAuint32 NodeId, //IN
+ HSA_QUEUE_TYPE Type, //IN
+ HSAuint32 QueuePercentage, //IN
+ HSA_QUEUE_PRIORITY Priority, //IN
+ void* QueueAddress, //IN
+ HSAuint64 QueueSizeInBytes, //IN
+ HsaEvent* Event, //IN
+ HsaQueueResource* QueueResource //OUT
+ );
+
+/**
+ Updates a queue
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtUpdateQueue(
+ HSA_QUEUEID QueueId, //IN
+ HSAuint32 QueuePercentage,//IN
+ HSA_QUEUE_PRIORITY Priority, //IN
+ void* QueueAddress, //IN
+ HSAuint64 QueueSize, //IN
+ HsaEvent* Event //IN
+ );
+
+/**
+ Destroys a queue
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtDestroyQueue(
+ HSA_QUEUEID QueueId //IN
+ );
+
+/**
+ Allows an HSA process to set/change the default and alternate memory coherency, before starting to dispatch.
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtSetMemoryPolicy(
+ HSAuint32 Node, //IN
+ HSAuint32 DefaultPolicy, //IN
+ HSAuint32 AlternatePolicy, //IN
+ void* MemoryAddressAlternate, //IN (page-aligned)
+ HSAuint64 MemorySizeInBytes //IN (page-aligned)
+ );
+/**
+ Allocates a memory buffer that may be accessed by the GPU
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtAllocMemory(
+ HSAuint32 PreferredNode, //IN
+ HSAuint64 SizeInBytes, //IN (multiple of page size)
+ HsaMemFlags MemFlags, //IN
+ void** MemoryAddress //OUT (page-aligned)
+ );
+
+/**
+ Frees a memory buffer
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtFreeMemory(
+ void* MemoryAddress, //IN (page-aligned)
+ HSAuint64 SizeInBytes //IN
+ );
+
+/**
+ Registers with KFD a memory buffer that may be accessed by the GPU
+ This function will never be required for Linux
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtRegisterMemory(
+ void* MemoryAddress, //IN (page-aligned)
+ HSAuint64 MemorySizeInBytes //IN (page-aligned)
+ );
+
+
+/**
+ Unregisters with KFD a memory buffer
+ This function will never be required for Linux
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtDeregisterMemory(
+ void* MemoryAddress //IN
+ );
+
+
+/**
+ Ensures that the memory is resident and can be accessed by GPU
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtMapMemoryToGPU(
+ void* MemoryAddress, //IN (page-aligned)
+ HSAuint64 MemorySizeInBytes, //IN (page-aligned)
+ HSAuint64* AlternateVAGPU //OUT (page-aligned)
+ );
+
+/**
+ Releases the residency of the memory
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtUnmapMemoryToGPU(
+ void* MemoryAddress //IN (page-aligned)
+ );
+
+
+/**
+ Notifies the kernel driver that a process wants to use GPU debugging facilities
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtDbgRegister(
+ HSAuint32 NodeId //IN
+ );
+
+/**
+ Detaches the debugger process from the HW debug established by hsaKmtDbgRegister() API
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtDbgUnregister(
+ HSAuint32 NodeId //IN
+ );
+
+/**
+ Controls a wavefront
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtDbgWavefrontControl(
+ HSAuint32 NodeId, //IN
+ HSA_DBG_WAVEOP Operand, //IN
+ HSA_DBG_WAVEMODE Mode, //IN
+ HSAuint32 TrapId, //IN
+ HsaDbgWaveMessage* DbgWaveMsgRing //IN
+ );
+
+/**
+ Sets watch points on memory address ranges to generate exception events when the
+ watched addresses are accessed
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtDbgAddressWatch(
+ HSAuint32 NodeId, //IN
+ HSAuint32 NumWatchPoints, //IN
+ HSA_DBG_WATCH_MODE WatchMode[], //IN
+ void* WatchAddress[], //IN
+ HSAuint64 WatchMask[], //IN, optional
+ HsaEvent* WatchEvent[] //IN, optional
+ );
+
+/**
+ Gets GPU and CPU clock counters for particular Node
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtGetClockCounters(
+ HSAuint32 NodeId, //IN
+ HsaClockCounters* Counters //OUT
+ );
+
+/**
+ Retrieves information on the available HSA counters
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtPmcGetCounterProperties(
+ HSAuint32 NodeId, //IN
+ HsaCounterProperties** CounterProperties //OUT
+ );
+
+/**
+ Registers a set of (HW) counters to be used for tracing/profiling
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtPmcRegisterTrace(
+ HSAuint32 NodeId, //IN
+ HSAuint32 NumberOfCounters, //IN
+ HsaCounter* Counters, //IN
+ HsaPmcTraceRoot* TraceRoot //OUT
+ );
+
+/**
+ Unregisters a set of (HW) counters used for tracing/profiling
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtPmcUnregisterTrace(
+ HSAuint32 NodeId, //IN
+ HSATraceId TraceId //IN
+ );
+
+/**
+ Allows a user mode process to get exclusive access to the defined set of (HW) counters
+ used for tracing/profiling
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtPmcAcquireTraceAccess(
+ HSAuint32 NodeId, //IN
+ HSATraceId TraceId //IN
+ );
+
+/**
+ Allows a user mode process to release exclusive access to the defined set of (HW) counters
+ used for tracing/profiling
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtPmcReleaseTraceAccess(
+ HSAuint32 NodeId, //IN
+ HSATraceId TraceId //IN
+ );
+
+/**
+ Starts tracing operation on a previously established set of performance counters
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtPmcStartTrace(
+ HSATraceId TraceId, //IN
+ void* TraceBuffer, //IN (page aligned)
+ HSAuint64 TraceBufferSizeBytes //IN (page aligned)
+ );
+
+/**
+ Forces an update of all the counters that a previously started trace operation has registered
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtPmcQueryTrace(
+ HSATraceId TraceId //IN
+ );
+
+/**
+ Stops tracing operation on a previously established set of performance counters
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtPmcStopTrace(
+ HSATraceId TraceId //IN
+ );
+
+/**
+ Sets trap handler and trap buffer to be used for all queues associated with the specified NodeId within this process context
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtSetTrapHandler(
+ HSAuint32 NodeId, //IN
+ void* TrapHandlerBaseAddress, //IN
+ HSAuint64 TrapHandlerSizeInBytes, //IN
+ void* TrapBufferBaseAddress, //IN
+ HSAuint64 TrapBufferSizeInBytes //IN
+ );
+
+#ifdef __cplusplus
+} //extern "C"
+#endif
+
+#endif //_HSAKMT_H_
+
diff --git a/hsakmt/hsakmttypes.h b/hsakmt/hsakmttypes.h
new file mode 100644
index 0000000..a7e0a81
--- /dev/null
+++ b/hsakmt/hsakmttypes.h
@@ -0,0 +1,909 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including
+ * the next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _HSAKMTTYPES_H_
+#define _HSAKMTTYPES_H_
+
+//the definitions and THUNK API are version specific - define the version numbers here
+#define HSAKMT_VERSION_MAJOR 0
+#define HSAKMT_VERSION_MINOR 99
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined(_WIN64) || defined(_WINDOWS) || defined(_WIN32)
+
+ #if defined(_WIN32)
+ #define HSAKMTAPI __stdcall
+ #else
+ #define HSAKMTAPI
+ #endif
+
+ typedef unsigned char HSAuint8;
+ typedef char HSAint8;
+ typedef unsigned short HSAuint16;
+ typedef signed short HSAint16;
+ typedef unsigned __int32 HSAuint32;
+ typedef signed __int64 HSAint64;
+ typedef unsigned __int64 HSAuint64;
+
+#elif defined(__linux__)
+
+#include <stdbool.h>
+#include <stdint.h>
+
+ #define HSAKMTAPI
+
+ typedef uint8_t HSAuint8;
+ typedef int8_t HSAint8;
+ typedef uint16_t HSAuint16;
+ typedef int16_t HSAint16;
+ typedef uint32_t HSAuint32;
+ typedef int64_t HSAint64;
+ typedef uint64_t HSAuint64;
+
+#endif
+
+typedef void* HSA_HANDLE;
+typedef HSAuint64 HSA_QUEUEID;
+
+// This is included in order to force the alignments to be 4 bytes so that
+// it avoids extra padding added by the compiler when a 64-bit binary is generated.
+#pragma pack(push, hsakmttypes_h, 4)
+
+//
+// HSA STATUS codes returned by the KFD Interfaces
+//
+
+typedef enum _HSAKMT_STATUS
+{
+ HSAKMT_STATUS_SUCCESS = 0, // Operation successful
+ HSAKMT_STATUS_ERROR = 1, // General error return if not otherwise specified
+ HSAKMT_STATUS_DRIVER_MISMATCH = 2, // User mode component is not compatible with kernel HSA driver
+
+ HSAKMT_STATUS_INVALID_PARAMETER = 3, // KFD identifies input parameters invalid
+ HSAKMT_STATUS_INVALID_HANDLE = 4, // KFD identifies handle parameter invalid
+ HSAKMT_STATUS_INVALID_NODE_UNIT = 5, // KFD identifies node or unit parameter invalid
+
+ HSAKMT_STATUS_NO_MEMORY = 6, // No memory available (when allocating queues or memory)
+ HSAKMT_STATUS_BUFFER_TOO_SMALL = 7, // A buffer needed to handle a request is too small
+
+ HSAKMT_STATUS_NOT_IMPLEMENTED = 10, // KFD function is not implemented for this set of parameters
+ HSAKMT_STATUS_NOT_SUPPORTED = 11, // KFD function is not supported on this node
+ HSAKMT_STATUS_UNAVAILABLE = 12, // KFD function is not available currently on this node (but
+ // may be at a later time)
+
+ HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED = 20, // KFD driver path not opened
+ HSAKMT_STATUS_KERNEL_COMMUNICATION_ERROR = 21, // user-kernel mode communication failure
+ HSAKMT_STATUS_KERNEL_ALREADY_OPENED = 22, // KFD driver path already opened
+ HSAKMT_STATUS_HSAMMU_UNAVAILABLE = 23, // ATS/PRI 1.1 (Address Translation Services) not available
+ // (IOMMU driver not installed or not-available)
+
+ HSAKMT_STATUS_WAIT_FAILURE = 30, // The wait operation failed
+ HSAKMT_STATUS_WAIT_TIMEOUT = 31, // The wait operation timed out
+
+ HSAKMT_STATUS_MEMORY_ALREADY_REGISTERED = 35, // Memory buffer already registered
+ HSAKMT_STATUS_MEMORY_NOT_REGISTERED = 36, // Memory buffer not registered
+ HSAKMT_STATUS_MEMORY_ALIGNMENT = 37, // Memory parameter not aligned
+
+} HSAKMT_STATUS;
+
+//
+// HSA KFD interface version information. Calling software has to validate that it meets
+// the minimum interface version as described in the API specification.
+// All future structures will be extended in a backward compatible fashion.
+//
+
+typedef struct _HsaVersionInfo
+{
+ HSAuint32 KernelInterfaceMajorVersion; // supported kernel interface major version
+ HSAuint32 KernelInterfaceMinorVersion; // supported kernel interface minor version
+} HsaVersionInfo;
+
+//
+// HSA Topology Discovery Infrastructure structure definitions.
+// The infrastructure implementation is based on design specified in the Kernel HSA Driver ADD
+// The discoverable data is retrieved from ACPI structures in the platform infrastructure, as defined
+// in the "Heterogeneous System Architecture Detail Topology" specification.
+//
+// The following structure is returned on a call to hsaKmtAcquireSystemProperties() as output.
+// When the call is made within a process context, a "snapshot" of the topology information
+// is taken within the KFD to avoid any changes during the enumeration process.
+// The Snapshot is released when hsaKmtReleaseSystemProperties() is called
+// or when the process exits or is terminated.
+//
+
+typedef struct _HsaSystemProperties
+{
+ HSAuint32 NumNodes; // the number of "H-NUMA" memory nodes.
+ // each node represents a discoverable node of the system
+ // All other enumeration is done on a per-node basis
+
+ HSAuint32 PlatformOem; // identifies HSA platform, reflects the OEMID in the CRAT
+ HSAuint32 PlatformId; // HSA platform ID, reflects OEM TableID in the CRAT
+ HSAuint32 PlatformRev; // HSA platform revision, reflects Platform Table Revision ID
+} HsaSystemProperties;
+
+
+typedef union
+{
+ HSAuint32 Value;
+ struct
+ {
+ unsigned int HotPluggable : 1; // the node may be removed by some system action
+ // (event will be sent)
+ unsigned int HSAMMUPresent : 1; // This node has an ATS/PRI 1.1 compatible
+ // translation agent in the system (e.g. IOMMUv2)
+ unsigned int SharedWithGraphics : 1; // this HSA nodes' GPU function is also used for OS primary
+ // graphics render (= UI)
+ unsigned int QueueSizePowerOfTwo : 1; // This node GPU requires the queue size to be a power of 2 value
+ unsigned int QueueSize32bit : 1; // This node GPU requires the queue size to be less than 4GB
+ unsigned int QueueIdleEvent : 1; // This node GPU supports notification on Queue Idle
+ unsigned int VALimit : 1; // This node GPU has limited VA range for platform
+ // (typical 40bit). Affects shared VM use for 64bit apps
+ unsigned int WatchPointsSupported: 1; // Indicates if Watchpoints are available on the node.
+ unsigned int WatchPointsTotalBits: 4; // ld(Watchpoints) available. To determine the number use 2^value
+
+ unsigned int DoorbellType : 2; // 0: This node has pre-1.0 doorbell characteristic
+ // 1: This node has 1.0 doorbell characteristic
+ // 2,3: reserved for future use
+ unsigned int Reserved : 18;
+ } ui32;
+} HSA_CAPABILITY;
+
+
+//
+// HSA node properties. This structure is an output parameter of hsaKmtGetNodeProperties()
+// The application or runtime can use the information herein to size the topology management structures
+// Unless there is some very weird setup, there is at most one "GPU" device (with a certain number
+// of throughput compute units (= SIMDs) associated with a H-NUMA node.
+//
+
+#define HSA_PUBLIC_NAME_SIZE 128
+
+typedef struct _HsaNodeProperties
+{
+ HSAuint32 NumCPUCores; // # of latency (= CPU) cores present on this HSA node.
+ // This value is 0 for a HSA node with no such cores,
+ // e.g. a "discrete HSA GPU"
+ HSAuint32 NumFComputeCores; // # of HSA throughput (= GPU) FCompute cores ("SIMD") present in a node.
+ // This value is 0 if no FCompute cores are present (e.g. pure "CPU node").
+ HSAuint32 NumMemoryBanks; // # of discoverable memory bank affinity properties on this "H-NUMA" node.
+ HSAuint32 NumCaches; // # of discoverable cache affinity properties on this "H-NUMA" node.
+
+ HSAuint32 NumIOLinks; // # of discoverable IO link affinity properties of this node
+ // connecting to other nodes.
+
+ HSAuint32 CComputeIdLo; // low value of the logical processor ID of the latency (= CPU)
+ // cores available on this node
+ HSAuint32 FComputeIdLo; // low value of the logical processor ID of the throughput (= GPU)
+ // units available on this node
+
+ HSA_CAPABILITY Capability; // see above
+
+ HSAuint32 MaxWavesPerSIMD; // This identifies the max. number of launched waves per SIMD.
+ // If NumFComputeCores is 0, this value is ignored.
+ HSAuint32 LDSSizeInKB; // Size of Local Data Store in Kilobytes per SIMD Wavefront
+ HSAuint32 GDSSizeInKB; // Size of Global Data Store in Kilobytes shared across SIMD Wavefronts
+
+ HSAuint32 WaveFrontSize; // Number of SIMD cores per wavefront executed, typically 64,
+ // may be 32 or a different value for some HSA based architectures
+
+ HSAuint32 NumShaderBanks; // Number of Shader Banks or Shader Engines, typical values are 1 or 2
+
+
+ HSAuint32 NumArrays; // Number of SIMD arrays per engine
+ HSAuint32 NumCUPerArray; // Number of Compute Units (CU) per SIMD array
+ HSAuint32 NumSIMDPerCU; // Number of SIMD representing a Compute Unit (CU)
+
+ HSAuint32 MaxSlotsScratchCU; // Number of temp. memory ("scratch") wave slots available to access,
+ // may be 0 if HW has no restrictions
+
+ HSAuint32 EngineId; // Identifier (rev) of the GPU uEngine or Firmware, may be 0
+
+ HSAuint16 VendorId; // GPU vendor id; 0 on latency (= CPU)-only nodes
+ HSAuint16 DeviceId; // GPU device id; 0 on latency (= CPU)-only nodes
+
+ HSAuint32 LocationId; // GPU BDF (Bus/Device/function number) - identifies the device
+ // location in the overall system
+ HSAuint64 LocalMemSize; // Local memory size
+ HSAuint32 MaxEngineClockMhzFCompute; // maximum engine clock in MHz of the throughput (= GPU) function and
+ HSAuint32 MaxEngineClockMhzCCompute; // of the latency (= CPU) function, including any boost capabilities
+
+ HSAuint16 MarketingName[HSA_PUBLIC_NAME_SIZE]; // Public name of the "device" on the node (board or APU name).
+ // Unicode string
+} HsaNodeProperties;
+
+
+typedef enum _HSA_HEAPTYPE
+{
+ HSA_HEAPTYPE_SYSTEM = 0,
+ HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC = 1, // CPU "visible" part of GPU device local memory (for discrete GPU)
+ HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE = 2, // CPU "invisible" part of GPU device local memory (for discrete GPU)
+ // All HSA accessible memory is per definition "CPU visible"
+ // "Private memory" is relevant for graphics interop only.
+ HSA_HEAPTYPE_GPU_GDS = 3, // GPU internal memory (GDS)
+ HSA_HEAPTYPE_GPU_LDS = 4, // GPU internal memory (LDS)
+ HSA_HEAPTYPE_GPU_SCRATCH = 5, // GPU special memory (scratch)
+
+ HSA_HEAPTYPE_NUMHEAPTYPES,
+ HSA_HEAPTYPE_SIZE = 0xFFFFFFFF
+} HSA_HEAPTYPE;
+
+typedef union
+{
+ HSAuint32 MemoryProperty;
+ struct
+ {
+ unsigned int HotPluggable : 1; // the memory may be removed by some system action,
+ // memory should be used for temporary data
+ unsigned int NonVolatile : 1; // memory content is preserved across a power-off cycle.
+ unsigned int Reserved :30;
+ } ui32;
+} HSA_MEMORYPROPERTY;
+
+
+//
+// Discoverable HSA Memory properties.
+// The structure is the output parameter of the hsaKmtGetNodeMemoryProperties() function
+//
+
+typedef struct _HsaMemoryProperties
+{
+ HSA_HEAPTYPE HeapType; // system or frame buffer,
+ union
+ {
+ HSAuint64 SizeInBytes; // physical memory size of the memory range in bytes
+ struct
+ {
+ HSAuint32 SizeInBytesLow; // physical memory size of the memory range in bytes (lower 32bit)
+ HSAuint32 SizeInBytesHigh; // physical memory size of the memory range in bytes (higher 32bit)
+ } ui32;
+ };
+ HSA_MEMORYPROPERTY Flags; // See definitions above
+
+ HSAuint32 Width; // memory width - the number of parallel bits of the memory interface
+ HSAuint32 MemoryClockMax; // memory clock for the memory, this allows computing the available bandwidth
+ // to the memory when needed
+ HSAuint64 VirtualBaseAddress; // if set to value != 0, indicates the virtual base address of the memory
+ // in process virtual space
+} HsaMemoryProperties;
+
+//
+// Discoverable Cache Properties. (optional).
+// The structure is the output parameter of the hsaKmtGetNodeCacheProperties() function
+// Any of the parameters may be 0 (= not defined)
+//
+
+#define HSA_CPU_SIBLINGS 256
+#define HSA_PROCESSORID_ALL 0xFFFFFFFF
+
+typedef union
+{
+ HSAuint32 Value;
+ struct
+ {
+ unsigned int Data : 1;
+ unsigned int Instruction : 1;
+ unsigned int CPU : 1;
+ unsigned int HSACU : 1;
+ unsigned int Reserved :28;
+ } ui32;
+} HsaCacheType;
+
+typedef struct _HaCacheProperties
+{
+ HSAuint32 ProcessorIdLow; // Identifies the processor number
+
+ HSAuint32 CacheLevel; // Integer representing level: 1, 2, 3, 4, etc
+ HSAuint32 CacheSize; // Size of the cache
+ HSAuint32 CacheLineSize; // Cache line size in bytes
+ HSAuint32 CacheLinesPerTag; // Cache lines per Cache Tag
+ HSAuint32 CacheAssociativity; // Cache Associativity
+ HSAuint32 CacheLatency; // Cache latency in ns
+ HsaCacheType CacheType;
+ HSAuint32 SiblingMap[HSA_CPU_SIBLINGS];
+} HsaCacheProperties;
+
+
+//
+// Discoverable CPU Compute Properties. (optional).
+// The structure is the output parameter of the hsaKmtGetCComputeProperties() function
+// Any of the parameters may be 0 (= not defined)
+//
+
+typedef struct _HsaCComputeProperties
+{
+ HSAuint32 SiblingMap[HSA_CPU_SIBLINGS];
+} HsaCComputeProperties;
+
+//
+// Discoverable IoLink Properties (optional).
+// The structure is the output parameter of the hsaKmtGetNodeIoLinkProperties() function.
+// Any of the parameters may be 0 (= not defined)
+//
+
+typedef enum _HSA_IOLINKTYPE {
+ HSA_IOLINKTYPE_UNDEFINED = 0,
+ HSA_IOLINKTYPE_HYPERTRANSPORT = 1,
+ HSA_IOLINKTYPE_PCIEXPRESS = 2,
+ HSA_IOLINKTYPE_AMBA = 3,
+ HSA_IOLINKTYPE_MIPI = 4,
+ HSA_IOLINKTYPE_OTHER = 5,
+ HSA_IOLINKTYPE_NUMIOLINKTYPES,
+ HSA_IOLINKTYPE_SIZE = 0xFFFFFFFF
+} HSA_IOLINKTYPE;
+
+typedef union
+{
+ HSAuint32 LinkProperty;
+ struct
+ {
+ unsigned int Override : 1; // bus link properties are determined by this structure
+ // not by the HSA_IOLINKTYPE. The other flags are valid
+ // only if this bit is set to one
+ unsigned int NonCoherent : 1; // The link doesn't support coherent transactions
+ // memory accesses across must not be set to "host cacheable"!
+ unsigned int NoAtomics32bit : 1; // The link doesn't support 32bit-wide atomic transactions
+ unsigned int NoAtomics64bit : 1; // The link doesn't support 64bit-wide atomic transactions
+ unsigned int Reserved :28;
+ } ui32;
+} HSA_LINKPROPERTY;
+
+
+// Properties of one IO link between two nodes.
+typedef struct _HsaIoLinkProperties
+{
+ HSA_IOLINKTYPE IoLinkType; // link type, see HSA_IOLINKTYPE
+ HSAuint32 VersionMajor; // Bus interface version (optional)
+ HSAuint32 VersionMinor; // Bus interface version (optional)
+
+ HSAuint32 NodeFrom; // presumably the node id at the originating end of the link - TODO confirm
+ HSAuint32 NodeTo; // presumably the node id at the receiving end of the link - TODO confirm
+
+ HSAuint32 Weight; // weight factor (derived from CDIT)
+
+ HSAuint32 MinimumLatency; // minimum cost of time to transfer (rounded to ns)
+ HSAuint32 MaximumLatency; // maximum cost of time to transfer (rounded to ns)
+ HSAuint32 MinimumBandwidth; // minimum interface Bandwidth in MB/s
+ HSAuint32 MaximumBandwidth; // maximum interface Bandwidth in MB/s
+ HSAuint32 RecTransferSize; // recommended transfer size to reach maximum bandwidth in Bytes
+ HSA_LINKPROPERTY Flags; // override flags (may be active for specific platforms)
+} HsaIoLinkProperties;
+
+//
+// Memory allocation definitions for the KFD HSA interface
+//
+
+typedef struct _HsaMemFlags
+{
+ union
+ {
+ struct
+ {
+ unsigned int NonPaged : 1; // default = 0: pageable memory
+ unsigned int CachePolicy : 2; // see HSA_CACHING_TYPE
+ unsigned int ReadOnly : 1; // default = 0: Read/Write memory
+ unsigned int PageSize : 2; // see HSA_PAGE_SIZE
+ unsigned int HostAccess : 1; // default = 0: GPU access only
+ unsigned int NoSubstitute: 1; // default = 0: if specific memory is not available on node (e.g. on
+ // discrete GPU local), allocation may fall back to system memory node 0
+ // memory (= always available). Otherwise no allocation is possible.
+ unsigned int GDSMemory : 1; // default = 0: If set, the allocation will occur in GDS heap.
+ // HostAccess must be 0, all other flags (except NoSubstitute) should
+ // be 0 when setting this entry to 1. GDS allocation may fail due to
+ // limited resources. Application code is required to work without
+ // any allocated GDS memory using regular memory.
+ // Allocation fails on any node without GPU function.
+ unsigned int Scratch : 1; // default = 0: If set, the allocation will occur in GPU "scratch area".
+ // HostAccess must be 0, all other flags (except NoSubstitute) should be 0
+ // when setting this entry to 1. Scratch allocation may fail due to limited
+ // resources. Application code is required to work without any allocation.
+ // Allocation fails on any node without GPU function.
+ unsigned int AtomicAccessFull: 1; // default = 0: If set, the memory will be allocated and mapped to allow
+ // atomic ops processing. On AMD APU, this will use the ATC path on system
+ // memory, irrespective of the NonPaged flag setting (= if NonPaged is set,
+ // the memory is pagelocked but mapped through IOMMUv2 instead of GPUVM).
+ // All atomic ops must be supported on this memory.
+ unsigned int AtomicAccessPartial: 1; // default = 0: See above for AtomicAccessFull description, however
+ // focused on AMD discrete GPU that support PCIe atomics; the memory
+ // allocation is mapped to allow for PCIe atomics to operate on system
+ // memory, irrespective of NonPaged set or the presence of an ATC path
+ // in the system. The atomic operations supported are limited to SWAP,
+ // CompareAndSwap (CAS) and FetchAdd (this PCIe op allows both atomic
+ // increment and decrement via 2-complement arithmetic), which are the
+ // only atomic ops directly supported in PCI Express.
+ // On AMD APU, setting this flag will allocate the same type of memory
+ // as AtomicAccessFull, but it will be considered compatible with
+ // discrete GPU atomic operations access.
+ unsigned int ExecuteAccess: 1; // default = 0: Identifies if memory is primarily used for data or accessed
+ // for executable code (e.g. queue memory) by the host CPU or the device.
+ // Influences the page attribute setting within the allocation
+ unsigned int Reserved : 19;
+
+ } ui32;
+ HSAuint32 Value;
+ };
+} HsaMemFlags;
+
+// Values for the 2-bit HsaMemFlags.CachePolicy field; all four encodings 0..3 are named here.
+typedef enum _HSA_CACHING_TYPE
+{
+ HSA_CACHING_CACHED = 0,
+ HSA_CACHING_NONCACHED = 1,
+ HSA_CACHING_WRITECOMBINED = 2,
+ HSA_CACHING_RESERVED = 3,
+ HSA_CACHING_NUM_CACHING, // implicit value 4: number of encodings above
+ HSA_CACHING_SIZE = 0xFFFFFFFF // sentinel; this file comments the same pattern elsewhere as aligning the enum to 32 bits
+} HSA_CACHING_TYPE;
+
+// Values for the 2-bit HsaMemFlags.PageSize field.
+// NOTE(review): unlike the neighbouring enums this one has no 0xFFFFFFFF sentinel.
+typedef enum _HSA_PAGE_SIZE
+{
+ HSA_PAGE_SIZE_4KB = 0,
+ HSA_PAGE_SIZE_64KB = 1, //64KB pages, not generally available in systems
+ HSA_PAGE_SIZE_2MB = 2,
+ HSA_PAGE_SIZE_1GB = 3, //1GB pages, not generally available in systems
+} HSA_PAGE_SIZE;
+
+
+typedef enum _HSA_DEVICE
+{
+ HSA_DEVICE_CPU = 0,
+ HSA_DEVICE_GPU = 1,
+ MAX_HSA_DEVICE = 2
+} HSA_DEVICE;
+
+
+// Queue priority levels, centred on NORMAL = 0 and ranging from -3 to 3.
+// NOTE(review): this enum combines negative enumerators with 0xFFFFFFFF, a value that does
+// not fit in a 32-bit int. A type able to hold both must be wider than 32 bits, so the
+// sentinel presumably does not keep this enum at 32 bits the way it can for the
+// non-negative enums in this file - confirm sizeof(HSA_QUEUE_PRIORITY) on the target compiler.
+typedef enum _HSA_QUEUE_PRIORITY
+{
+ HSA_QUEUE_PRIORITY_MINIMUM = -3,
+ HSA_QUEUE_PRIORITY_LOW = -2,
+ HSA_QUEUE_PRIORITY_BELOW_NORMAL = -1,
+ HSA_QUEUE_PRIORITY_NORMAL = 0,
+ HSA_QUEUE_PRIORITY_ABOVE_NORMAL = 1,
+ HSA_QUEUE_PRIORITY_HIGH = 2,
+ HSA_QUEUE_PRIORITY_MAXIMUM = 3,
+ HSA_QUEUE_PRIORITY_NUM_PRIORITY, // implicit value 4 (MAXIMUM + 1); this is NOT the count of the 7 levels above
+ HSA_QUEUE_PRIORITY_SIZE = 0xFFFFFFFF
+} HSA_QUEUE_PRIORITY;
+
+typedef enum _HSA_QUEUE_TYPE
+{
+ HSA_QUEUE_COMPUTE = 1, // AMD PM4 compatible Compute Queue
+ HSA_QUEUE_SDMA = 2, // SDMA Queue, used for data transport and format conversion (e.g. (de-)tiling, etc).
+ HSA_QUEUE_MULTIMEDIA_DECODE = 3, // reserved, for HSA multimedia decode queue
+ HSA_QUEUE_MULTIMEDIA_ENCODE = 4, // reserved, for HSA multimedia encode queue
+
+ // the following values indicate a queue type permitted to reference OS graphics
+ // resources through the interoperation API. See [5] "HSA Graphics Interoperation
+ // specification" for more details on use of such resources.
+
+ HSA_QUEUE_COMPUTE_OS = 11, // AMD PM4 compatible Compute Queue
+ HSA_QUEUE_SDMA_OS = 12, // SDMA Queue, used for data transport and format conversion (e.g. (de-)tiling, etc).
+ HSA_QUEUE_MULTIMEDIA_DECODE_OS = 13, // reserved, for HSA multimedia decode queue
+ HSA_QUEUE_MULTIMEDIA_ENCODE_OS = 14, // reserved, for HSA multimedia encode queue
+
+ HSA_QUEUE_COMPUTE_AQL = 21, // HSA AQL packet compatible Compute Queue
+ HSA_QUEUE_DMA_AQL = 22, // HSA AQL packet compatible DMA Queue
+
+ // more types in the future
+
+ HSA_QUEUE_TYPE_SIZE = 0xFFFFFFFF //aligns to 32bit enum
+} HSA_QUEUE_TYPE;
+
+typedef struct _HsaQueueResource
+{
+ HSA_QUEUEID QueueId; /** queue ID */
+ /** Doorbell address to notify HW of a new dispatch */
+ union
+ {
+ HSAuint32* Queue_DoorBell;
+ HSAuint64* Queue_DoorBell_aql;
+ HSAuint64 QueueDoorBell;
+ };
+
+ /** virtual address to notify HW of queue write ptr value */
+ union
+ {
+ HSAuint32* Queue_write_ptr;
+ HSAuint64* Queue_write_ptr_aql;
+ HSAuint64 QueueWptrValue;
+ };
+
+ /** virtual address updated by HW to indicate current read location */
+ union
+ {
+ HSAuint32* Queue_read_ptr;
+ HSAuint64* Queue_read_ptr_aql;
+ HSAuint64 QueueRptrValue;
+ };
+
+} HsaQueueResource;
+
+
+//TEMPORARY structure definition - to be used only on "Trinity + Southern Islands" platform
+typedef struct _HsaQueueReport
+{
+ HSAuint32 VMID; //Required on SI to dispatch IB in primary ring
+ void* QueueAddress; //virtual address of UM mapped compute ring
+ HSAuint64 QueueSize; //size of the UM mapped compute ring
+} HsaQueueReport;
+
+
+
+// Operations that can be applied to a wavefront; numbering starts at 1.
+typedef enum _HSA_DBG_WAVEOP
+{
+ HSA_DBG_WAVEOP_HALT = 1, //Halts a wavefront
+ HSA_DBG_WAVEOP_RESUME = 2, //Resumes a wavefront
+ HSA_DBG_WAVEOP_KILL = 3, //Kills a wavefront
+ HSA_DBG_WAVEOP_DEBUG = 4, //Causes wavefront to enter debug mode
+ HSA_DBG_WAVEOP_TRAP = 5, //Causes wavefront to take a trap
+ HSA_DBG_NUM_WAVEOP = 5, //number of operations above; shares the value 5 with HSA_DBG_WAVEOP_TRAP
+ HSA_DBG_MAX_WAVEOP = 0xFFFFFFFF
+} HSA_DBG_WAVEOP;
+
+typedef enum _HSA_DBG_WAVEMODE
+{
+ HSA_DBG_WAVEMODE_SINGLE = 0, //send command to a single wave
+ //Broadcast to all wavefronts of all processes is not supported for HSA user mode
+ HSA_DBG_WAVEMODE_BROADCAST_PROCESS = 2, //send to waves within current process
+ HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU = 3, //send to waves within current process on CU
+ HSA_DBG_NUM_WAVEMODE = 3,
+ HSA_DBG_MAX_WAVEMODE = 0xFFFFFFFF
+} HSA_DBG_WAVEMODE;
+
+
+typedef enum _HSA_DBG_WAVEMSG_TYPE
+{
+ HSA_DBG_WAVEMSG_AUTO = 0,
+ HSA_DBG_WAVEMSG_USER = 1,
+ HSA_DBG_WAVEMSG_ERROR = 2,
+ HSA_DBG_NUM_WAVEMSG,
+ HSA_DBG_MAX_WAVEMSG = 0xFFFFFFFF
+} HSA_DBG_WAVEMSG_TYPE;
+
+typedef enum _HSA_DBG_WATCH_MODE
+{
+ HSA_DBG_WATCH_READ = 0, //Read operations only
+ HSA_DBG_WATCH_NONREAD = 1, //Write or Atomic operations only
+ HSA_DBG_WATCH_ATOMIC = 2, //Atomic Operations only
+ HSA_DBG_WATCH_ALL = 3, //Read, Write or Atomic operations
+ HSA_DBG_WATCH_NUM,
+ HSA_DBG_WATCH_SIZE = 0xFFFFFFFF
+} HSA_DBG_WATCH_MODE;
+
+
+//This structure is hardware specific and may change in the future
+typedef struct _HsaDbgWaveMsgAMDGen2
+{
+ HSAuint32 Value;
+ HSAuint32 Reserved2;
+
+} HsaDbgWaveMsgAMDGen2;
+
+typedef union _HsaDbgWaveMessageAMD
+{
+ HsaDbgWaveMsgAMDGen2 WaveMsgInfoGen2;
+ //for future HsaDbgWaveMsgAMDGen3;
+} HsaDbgWaveMessageAMD;
+
+typedef struct _HsaDbgWaveMessage
+{
+ void* MemoryVA; // ptr to associated host-accessible data
+ HsaDbgWaveMessageAMD DbgWaveMsg;
+} HsaDbgWaveMessage;
+
+
+//
+// HSA sync primitive, Event and HW Exception notification API definitions
+// The API functions allow the runtime to define a so-called sync-primitive, a SW object
+// combining a user-mode provided "syncvar" and a scheduler event that can be signaled
+// through a defined GPU interrupt. A syncvar is a process virtual memory location of
+// a certain size that can be accessed by CPU and GPU shader code within the process to set
+// and query the content within that memory. The definition of the content is determined by
+// the HSA runtime and potentially GPU shader code interfacing with the HSA runtime.
+// The syncvar values may be commonly written through an PM4 WRITE_DATA packet in the
+// user mode instruction stream.
+// The OS scheduler event is typically associated and signaled by an interrupt issued by
+// the GPU, but other HSA system interrupt conditions from other HW (e.g. IOMMUv2) may be
+// surfaced by the KFD by this mechanism, too.
+//
+
+// these are the new definitions for events
+typedef enum _HSA_EVENTTYPE
+{
+ HSA_EVENTTYPE_SIGNAL = 0, //user-mode generated GPU signal
+ HSA_EVENTTYPE_NODECHANGE = 1, //HSA node change (attach/detach)
+ HSA_EVENTTYPE_DEVICESTATECHANGE = 2, //HSA device state change( start/stop )
+ HSA_EVENTTYPE_HW_EXCEPTION = 3, //GPU shader exception event
+ HSA_EVENTTYPE_SYSTEM_EVENT = 4, //GPU SYSCALL with parameter info
+ HSA_EVENTTYPE_DEBUG_EVENT = 5, //GPU signal for debugging
+ HSA_EVENTTYPE_PROFILE_EVENT = 6, //GPU signal for profiling
+ HSA_EVENTTYPE_QUEUE_EVENT = 7, //GPU signal queue idle state (EOP pm4)
+ HSA_EVENTTYPE_MEMORY = 8, //GPU signal for signaling memory access faults and memory subsystem issues
+ //...
+ HSA_EVENTTYPE_MAXID,
+ HSA_EVENTTYPE_TYPE_SIZE = 0xFFFFFFFF
+} HSA_EVENTTYPE;
+
+typedef HSAuint32 HSA_EVENTID;
+
+//
+// Subdefinitions for various event types: Syncvar
+//
+
+typedef struct _HsaSyncVar
+{
+ union
+ {
+ void* UserData; //pointer to user mode data
+ HSAuint64 UserDataPtrValue; //64bit compatibility of value
+ } SyncVar;
+ HSAuint64 SyncVarSize;
+} HsaSyncVar;
+
+//
+// Subdefinitions for various event types: NodeChange
+//
+
+typedef enum _HSA_EVENTTYPE_NODECHANGE_FLAGS
+{
+ HSA_EVENTTYPE_NODECHANGE_ADD = 0,
+ HSA_EVENTTYPE_NODECHANGE_REMOVE = 1,
+ HSA_EVENTTYPE_NODECHANGE_SIZE = 0xFFFFFFFF
+} HSA_EVENTTYPE_NODECHANGE_FLAGS;
+
+typedef struct _HsaNodeChange
+{
+ HSA_EVENTTYPE_NODECHANGE_FLAGS Flags; // HSA node added/removed on the platform
+} HsaNodeChange;
+
+//
+// Sub-definitions for various event types: DeviceStateChange
+//
+
+// Flags carried in HsaDeviceStateChange.Flags.
+// NOTE: the enumerators are spelled ..._DEVICESTATUSCHANGE_... while the enum type and
+// the event type HSA_EVENTTYPE_DEVICESTATECHANGE are spelled ..._DEVICESTATECHANGE...
+typedef enum _HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS
+{
+ HSA_EVENTTYPE_DEVICESTATUSCHANGE_START = 0, //device started (and available)
+ HSA_EVENTTYPE_DEVICESTATUSCHANGE_STOP = 1, //device stopped (i.e. unavailable)
+ HSA_EVENTTYPE_DEVICESTATUSCHANGE_SIZE = 0xFFFFFFFF
+} HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS;
+
+typedef struct _HsaDeviceStateChange
+{
+ HSAuint32 NodeId; // F-NUMA node that contains the device
+ HSA_DEVICE Device; // device type: GPU or CPU
+ HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS Flags; // event flags
+} HsaDeviceStateChange;
+
+//
+// Sub-definitions for various event types: Memory exception
+//
+
+typedef enum _HSA_EVENTID_MEMORYFLAGS
+{
+ HSA_EVENTID_MEMORY_RECOVERABLE = 0, //access fault, recoverable after page adjustment
+ HSA_EVENTID_MEMORY_FATAL_PROCESS = 1, //memory access requires process context destruction, unrecoverable
+ HSA_EVENTID_MEMORY_FATAL_VM = 2, //memory access requires all GPU VA context destruction, unrecoverable
+} HSA_EVENTID_MEMORYFLAGS;
+
+typedef struct _HsaAccessAttributeFailure
+{
+ unsigned int NotPresent : 1; // Page not present or supervisor privilege
+ unsigned int ReadOnly : 1; // Write access to a read-only page
+ unsigned int NoExecute : 1; // Execute access to a page marked NX
+ unsigned int GpuAccess : 1; // Host access only
+ unsigned int ECC : 1; // ECC failure (if supported by HW)
+ unsigned int Reserved : 27; // must be 0
+} HsaAccessAttributeFailure;
+
+// data associated with HSA_EVENTTYPE_MEMORY
+typedef struct _HsaMemoryAccessFault
+{
+ HSAuint32 NodeId; // H-NUMA node that contains the device where the memory access occurred
+ HSAuint64 VirtualAddress; // virtual address this occurred on
+ HsaAccessAttributeFailure Failure; // failure attribute
+ HSA_EVENTID_MEMORYFLAGS Flags; // event flags
+} HsaMemoryAccessFault;
+
+// Data attached to one event: its type, a union holding the per-type payload,
+// and three words reserved for the KFD and the thunk.
+typedef struct _HsaEventData
+{
+ HSA_EVENTTYPE EventType; //event type
+
+ union
+ {
+ // return data associated with HSA_EVENTTYPE_SIGNAL and other events
+ HsaSyncVar SyncVar;
+
+ // data associated with HSA_EVENTTYPE_NODECHANGE
+ HsaNodeChange NodeChangeState;
+
+ // data associated with HSA_EVENTTYPE_DEVICESTATECHANGE
+ HsaDeviceStateChange DeviceState;
+
+ // data associated with HSA_EVENTTYPE_MEMORY
+ HsaMemoryAccessFault MemoryAccessFault;
+
+ } EventData;
+
+ // the following data entries are internal to the KFD & thunk itself.
+
+ HSAuint64 HWData1; // internal thunk store for Event data (OsEventHandle)
+ HSAuint64 HWData2; // internal thunk store for Event data (HWAddress)
+ HSAuint32 HWData3; // internal thunk store for Event data (HWData)
+} HsaEventData;
+
+
+typedef struct _HsaEventDescriptor
+{
+ HSA_EVENTTYPE EventType; // event type to allocate
+ HSAuint32 NodeId; // H-NUMA node containing GPU device that is event source
+ HsaSyncVar SyncVar; // pointer to user mode syncvar data, syncvar->UserDataPtrValue may be NULL
+} HsaEventDescriptor;
+
+
+typedef struct _HsaEvent
+{
+ HSA_EVENTID EventId;
+ HsaEventData EventData;
+} HsaEvent;
+
+// Named timeout values: 0 for "immediate" and 0xFFFFFFFF for "infinite".
+// NOTE: the enum tag is spelled _HsaEventTimeout while the typedef name is HsaEventTimeOut.
+typedef enum _HsaEventTimeout
+{
+ HSA_EVENTTIMEOUT_IMMEDIATE = 0,
+ HSA_EVENTTIMEOUT_INFINITE = 0xFFFFFFFF
+} HsaEventTimeOut;
+
+typedef struct _HsaClockCounters
+{
+ HSAuint64 GPUClockCounter;
+ HSAuint64 CPUClockCounter;
+ HSAuint64 SystemClockCounter;
+ HSAuint64 SystemClockFrequencyHz;
+} HsaClockCounters;
+
+#ifndef DEFINE_GUID
+typedef struct _HSA_UUID
+{
+ HSAuint32 Data1;
+ HSAuint16 Data2;
+ HSAuint16 Data3;
+ HSAuint8 Data4[8];
+} HSA_UUID;
+
+#define HSA_DEFINE_UUID(name, dw, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) \
+ static const HSA_UUID name = {dw, w1, w2, {b1, b2, b3, b4, b5, b6, b7, b8}}
+#else
+#define HSA_UUID GUID
+#define HSA_DEFINE_UUID DEFINE_GUID
+#endif
+
+
+// GUID that identifies the GPU Shader Sequencer (SQ) block
+// {B5C396B6-D310-47E4-86FC-5CC3043AF508}
+HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_SQ,
+0xb5c396b6, 0xd310, 0x47e4, 0x86, 0xfc, 0x5c, 0xc3, 0x4, 0x3a, 0xf5, 0x8);
+
+// GUID that identifies the GPU Memory Controller (MC) block
+// {13900B57-4956-4D98-81D0-68521937F59C}
+HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_MC,
+0x13900b57, 0x4956, 0x4d98, 0x81, 0xd0, 0x68, 0x52, 0x19, 0x37, 0xf5, 0x9c);
+
+// GUID that identifies the IOMMUv2 HW device
+// {80969879-B0F6-4BE6-97F6-6A6300F5101D}
+HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_IOMMUV2,
+0x80969879, 0xb0f6, 0x4be6, 0x97, 0xf6, 0x6a, 0x63, 0x0, 0xf5, 0x10, 0x1d);
+
+// GUID that identifies the KFD
+// {EA9B5AE1-6C3F-44B3-8954-DAF07565A90A}
+HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_KERNEL_DRIVER,
+0xea9b5ae1, 0x6c3f, 0x44b3, 0x89, 0x54, 0xda, 0xf0, 0x75, 0x65, 0xa9, 0xa);
+
+typedef enum _HSA_PROFILE_TYPE
+{
+ HSA_PROFILE_TYPE_PRIVILEGED_IMMEDIATE = 0, //immediate access counter (KFD access only)
+ HSA_PROFILE_TYPE_PRIVILEGED_STREAMING = 1, //streaming counter, HW continuously
+ //writes to memory on updates (KFD access only)
+ HSA_PROFILE_TYPE_NONPRIV_IMMEDIATE = 2, //user-queue accessible counter
+ HSA_PROFILE_TYPE_NONPRIV_STREAMING = 3, //user-queue accessible counter
+ //...
+ HSA_PROFILE_TYPE_NUM,
+
+ HSA_PROFILE_TYPE_SIZE = 0xFFFFFFFF // In order to align to 32-bit value
+} HSA_PROFILE_TYPE;
+
+
+typedef struct _HsaCounterFlags
+{
+ union
+ {
+ struct
+ {
+ unsigned int Global : 1; // counter is global
+ // (not tied to VMID/WAVE/CU, ...)
+ unsigned int Resettable : 1; // counter can be reset by SW
+ // (always to 0?)
+ unsigned int ReadOnly : 1; // counter is read-only
+ // (but may be reset, if indicated)
+ unsigned int Stream : 1; // counter has streaming capability
+ // (after trigger, updates buffer)
+ unsigned int Reserved : 28;
+ } ui32;
+ HSAuint32 Value;
+ };
+} HsaCounterFlags;
+
+
+typedef struct _HsaCounter
+{
+ HSA_PROFILE_TYPE Type; // specifies the counter type
+ HSAuint64 CounterId; // indicates counter register offset
+ HSAuint32 CounterSizeInBits; // indicates relevant counter bits
+ HSAuint64 CounterMask; // bitmask for counter value (if applicable)
+ HsaCounterFlags Flags; // Property flags (see above)
+ HSAuint32 BlockIndex; // identifies block the counter belongs to,
+ // value may be 0 to NumBlocks
+} HsaCounter;
+
+
+// Describes one profilable block and the counters it offers.
+// Counters[] is declared with one element but is documented below as holding
+// NumCounters elements, i.e. the structure is variable-length and
+// sizeof(HsaCounterBlockProperties) only accounts for the first counter.
+typedef struct _HsaCounterBlockProperties
+{
+ HSA_UUID BlockId; // specifies the block location
+ HSAuint32 NumCounters; // How many counters are available?
+ // (sizes Counters[] array below)
+ HSAuint32 NumConcurrent; // How many counter slots are available
+ // in block?
+ HsaCounter Counters[1]; // Start of counter array
+ // (NumCounters elements total)
+} HsaCounterBlockProperties;
+
+
+// Top-level description of the profilable blocks.
+// Blocks[] is declared with one element but is documented below as holding NumBlocks elements.
+// NOTE(review): each HsaCounterBlockProperties is itself variable-length (its Counters[]
+// holds NumCounters entries), so Blocks[i] for i > 0 presumably cannot be reached by plain
+// array indexing when a block has more than one counter - confirm how producers lay this out.
+typedef struct _HsaCounterProperties
+{
+ HSAuint32 NumBlocks; // How many profilable block are available?
+ // (sizes Blocks[] array below)
+ HSAuint32 NumConcurrent; // How many blocks slots can be queried
+ // concurrently by HW?
+ HsaCounterBlockProperties Blocks[1]; // Start of block array
+ // (NumBlocks elements total)
+} HsaCounterProperties;
+
+typedef HSAuint64 HSATraceId;
+
+typedef struct _HsaPmcTraceRoot
+{
+ HSAuint64 TraceBufferMinSizeBytes;// (page aligned)
+ HSAuint32 NumberOfPasses;
+ HSATraceId TraceId;
+} HsaPmcTraceRoot;
+
+#pragma pack(pop, hsakmttypes_h)
+
+
+#ifdef __cplusplus
+} //extern "C"
+#endif
+
+#endif //_HSAKMTTYPES_H_
diff --git a/hsakmt/kfd_ioctl.h b/hsakmt/kfd_ioctl.h
new file mode 100644
index 0000000..d683342
--- /dev/null
+++ b/hsakmt/kfd_ioctl.h
@@ -0,0 +1,292 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef KFD_IOCTL_H_INCLUDED
+#define KFD_IOCTL_H_INCLUDED
+
+#include <stdint.h>
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#define KFD_IOCTL_MAJOR_VERSION 1
+#define KFD_IOCTL_MINOR_VERSION 1
+
+struct kfd_ioctl_get_version_args {
+ uint32_t major_version; /* from KFD */
+ uint32_t minor_version; /* from KFD */
+};
+
+/*
+ * For kfd_ioctl_create_queue_args.queue_type.
+ * These values are not the HSA_QUEUE_TYPE numbers (there COMPUTE = 1, SDMA = 2
+ * and COMPUTE_AQL = 21), so a queue type cannot be passed through unchanged.
+ */
+#define KFD_IOC_QUEUE_TYPE_COMPUTE 0
+#define KFD_IOC_QUEUE_TYPE_SDMA 1
+#define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL 2
+
+#define KFD_MAX_QUEUE_PERCENTAGE 100
+#define KFD_MAX_QUEUE_PRIORITY 15
+
+struct kfd_ioctl_create_queue_args {
+ uint64_t ring_base_address; /* to KFD */
+ uint64_t write_pointer_address; /* from KFD */
+ uint64_t read_pointer_address; /* from KFD */
+ uint64_t doorbell_offset; /* from KFD */
+
+ uint32_t ring_size; /* to KFD */
+ uint32_t gpu_id; /* to KFD */
+ uint32_t queue_type; /* to KFD */
+ uint32_t queue_percentage; /* to KFD */
+ uint32_t queue_priority; /* to KFD */
+ uint32_t queue_id; /* from KFD */
+
+ uint64_t eop_buffer_address; /* to KFD */
+ uint64_t eop_buffer_size; /* to KFD */
+ uint64_t ctx_save_restore_address; /* to KFD */
+ uint64_t ctx_save_restore_size; /* to KFD */
+};
+
+struct kfd_ioctl_destroy_queue_args {
+ uint32_t queue_id; /* to KFD */
+ uint32_t pad;
+};
+
+struct kfd_ioctl_update_queue_args {
+ uint64_t ring_base_address; /* to KFD */
+
+ uint32_t queue_id; /* to KFD */
+ uint32_t ring_size; /* to KFD */
+ uint32_t queue_percentage; /* to KFD */
+ uint32_t queue_priority; /* to KFD */
+};
+
+/* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */
+#define KFD_IOC_CACHE_POLICY_COHERENT 0
+#define KFD_IOC_CACHE_POLICY_NONCOHERENT 1
+
+struct kfd_ioctl_set_memory_policy_args {
+ uint64_t alternate_aperture_base; /* to KFD */
+ uint64_t alternate_aperture_size; /* to KFD */
+
+ uint32_t gpu_id; /* to KFD */
+ uint32_t default_policy; /* to KFD */
+ uint32_t alternate_policy; /* to KFD */
+ uint32_t pad;
+};
+
+/*
+ * All counters are monotonic. They are used for profiling of compute jobs.
+ * The profiling is done by userspace.
+ *
+ * In case of GPU reset, the counter should not be affected.
+ */
+
+struct kfd_ioctl_get_clock_counters_args {
+ uint64_t gpu_clock_counter; /* from KFD */
+ uint64_t cpu_clock_counter; /* from KFD */
+ uint64_t system_clock_counter; /* from KFD */
+ uint64_t system_clock_freq; /* from KFD */
+
+ uint32_t gpu_id; /* to KFD */
+ uint32_t pad;
+};
+
+#define NUM_OF_SUPPORTED_GPUS 7
+
+struct kfd_process_device_apertures {
+ uint64_t lds_base; /* from KFD */
+ uint64_t lds_limit; /* from KFD */
+ uint64_t scratch_base; /* from KFD */
+ uint64_t scratch_limit; /* from KFD */
+ uint64_t gpuvm_base; /* from KFD */
+ uint64_t gpuvm_limit; /* from KFD */
+ uint32_t gpu_id; /* from KFD */
+ uint32_t pad;
+};
+
+struct kfd_ioctl_get_process_apertures_args {
+ struct kfd_process_device_apertures
+ process_apertures[NUM_OF_SUPPORTED_GPUS];/* from KFD */
+
+ /* from KFD, should be in the range [1 - NUM_OF_SUPPORTED_GPUS] */
+ uint32_t num_of_nodes;
+ uint32_t pad;
+};
+
+#define MAX_ALLOWED_NUM_POINTS 100
+#define MAX_ALLOWED_AW_BUFF_SIZE 4096
+#define MAX_ALLOWED_WAC_BUFF_SIZE 128
+
+struct kfd_ioctl_dbg_register_args {
+ uint32_t gpu_id; /* to KFD */
+ uint32_t pad;
+};
+
+struct kfd_ioctl_dbg_unregister_args {
+ uint32_t gpu_id; /* to KFD */
+ uint32_t pad;
+};
+
+struct kfd_ioctl_dbg_address_watch_args {
+ uint64_t content_ptr; /* a pointer to the actual content */
+ uint32_t gpu_id; /* to KFD */
+ uint32_t buf_size_in_bytes; /*including gpu_id and buf_size */
+};
+
+struct kfd_ioctl_dbg_wave_control_args {
+ uint64_t content_ptr; /* a pointer to the actual content */
+ uint32_t gpu_id; /* to KFD */
+ uint32_t buf_size_in_bytes; /*including gpu_id and buf_size */
+};
+
+/* Matching HSA_EVENTTYPE */
+#define KFD_IOC_EVENT_SIGNAL 0
+#define KFD_IOC_EVENT_NODECHANGE 1
+#define KFD_IOC_EVENT_DEVICESTATECHANGE 2
+#define KFD_IOC_EVENT_HW_EXCEPTION 3
+#define KFD_IOC_EVENT_SYSTEM_EVENT 4
+#define KFD_IOC_EVENT_DEBUG_EVENT 5
+#define KFD_IOC_EVENT_PROFILE_EVENT 6
+#define KFD_IOC_EVENT_QUEUE_EVENT 7
+#define KFD_IOC_EVENT_MEMORY 8
+
+#define KFD_IOC_WAIT_RESULT_COMPLETE 0
+#define KFD_IOC_WAIT_RESULT_TIMEOUT 1
+#define KFD_IOC_WAIT_RESULT_FAIL 2
+
+#define KFD_SIGNAL_EVENT_LIMIT 256
+
+struct kfd_ioctl_create_event_args {
+ uint64_t event_page_offset; /* from KFD */
+ uint32_t event_trigger_data; /* from KFD - signal events only */
+ uint32_t event_type; /* to KFD */
+ uint32_t auto_reset; /* to KFD */
+ uint32_t node_id; /* to KFD - only valid for certain
+ event types */
+ uint32_t event_id; /* from KFD */
+ uint32_t event_slot_index; /* from KFD */
+};
+
+struct kfd_ioctl_destroy_event_args {
+ uint32_t event_id; /* to KFD */
+ uint32_t pad;
+};
+
+struct kfd_ioctl_set_event_args {
+ uint32_t event_id; /* to KFD */
+ uint32_t pad;
+};
+
+struct kfd_ioctl_reset_event_args {
+ uint32_t event_id; /* to KFD */
+ uint32_t pad;
+};
+
+/*
+ * Failure attributes of a memory exception. Each attribute occupies a full
+ * uint32_t here, unlike the 1-bit fields of HsaAccessAttributeFailure, and the
+ * GpuAccess and ECC attributes of that structure have no counterpart.
+ */
+struct kfd_memory_exception_failure {
+ uint32_t NotPresent; /* Page not present or supervisor privilege */
+ uint32_t ReadOnly; /* Write access to a read-only page */
+ uint32_t NoExecute; /* Execute access to a page marked NX */
+ uint32_t pad;
+};
+
+/* memory exception data*/
+struct kfd_hsa_memory_exception_data {
+ struct kfd_memory_exception_failure failure;
+ uint64_t va;
+ uint32_t gpu_id;
+ uint32_t pad;
+};
+
+/* Event data*/
+struct kfd_event_data {
+ union {
+ struct kfd_hsa_memory_exception_data memory_exception_data;
+ }; /* From KFD */
+ uint64_t kfd_event_data_ext; /* pointer to an extension structure
+ for future exception types */
+ uint32_t event_id; /* to KFD */
+ uint32_t pad;
+};
+
+/* Arguments of AMDKFD_IOC_WAIT_EVENTS */
+struct kfd_ioctl_wait_events_args {
+ uint64_t events_ptr; /* pointer to struct
+ kfd_event_data array, to KFD */
+ uint32_t num_events; /* to KFD */
+ uint32_t wait_for_all; /* to KFD */
+ uint32_t timeout; /* to KFD */
+ uint32_t wait_result; /* from KFD - presumably one of KFD_IOC_WAIT_RESULT_*, TODO confirm */
+};
+
+#define AMDKFD_IOCTL_BASE 'K'
+#define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr)
+#define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type)
+#define AMDKFD_IOW(nr, type) _IOW(AMDKFD_IOCTL_BASE, nr, type)
+#define AMDKFD_IOWR(nr, type) _IOWR(AMDKFD_IOCTL_BASE, nr, type)
+
+#define AMDKFD_IOC_GET_VERSION \
+ AMDKFD_IOR(0x01, struct kfd_ioctl_get_version_args)
+
+#define AMDKFD_IOC_CREATE_QUEUE \
+ AMDKFD_IOWR(0x02, struct kfd_ioctl_create_queue_args)
+
+#define AMDKFD_IOC_DESTROY_QUEUE \
+ AMDKFD_IOWR(0x03, struct kfd_ioctl_destroy_queue_args)
+
+#define AMDKFD_IOC_SET_MEMORY_POLICY \
+ AMDKFD_IOW(0x04, struct kfd_ioctl_set_memory_policy_args)
+
+#define AMDKFD_IOC_GET_CLOCK_COUNTERS \
+ AMDKFD_IOWR(0x05, struct kfd_ioctl_get_clock_counters_args)
+
+#define AMDKFD_IOC_GET_PROCESS_APERTURES \
+ AMDKFD_IOR(0x06, struct kfd_ioctl_get_process_apertures_args)
+
+#define AMDKFD_IOC_UPDATE_QUEUE \
+ AMDKFD_IOW(0x07, struct kfd_ioctl_update_queue_args)
+
+#define AMDKFD_IOC_CREATE_EVENT \
+ AMDKFD_IOWR(0x08, struct kfd_ioctl_create_event_args)
+
+#define AMDKFD_IOC_DESTROY_EVENT \
+ AMDKFD_IOW(0x09, struct kfd_ioctl_destroy_event_args)
+
+#define AMDKFD_IOC_SET_EVENT \
+ AMDKFD_IOW(0x0A, struct kfd_ioctl_set_event_args)
+
+#define AMDKFD_IOC_RESET_EVENT \
+ AMDKFD_IOW(0x0B, struct kfd_ioctl_reset_event_args)
+
+#define AMDKFD_IOC_WAIT_EVENTS \
+ AMDKFD_IOWR(0x0C, struct kfd_ioctl_wait_events_args)
+
+#define AMDKFD_IOC_DBG_REGISTER \
+ AMDKFD_IOW(0x0D, struct kfd_ioctl_dbg_register_args)
+
+#define AMDKFD_IOC_DBG_UNREGISTER \
+ AMDKFD_IOW(0x0E, struct kfd_ioctl_dbg_unregister_args)
+
+#define AMDKFD_IOC_DBG_ADDRESS_WATCH \
+ AMDKFD_IOW(0x0F, struct kfd_ioctl_dbg_address_watch_args)
+
+#define AMDKFD_IOC_DBG_WAVE_CONTROL \
+ AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args)
+
+/*
+ * Range of the ioctl numbers defined above: START is the first number (0x01),
+ * END is one past the last number in use (0x10).
+ */
+#define AMDKFD_COMMAND_START 0x01
+#define AMDKFD_COMMAND_END 0x11
+
+#endif
diff --git a/hsakmt/libhsakmt.c b/hsakmt/libhsakmt.c
new file mode 100644
index 0000000..d7f79d3
--- /dev/null
+++ b/hsakmt/libhsakmt.c
@@ -0,0 +1,18 @@
+#include <errno.h>
+#include <sys/ioctl.h>
+
+#include "libhsakmt.h"
+
+/**
+ * Issue an ioctl and transparently reissue it for as long as it fails
+ * with EINTR or EAGAIN.
+ *
+ * Returns the value of the last ioctl() call; on failure errno is left
+ * as that call set it.
+ */
+int
+kmtIoctl(int fd, unsigned long request, void *arg)
+{
+    for (;;) {
+        int rc = ioctl(fd, request, arg);
+
+        /* Anything other than a -1 failure is final */
+        if (rc != -1)
+            return rc;
+
+        /* Only EINTR and EAGAIN warrant another attempt */
+        if (errno != EINTR && errno != EAGAIN)
+            return rc;
+    }
+}
diff --git a/hsakmt/libhsakmt.h b/hsakmt/libhsakmt.h
new file mode 100644
index 0000000..0d73c8f
--- /dev/null
+++ b/hsakmt/libhsakmt.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including
+ * the next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef LIBHSAKMT_H_INCLUDED
+#define LIBHSAKMT_H_INCLUDED
+
+#include "hsakmt.h"
+#include <pthread.h>
+#include <stdint.h>
+#include <limits.h>
+
+extern int kfd_fd;
+extern unsigned long kfd_open_count;
+extern pthread_mutex_t hsakmt_mutex;
+
+#undef HSAKMTAPI
+#define HSAKMTAPI __attribute__((visibility ("default")))
+
+/*
+ * Pointer -> 64-bit integer without a pointer-to-int-cast warning.
+ * The intermediate type is uintptr_t, the integer type meant for pointer
+ * round-trips, instead of assuming unsigned long is pointer-sized.
+ */
+#define PORT_VPTR_TO_UINT64(vptr) ((uint64_t)(uintptr_t)(vptr))
+
+/*
+ * 64-bit integer -> pointer without an int-to-pointer-cast warning.
+ * Goes through uintptr_t for the same reason as above.
+ */
+#define PORT_UINT64_TO_VPTR(v) ((void*)(uintptr_t)(v))
+
+/*
+ * Leave the calling function with HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED
+ * while kfd_open_count is zero. Contains a hidden return statement.
+ */
+#define CHECK_KFD_OPEN() \
+ do { if (kfd_open_count == 0) return HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED; } while (0)
+
+/* Page size in bytes, hard-coded here */
+#define PAGE_SIZE 4096
+
+/*
+ * Leave the calling function with HSAKMT_STATUS_INVALID_PARAMETER unless x
+ * is a multiple of PAGE_SIZE. Contains a hidden return statement.
+ */
+#define CHECK_PAGE_MULTIPLE(x) \
+ do { if ((uint64_t)PORT_VPTR_TO_UINT64(x) % PAGE_SIZE) return HSAKMT_STATUS_INVALID_PARAMETER; } while(0)
+
+/* Round x up to the next multiple of PAGE_SIZE; the result has type uint64_t */
+#define PAGE_ALIGN_UP(x) (((uint64_t)(x) + PAGE_SIZE - 1) & ~(uint64_t)(PAGE_SIZE-1))
+/* Mask with the low n bits set: 0 for n == 0, all ones once n reaches the bit width of unsigned long long */
+#define BITMASK(n) (((n) < sizeof(1ULL) * CHAR_BIT ? (1ULL << (n)) : 0) - 1ULL)
+
+/*
+ * Even though the topology code doesn't limit us to a maximum number of nodes,
+ * the current HSA spec says the maximum is 8 nodes
+ */
+#define MAX_NODES 8
+
+HSAKMT_STATUS validate_nodeid(uint32_t nodeid, uint32_t *gpu_id);
+HSAKMT_STATUS gpuid_to_nodeid(uint32_t gpu_id, uint32_t* node_id);
+uint16_t get_device_id_by_node(HSAuint32 node_id);
+
+extern int kmtIoctl(int fd, unsigned long request, void *arg);
+
+/*
+ * Void pointer arithmetic (or remove -Wpointer-arith to allow void pointers arithmetic).
+ * Every argument and every expansion is parenthesized so that an expression
+ * argument such as "a - b" or "c ? x : y" is applied as one offset instead of
+ * being split up by operator precedence.
+ */
+#define VOID_PTR_ADD32(ptr,n) ((void*)((uint32_t*)(ptr) + (n))) /* ptr + n 32-bit words */
+#define VOID_PTR_ADD(ptr,n) ((void*)((uint8_t*)(ptr) + (n))) /* ptr + n bytes */
+#define VOID_PTR_SUB(ptr,n) ((void*)((uint8_t*)(ptr) - (n))) /* ptr - n bytes */
+#define VOID_PTRS_SUB(ptr1,ptr2) ((uint64_t)((uint8_t*)(ptr1) - (uint8_t*)(ptr2))) /* ptr1 - ptr2, in bytes */
+
+#endif
diff --git a/hsakmt/libhsakmt.ver b/hsakmt/libhsakmt.ver
new file mode 100644
index 0000000..9c6e6cb
--- /dev/null
+++ b/hsakmt/libhsakmt.ver
@@ -0,0 +1,46 @@
+/*
+ * Linker version script, passed to the linker via
+ * -Wl,--version-script=libhsakmt.ver.
+ * Symbols listed under "global" are exported with version HSAKMT_1;
+ * the "local: *" catch-all keeps every other symbol private.
+ */
+HSAKMT_1
+{
+global:
+hsaKmtOpenKFD;
+hsaKmtCloseKFD;
+hsaKmtGetVersion;
+hsaKmtAcquireSystemProperties;
+hsaKmtReleaseSystemProperties;
+hsaKmtGetNodeProperties;
+hsaKmtGetNodeMemoryProperties;
+hsaKmtGetNodeCacheProperties;
+hsaKmtGetNodeIoLinkProperties;
+hsaKmtCreateEvent;
+hsaKmtDestroyEvent;
+hsaKmtSetEvent;
+hsaKmtResetEvent;
+hsaKmtQueryEventState;
+hsaKmtWaitOnEvent;
+hsaKmtWaitOnMultipleEvents;
+hsaKmtCreateQueue;
+hsaKmtUpdateQueue;
+hsaKmtDestroyQueue;
+hsaKmtSetMemoryPolicy;
+hsaKmtAllocMemory;
+hsaKmtFreeMemory;
+hsaKmtRegisterMemory;
+hsaKmtDeregisterMemory;
+hsaKmtMapMemoryToGPU;
+hsaKmtUnmapMemoryToGPU;
+hsaKmtDbgRegister;
+hsaKmtDbgUnregister;
+hsaKmtDbgWavefrontControl;
+hsaKmtDbgAddressWatch;
+hsaKmtGetClockCounters;
+hsaKmtPmcGetCounterProperties;
+hsaKmtPmcRegisterTrace;
+hsaKmtPmcUnregisterTrace;
+hsaKmtPmcAcquireTraceAccess;
+hsaKmtPmcReleaseTraceAccess;
+hsaKmtPmcStartTrace;
+hsaKmtPmcQueryTrace;
+hsaKmtPmcStopTrace;
+
+local: *;
+};
+
diff --git a/hsakmt/memory.c b/hsakmt/memory.c
new file mode 100644
index 0000000..718dd97
--- /dev/null
+++ b/hsakmt/memory.c
@@ -0,0 +1,204 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including
+ * the next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "libhsakmt.h"
+#include "linux/kfd_ioctl.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include "fmm.h"
+
+/*
+ * Sets the default and alternate caching policy of a node through the
+ * AMDKFD_IOC_SET_MEMORY_POLICY ioctl.
+ *
+ * Both policies must be HSA_CACHING_CACHED or HSA_CACHING_NONCACHED, and
+ * the alternate aperture address and size must be multiples of PAGE_SIZE.
+ *
+ * Returns HSAKMT_STATUS_SUCCESS, the validate_nodeid() failure code,
+ * HSAKMT_STATUS_INVALID_PARAMETER for bad arguments, or
+ * HSAKMT_STATUS_ERROR when the ioctl fails.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtSetMemoryPolicy(
+    HSAuint32 Node,
+    HSAuint32 DefaultPolicy,
+    HSAuint32 AlternatePolicy,
+    void* MemoryAddressAlternate,
+    HSAuint64 MemorySizeInBytes
+    )
+{
+    struct kfd_ioctl_set_memory_policy_args policy_args;
+    HSAKMT_STATUS status;
+    uint32_t gpu_id;
+
+    CHECK_KFD_OPEN();
+
+    status = validate_nodeid(Node, &gpu_id);
+    if (status != HSAKMT_STATUS_SUCCESS)
+        return status;
+
+    // Any legal policy and alternate address location is accepted.
+    if (DefaultPolicy != HSA_CACHING_CACHED && DefaultPolicy != HSA_CACHING_NONCACHED)
+        return HSAKMT_STATUS_INVALID_PARAMETER;
+    if (AlternatePolicy != HSA_CACHING_CACHED && AlternatePolicy != HSA_CACHING_NONCACHED)
+        return HSAKMT_STATUS_INVALID_PARAMETER;
+
+    CHECK_PAGE_MULTIPLE(MemoryAddressAlternate);
+    CHECK_PAGE_MULTIPLE(MemorySizeInBytes);
+
+    memset(&policy_args, 0, sizeof(policy_args));
+    policy_args.gpu_id = gpu_id;
+
+    // CACHED maps to the coherent kernel policy, NONCACHED to non-coherent.
+    if (DefaultPolicy == HSA_CACHING_CACHED)
+        policy_args.default_policy = KFD_IOC_CACHE_POLICY_COHERENT;
+    else
+        policy_args.default_policy = KFD_IOC_CACHE_POLICY_NONCOHERENT;
+
+    if (AlternatePolicy == HSA_CACHING_CACHED)
+        policy_args.alternate_policy = KFD_IOC_CACHE_POLICY_COHERENT;
+    else
+        policy_args.alternate_policy = KFD_IOC_CACHE_POLICY_NONCOHERENT;
+
+    policy_args.alternate_aperture_base = (uintptr_t)MemoryAddressAlternate;
+    policy_args.alternate_aperture_size = MemorySizeInBytes;
+
+    if (kmtIoctl(kfd_fd, AMDKFD_IOC_SET_MEMORY_POLICY, &policy_args) == -1)
+        return HSAKMT_STATUS_ERROR;
+
+    return HSAKMT_STATUS_SUCCESS;
+}
+
+/*
+ * Translates an HSA_PAGE_SIZE_* selector into a page size in bytes.
+ * An unknown selector trips an assertion; when assertions are compiled
+ * out it falls back to 4 KiB.
+ */
+static HSAuint32 PageSizeFromFlags(unsigned int pageSizeFlags)
+{
+    if (pageSizeFlags == HSA_PAGE_SIZE_4KB)
+        return 4*1024;
+    if (pageSizeFlags == HSA_PAGE_SIZE_64KB)
+        return 64*1024;
+    if (pageSizeFlags == HSA_PAGE_SIZE_2MB)
+        return 2*1024*1024;
+    if (pageSizeFlags == HSA_PAGE_SIZE_1GB)
+        return 1024*1024*1024;
+
+    assert(false);
+    return 4*1024;
+}
+
+/*
+ * Allocates page-aligned memory.
+ *
+ * Only host-accessible, pageable requests (HostAccess set, NonPaged
+ * clear) are served, from the process heap via posix_memalign(); with
+ * ExecuteAccess the range is additionally made executable. Every other
+ * flag combination yields HSAKMT_STATUS_INVALID_PARAMETER.
+ *
+ * SizeInBytes must be a multiple of the page size selected by
+ * MemFlags.ui32.PageSize unless GDSMemory is set.
+ *
+ * *MemoryAddress is written only on success, so a failed call never
+ * hands a freed or indeterminate pointer back to the caller.
+ *
+ * Returns HSAKMT_STATUS_SUCCESS, the validate_nodeid() failure code,
+ * HSAKMT_STATUS_INVALID_PARAMETER, HSAKMT_STATUS_NO_MEMORY, or
+ * HSAKMT_STATUS_ERROR when mprotect() fails.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtAllocMemory(
+    HSAuint32 PreferredNode, //IN
+    HSAuint64 SizeInBytes, //IN (multiple of page size)
+    HsaMemFlags MemFlags, //IN
+    void** MemoryAddress //OUT (page-aligned)
+    )
+{
+    CHECK_KFD_OPEN();
+    HSAKMT_STATUS result;
+    uint32_t gpu_id;
+    int err;
+
+    if (MemoryAddress == NULL)
+        return HSAKMT_STATUS_INVALID_PARAMETER;
+
+    result = validate_nodeid(PreferredNode, &gpu_id);
+    if (result != HSAKMT_STATUS_SUCCESS)
+        return result;
+
+    // The required size should be page aligned (GDS?)
+    HSAuint64 page_size = PageSizeFromFlags(MemFlags.ui32.PageSize);
+    if ((SizeInBytes & (page_size-1)) && !MemFlags.ui32.GDSMemory) {
+        return HSAKMT_STATUS_INVALID_PARAMETER;
+    }
+
+    if (MemFlags.ui32.HostAccess && !MemFlags.ui32.NonPaged) {
+        void *mem = NULL;
+
+        err = posix_memalign(&mem, page_size, SizeInBytes);
+        if (err != 0)
+            return HSAKMT_STATUS_NO_MEMORY;
+
+        if (MemFlags.ui32.ExecuteAccess) {
+            err = mprotect(mem, SizeInBytes, PROT_READ | PROT_WRITE | PROT_EXEC);
+            if (err != 0) {
+                // mprotect() reports -1/errno, which is not an
+                // HSAKMT_STATUS value; map it to a real status code.
+                free(mem);
+                return HSAKMT_STATUS_ERROR;
+            }
+        }
+
+        *MemoryAddress = mem;
+        return HSAKMT_STATUS_SUCCESS;
+    }
+
+    return HSAKMT_STATUS_INVALID_PARAMETER;
+}
+
+/*
+ * Releases memory obtained from hsaKmtAllocMemory().
+ *
+ * Addresses that fmm_is_inside_some_aperture() recognizes are handed to
+ * fmm_release(); a non-zero result from it is reported as
+ * HSAKMT_STATUS_INVALID_PARAMETER. Any other address is passed to free().
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtFreeMemory(
+    void* MemoryAddress, //IN (page-aligned)
+    HSAuint64 SizeInBytes //IN
+    )
+{
+    CHECK_KFD_OPEN();
+
+    if (!fmm_is_inside_some_aperture(MemoryAddress)) {
+        free(MemoryAddress);
+        return HSAKMT_STATUS_SUCCESS;
+    }
+
+    if (fmm_release(MemoryAddress, SizeInBytes))
+        return HSAKMT_STATUS_INVALID_PARAMETER;
+
+    return HSAKMT_STATUS_SUCCESS;
+}
+
+/*
+ * Stub: succeeds whenever the KFD device is open.
+ * Both arguments are currently ignored.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtRegisterMemory(void* MemoryAddress,        //IN (page-aligned)
+                     HSAuint64 MemorySizeInBytes) //IN (page-aligned)
+{
+    CHECK_KFD_OPEN();
+    return HSAKMT_STATUS_SUCCESS;
+}
+
+/*
+ * Stub: succeeds whenever the KFD device is open.
+ * The argument is currently ignored.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtDeregisterMemory(void* MemoryAddress) //IN
+{
+    CHECK_KFD_OPEN();
+    return HSAKMT_STATUS_SUCCESS;
+}
+
+/*
+ * Stub: GPUVM is not supported here, so no alternate GPU virtual address
+ * is ever produced. When the caller supplies AlternateVAGPU it is set
+ * to 0. Succeeds whenever the KFD device is open.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtMapMemoryToGPU(void* MemoryAddress,         //IN (page-aligned)
+                     HSAuint64 MemorySizeInBytes, //IN (page-aligned)
+                     HSAuint64* AlternateVAGPU)   //OUT (page-aligned)
+{
+    CHECK_KFD_OPEN();
+
+    if (AlternateVAGPU != NULL)
+        *AlternateVAGPU = 0;
+
+    return HSAKMT_STATUS_SUCCESS;
+}
+
+/*
+ * Stub: succeeds whenever the KFD device is open.
+ * The argument is currently ignored.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtUnmapMemoryToGPU(void* MemoryAddress) //IN (page-aligned)
+{
+    CHECK_KFD_OPEN();
+    return HSAKMT_STATUS_SUCCESS;
+}
diff --git a/hsakmt/openclose.c b/hsakmt/openclose.c
new file mode 100644
index 0000000..d5b91e2
--- /dev/null
+++ b/hsakmt/openclose.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including
+ * the next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "libhsakmt.h"
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include "fmm.h"
+
+/* Device node opened by hsaKmtOpenKFD(). */
+static const char kfd_device_name[] = "/dev/kfd";
+/* Lock file opened next to the device; its descriptor is used with lockf() in perfctr.c. */
+static const char tmp_file[] = "/var/lock/.amd_hsa_thunk_lock";
+/* Descriptor of tmp_file; users treat only values > 0 as a usable descriptor. */
+int amd_hsa_thunk_lock_fd = 0;
+
+/*
+ * Opens the KFD device on the first call and bumps the reference count
+ * on later calls. All state changes happen under hsakmt_mutex.
+ *
+ * If opening the device or initializing the process apertures fails,
+ * the library is left in the "not opened" state: the descriptor is
+ * closed, kfd_open_count stays 0 and no lock file is opened.
+ *
+ * Returns HSAKMT_STATUS_SUCCESS,
+ * HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED when /dev/kfd cannot be
+ * opened, or the failure code of fmm_init_process_apertures().
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtOpenKFD(void)
+{
+    HSAKMT_STATUS result;
+
+    pthread_mutex_lock(&hsakmt_mutex);
+
+    if (kfd_open_count == 0)
+    {
+        int fd = open(kfd_device_name, O_RDWR | O_CLOEXEC);
+
+        if (fd != -1)
+        {
+            /*
+             * Publish the descriptor before initializing the apertures,
+             * as the original code did.
+             * NOTE(review): presumably the fmm code reads kfd_fd - confirm.
+             */
+            kfd_fd = fd;
+            kfd_open_count = 1;
+
+            result = fmm_init_process_apertures();
+            if (result != HSAKMT_STATUS_SUCCESS)
+            {
+                /* Roll back so later calls do not use a closed descriptor. */
+                kfd_open_count = 0;
+                kfd_fd = -1;
+                close(fd);
+            }
+            else
+            {
+                /*
+                 * Opened only once the device is usable, so a failed
+                 * open attempt cannot leak a descriptor. A failure here
+                 * leaves -1, which users reject via their "> 0" check.
+                 */
+                amd_hsa_thunk_lock_fd = open(tmp_file,
+                    O_CREAT | //create the file if it's not present.
+                    O_RDWR, //only need write access for the internal locking semantics.
+                    S_IRUSR | S_IWUSR); //permissions on the file, 600 here.
+            }
+        }
+        else
+        {
+            result = HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED;
+        }
+    }
+    else
+    {
+        kfd_open_count++;
+        result = HSAKMT_STATUS_SUCCESS;
+    }
+
+    pthread_mutex_unlock(&hsakmt_mutex);
+
+    return result;
+}
+
+/*
+ * Drops one reference to the KFD device. When the last reference goes
+ * away the device descriptor and the lock file are closed, and the lock
+ * file is unlinked.
+ *
+ * Returns HSAKMT_STATUS_SUCCESS, or
+ * HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED when the device was not open.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtCloseKFD(void)
+{
+    HSAKMT_STATUS result;
+
+    pthread_mutex_lock(&hsakmt_mutex);
+
+    if (kfd_open_count > 0)
+    {
+        if (--kfd_open_count == 0)
+        {
+            close(kfd_fd);
+
+            if (amd_hsa_thunk_lock_fd > 0) {
+                close(amd_hsa_thunk_lock_fd);
+                unlink(tmp_file);
+            }
+
+            /*
+             * Back to the "no lock file" value, so the trace-access
+             * functions cannot call lockf() on a closed (and possibly
+             * reused) descriptor number.
+             */
+            amd_hsa_thunk_lock_fd = 0;
+        }
+
+        result = HSAKMT_STATUS_SUCCESS;
+    }
+    else
+    {
+        result = HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED;
+    }
+
+    pthread_mutex_unlock(&hsakmt_mutex);
+
+    return result;
+}
diff --git a/hsakmt/perfctr.c b/hsakmt/perfctr.c
new file mode 100644
index 0000000..64ab168
--- /dev/null
+++ b/hsakmt/perfctr.c
@@ -0,0 +1,370 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including
+ * the next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <stdlib.h>
+#include "libhsakmt.h"
+#include "pmc_table.h"
+#include "linux/kfd_ioctl.h"
+#include <unistd.h>
+
+#define BITS_PER_BYTE CHAR_BIT
+
+/* Tag stored in every struct perf_trace; the API functions check it before trusting a TraceId. */
+#define HSA_PERF_MAGIC4CC 0x54415348
+
+enum perf_trace_state {
+ PERF_TRACE_STATE__STOPPED = 0,
+ PERF_TRACE_STATE__STARTED
+};
+
+/*
+ * Heap object behind an HSATraceId: the id handed to callers is the
+ * address of this structure converted to a 64-bit integer.
+ */
+struct perf_trace {
+ uint32_t magic4cc; /* always HSA_PERF_MAGIC4CC for a valid trace */
+ uint32_t gpu_id; /* GPU the trace was registered on */
+ enum perf_trace_state state;
+};
+
+/* Defined in openclose.c; descriptor of the lock file used for trace access. */
+extern int amd_hsa_thunk_lock_fd;
+
+/* Per-node cache of counter properties, filled on first request and indexed by node id. */
+static HsaCounterProperties *counter_props[MAX_NODES] = {NULL};
+
+/*
+ * Stores the UUID that identifies a counter block in *uuid.
+ * Returns 0 on success; -1 for an unknown block id (which indicates a
+ * bug in the caller), in which case *uuid is left untouched.
+ */
+static int blockid2uuid(enum perf_block_id block_id, HSA_UUID *uuid)
+{
+    if (block_id == PERFCOUNTER_BLOCKID__SQ) {
+        *uuid = HSA_PROFILEBLOCK_AMD_SQ;
+        return 0;
+    }
+
+    /* If we reach this point, it's a bug */
+    return -1;
+}
+
+/*
+ * Returns the performance-counter description of a node.
+ *
+ * The description is built on the first request for a node and cached
+ * in counter_props[]; later requests return the cached object. The
+ * returned memory is owned by the library.
+ *
+ * The description is assembled in a local, zero-initialized buffer and
+ * stored in the cache only once it is complete, so a failure part-way
+ * through never leaves a freed or half-filled entry behind.
+ *
+ * Returns HSAKMT_STATUS_SUCCESS, HSAKMT_STATUS_INVALID_PARAMETER for a
+ * NULL output pointer, HSAKMT_STATUS_INVALID_NODE_UNIT for an unknown
+ * node or one beyond MAX_NODES, HSAKMT_STATUS_NO_MEMORY,
+ * HSAKMT_STATUS_ERROR for a block without a UUID, or the failure code of
+ * get_block_properties().
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtPmcGetCounterProperties(
+    HSAuint32 NodeId, //IN
+    HsaCounterProperties** CounterProperties //OUT
+    )
+{
+    HSAKMT_STATUS rc = HSAKMT_STATUS_SUCCESS;
+    uint32_t gpu_id, i, block_id;
+    uint16_t dev_id;
+    size_t counter_props_size = 0;
+    uint32_t total_counters = 0;
+    uint32_t total_concurrent = 0;
+    struct perf_counter_block block = {0};
+    HsaCounterProperties *props = NULL;
+
+    if (CounterProperties == NULL)
+        return HSAKMT_STATUS_INVALID_PARAMETER;
+
+    if (validate_nodeid(NodeId, &gpu_id) != HSAKMT_STATUS_SUCCESS)
+        return HSAKMT_STATUS_INVALID_NODE_UNIT;
+
+    /* The cache has MAX_NODES slots; never index past it. */
+    if (NodeId >= MAX_NODES)
+        return HSAKMT_STATUS_INVALID_NODE_UNIT;
+
+    if (counter_props[NodeId] == NULL) {
+        dev_id = get_device_id_by_node(NodeId);
+
+        /* First pass: totals needed to size the allocation. */
+        for (block_id = 0; block_id < PERFCOUNTER_BLOCKID__MAX; block_id++) {
+            rc = get_block_properties(dev_id, block_id, &block);
+            if (rc != HSAKMT_STATUS_SUCCESS)
+                return rc;
+            total_concurrent += block.num_of_slots;
+            total_counters += block.num_of_counters;
+        }
+
+        /*
+         * The structure types already contain one block and one counter,
+         * hence the "-1" terms. Guard the counter term so that a device
+         * without counters cannot wrap the size computation.
+         */
+        counter_props_size = sizeof(HsaCounterProperties) +
+            sizeof(HsaCounterBlockProperties)*(PERFCOUNTER_BLOCKID__MAX-1) +
+            sizeof(HsaCounter)*(total_counters > 0 ? total_counters-1 : 0);
+
+        /* Zeroed, so fields and flag bits not set below are well defined. */
+        props = calloc(1, counter_props_size);
+        if (props == NULL)
+            return HSAKMT_STATUS_NO_MEMORY;
+
+        props->NumBlocks = PERFCOUNTER_BLOCKID__MAX;
+        props->NumConcurrent = total_concurrent;
+
+        /* Second pass: fill in every block and its counters. */
+        for (block_id = 0; block_id < PERFCOUNTER_BLOCKID__MAX; block_id++)
+        {
+            rc = get_block_properties(dev_id, block_id, &block);
+            if (rc != HSAKMT_STATUS_SUCCESS) {
+                free(props);
+                return rc;
+            }
+
+            if (blockid2uuid(block_id, &props->Blocks[block_id].BlockId) != 0) {
+                free(props);
+                return HSAKMT_STATUS_ERROR;
+            }
+            props->Blocks[block_id].NumCounters = block.num_of_counters;
+            props->Blocks[block_id].NumConcurrent = block.num_of_slots;
+
+            for (i = 0; i < block.num_of_counters; i++) {
+                HsaCounter *counter = &props->Blocks[block_id].Counters[i];
+
+                counter->BlockIndex = block_id;
+                counter->CounterId = block.counter_ids[i];
+                counter->CounterSizeInBits = block.counter_size_in_bits;
+                counter->CounterMask = block.counter_mask;
+                counter->Flags.ui32.Global = 1;
+                counter->Type = HSA_PROFILE_TYPE_NONPRIV_IMMEDIATE;
+            }
+        }
+
+        /* Publish only the fully built description. */
+        counter_props[NodeId] = props;
+    }
+
+    *CounterProperties = counter_props[NodeId];
+
+    return HSAKMT_STATUS_SUCCESS;
+}
+
+/**
+  Registers a set of (HW) counters to be used for tracing/profiling.
+
+  Validates the requested counters against the node's counter
+  properties (fetching them first if they have not been requested yet),
+  allocates the trace object and fills in TraceRoot. The trace starts in
+  the stopped state.
+
+  Returns HSAKMT_STATUS_SUCCESS, HSAKMT_STATUS_INVALID_PARAMETER for bad
+  arguments or too many counters for a block,
+  HSAKMT_STATUS_INVALID_NODE_UNIT for an unknown node,
+  HSAKMT_STATUS_NO_MEMORY, or the failure code of
+  hsaKmtPmcGetCounterProperties().
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtPmcRegisterTrace(
+    HSAuint32 NodeId, //IN
+    HSAuint32 NumberOfCounters, //IN
+    HsaCounter* Counters, //IN
+    HsaPmcTraceRoot* TraceRoot //OUT
+    )
+{
+    uint32_t gpu_id, i;
+    uint64_t min_buf_size = 0;
+    uint32_t concurrent_counters[PERFCOUNTER_BLOCKID__MAX] = {0};
+    struct perf_trace *trace = NULL;
+    HsaCounterProperties *props = NULL;
+    HSAKMT_STATUS status;
+
+    if (Counters == NULL || TraceRoot == NULL || NumberOfCounters == 0)
+        return HSAKMT_STATUS_INVALID_PARAMETER;
+
+    if (validate_nodeid(NodeId, &gpu_id) != HSAKMT_STATUS_SUCCESS)
+        return HSAKMT_STATUS_INVALID_NODE_UNIT;
+
+    /* The property cache has MAX_NODES slots; never index past it. */
+    if (NodeId >= MAX_NODES)
+        return HSAKMT_STATUS_INVALID_NODE_UNIT;
+
+    /*
+     * The cache entry is NULL until the properties have been requested
+     * once; go through the getter instead of dereferencing it blindly.
+     */
+    status = hsaKmtPmcGetCounterProperties(NodeId, &props);
+    if (status != HSAKMT_STATUS_SUCCESS)
+        return status;
+
+    /* Calculating the minimum buffer size */
+    for (i = 0; i < NumberOfCounters; i++) {
+        if (Counters[i].BlockIndex >= PERFCOUNTER_BLOCKID__MAX)
+            return HSAKMT_STATUS_INVALID_PARAMETER;
+        min_buf_size += Counters[i].CounterSizeInBits/BITS_PER_BYTE;
+        concurrent_counters[Counters[i].BlockIndex]++;
+    }
+
+    /* Verifying that the number of counters per block is not larger than the amount of slots */
+    if (concurrent_counters[PERFCOUNTER_BLOCKID__SQ] > props->Blocks[PERFCOUNTER_BLOCKID__SQ].NumConcurrent)
+        return HSAKMT_STATUS_INVALID_PARAMETER;
+
+    /* Size of the object, not of the pointer to it. */
+    trace = malloc(sizeof(*trace));
+    if (trace == NULL)
+        return HSAKMT_STATUS_NO_MEMORY;
+
+    trace->magic4cc = HSA_PERF_MAGIC4CC;
+    trace->gpu_id = gpu_id;
+    trace->state = PERF_TRACE_STATE__STOPPED;
+
+    TraceRoot->NumberOfPasses = 1;
+    TraceRoot->TraceBufferMinSizeBytes = PAGE_ALIGN_UP(min_buf_size);
+    TraceRoot->TraceId = PORT_VPTR_TO_UINT64(trace);
+
+    return HSAKMT_STATUS_SUCCESS;
+}
+
+/**
+  Unregisters a set of (HW) counters used for tracing/profiling.
+
+  The trace must belong to the GPU of NodeId. A trace that is still
+  running is stopped first; the trace object is then freed, so TraceId
+  must not be used afterwards.
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtPmcUnregisterTrace(
+    HSAuint32 NodeId, //IN
+    HSATraceId TraceId //IN
+    )
+{
+    struct perf_trace *handle;
+    uint32_t node_gpu_id;
+
+    if (TraceId == 0)
+        return HSAKMT_STATUS_INVALID_PARAMETER;
+
+    if (validate_nodeid(NodeId, &node_gpu_id) != 0)
+        return HSAKMT_STATUS_INVALID_NODE_UNIT;
+
+    handle = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
+
+    if (handle->magic4cc != HSA_PERF_MAGIC4CC)
+        return HSAKMT_STATUS_INVALID_HANDLE;
+
+    if (handle->gpu_id != node_gpu_id)
+        return HSAKMT_STATUS_INVALID_NODE_UNIT;
+
+    /* A running trace has to be stopped before it can be released. */
+    if (handle->state == PERF_TRACE_STATE__STARTED) {
+        HSAKMT_STATUS stop_status = hsaKmtPmcStopTrace(TraceId);
+
+        if (stop_status != HSAKMT_STATUS_SUCCESS)
+            return stop_status;
+    }
+
+    free(handle);
+
+    return HSAKMT_STATUS_SUCCESS;
+}
+
+
+/**
+  Allows a user mode process to get exclusive access to the defined set
+  of (HW) counters used for tracing/profiling.
+
+  Exclusivity is implemented with a non-blocking lockf() on the lock
+  file descriptor; the call fails with HSAKMT_STATUS_ERROR when the lock
+  file is not available or the lock cannot be taken. NodeId is not used.
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtPmcAcquireTraceAccess(
+    HSAuint32 NodeId, //IN
+    HSATraceId TraceId //IN
+    )
+{
+    struct perf_trace *handle;
+
+    if (TraceId == 0)
+        return HSAKMT_STATUS_INVALID_PARAMETER;
+
+    handle = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
+
+    if (handle->magic4cc != HSA_PERF_MAGIC4CC)
+        return HSAKMT_STATUS_INVALID_HANDLE;
+
+    /* Without a usable lock file there is nothing to lock on. */
+    if (amd_hsa_thunk_lock_fd <= 0)
+        return HSAKMT_STATUS_ERROR;
+
+    if (lockf(amd_hsa_thunk_lock_fd, F_TLOCK, 0) != 0)
+        return HSAKMT_STATUS_ERROR;
+
+    return HSAKMT_STATUS_SUCCESS;
+}
+
+
+/**
+  Allows a user mode process to release exclusive access to the defined
+  set of (HW) counters used for tracing/profiling.
+
+  Unlocks the lock file with lockf(); fails with HSAKMT_STATUS_ERROR
+  when the lock file is not available or the unlock fails. NodeId is
+  not used.
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtPmcReleaseTraceAccess(
+    HSAuint32 NodeId, //IN
+    HSATraceId TraceId //IN
+    )
+{
+    struct perf_trace *handle;
+
+    if (TraceId == 0)
+        return HSAKMT_STATUS_INVALID_PARAMETER;
+
+    handle = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
+
+    if (handle->magic4cc != HSA_PERF_MAGIC4CC)
+        return HSAKMT_STATUS_INVALID_HANDLE;
+
+    /* Without a usable lock file there is nothing to unlock. */
+    if (amd_hsa_thunk_lock_fd <= 0)
+        return HSAKMT_STATUS_ERROR;
+
+    if (lockf(amd_hsa_thunk_lock_fd, F_ULOCK, 0) != 0)
+        return HSAKMT_STATUS_ERROR;
+
+    return HSAKMT_STATUS_SUCCESS;
+}
+
+
+/**
+  Starts tracing operation on a previously established set of
+  performance counters.
+
+  Only the trace state is updated here; the buffer arguments are
+  checked for being non-NULL / non-zero but are not otherwise used.
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtPmcStartTrace(
+    HSATraceId TraceId, //IN
+    void* TraceBuffer, //IN (page aligned)
+    HSAuint64 TraceBufferSizeBytes //IN (page aligned)
+    )
+{
+    struct perf_trace *handle;
+
+    if (TraceId == 0 || TraceBuffer == NULL || TraceBufferSizeBytes == 0)
+        return HSAKMT_STATUS_INVALID_PARAMETER;
+
+    handle = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
+    if (handle->magic4cc != HSA_PERF_MAGIC4CC)
+        return HSAKMT_STATUS_INVALID_HANDLE;
+
+    handle->state = PERF_TRACE_STATE__STARTED;
+
+    return HSAKMT_STATUS_SUCCESS;
+}
+
+
+/**
+  Forces an update of all the counters that a previously started trace
+  operation has registered.
+
+  Currently this only validates the trace handle.
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtPmcQueryTrace(
+    HSATraceId TraceId //IN
+    )
+{
+    struct perf_trace *handle;
+
+    if (TraceId == 0)
+        return HSAKMT_STATUS_INVALID_PARAMETER;
+
+    handle = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
+    if (handle->magic4cc != HSA_PERF_MAGIC4CC)
+        return HSAKMT_STATUS_INVALID_HANDLE;
+
+    return HSAKMT_STATUS_SUCCESS;
+}
+
+
+/**
+  Stops tracing operation on a previously established set of
+  performance counters.
+
+  Validates the trace handle and marks the trace as stopped.
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtPmcStopTrace(
+    HSATraceId TraceId //IN
+    )
+{
+    struct perf_trace *handle;
+
+    if (TraceId == 0)
+        return HSAKMT_STATUS_INVALID_PARAMETER;
+
+    handle = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
+    if (handle->magic4cc != HSA_PERF_MAGIC4CC)
+        return HSAKMT_STATUS_INVALID_HANDLE;
+
+    handle->state = PERF_TRACE_STATE__STOPPED;
+
+    return HSAKMT_STATUS_SUCCESS;
+}
diff --git a/hsakmt/pmc_table.c b/hsakmt/pmc_table.c
new file mode 100644
index 0000000..0390639
--- /dev/null
+++ b/hsakmt/pmc_table.c
@@ -0,0 +1,134 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including
+ * the next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "libhsakmt.h"
+#include "pmc_table.h"
+
+
+/*
+ * Counter ids reported for the SQ block on Kaveri devices:
+ * 1..162 and 168..250 (ids 163..167 are not listed).
+ */
+static uint32_t kaveri_sq_counter_ids[] = {
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
+ 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
+ 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
+ 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82,
+ 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101,
+ 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
+ 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133,
+ 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
+ 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 168, 169, 170,
+ 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186,
+ 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202,
+ 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218,
+ 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234,
+ 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250
+};
+
+/*
+ * Counter ids reported for the SQ block on Carrizo devices:
+ * 1..162 and 168..250 (ids 163..167 are not listed).
+ */
+static uint32_t carrizo_sq_counter_ids[] = {
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
+ 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
+ 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
+ 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82,
+ 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101,
+ 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
+ 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133,
+ 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
+ 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 168, 169, 170,
+ 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186,
+ 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202,
+ 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218,
+ 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234,
+ 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250
+};
+
+/* Counter block table for Kaveri devices, indexed by enum perf_block_id. */
+static struct perf_counter_block kaveri_blocks[PERFCOUNTER_BLOCKID__MAX] = {
+    [PERFCOUNTER_BLOCKID__SQ] = {
+        .counter_ids = kaveri_sq_counter_ids,
+        .num_of_counters = sizeof(kaveri_sq_counter_ids)/sizeof(kaveri_sq_counter_ids[0]),
+        .num_of_slots = 16,
+        .counter_size_in_bits = 64,
+        .counter_mask = BITMASK(64),
+    },
+};
+
+/* Counter block table for Carrizo devices, indexed by enum perf_block_id. */
+static struct perf_counter_block carrizo_blocks[PERFCOUNTER_BLOCKID__MAX] = {
+    [PERFCOUNTER_BLOCKID__SQ] = {
+        .counter_ids = carrizo_sq_counter_ids,
+        .num_of_counters = sizeof(carrizo_sq_counter_ids)/sizeof(carrizo_sq_counter_ids[0]),
+        .num_of_slots = 16,
+        .counter_size_in_bits = 64,
+        .counter_mask = BITMASK(64),
+    },
+};
+
+/*
+ * Copies the description of one counter block of a device into *block.
+ *
+ * dev_id selects the per-device table (Kaveri or Carrizo ids are
+ * recognized); block_id selects the entry inside it and must be below
+ * PERFCOUNTER_BLOCKID__MAX, since the tables have exactly that many
+ * entries.
+ *
+ * Returns HSAKMT_STATUS_SUCCESS, or HSAKMT_STATUS_INVALID_PARAMETER for
+ * a NULL block, an out-of-range block_id or an unknown dev_id (in which
+ * case *block is left untouched).
+ */
+HSAKMT_STATUS
+get_block_properties(uint16_t dev_id,
+    enum perf_block_id block_id,
+    struct perf_counter_block *block)
+{
+    HSAKMT_STATUS rc = HSAKMT_STATUS_SUCCESS;
+
+    if (block == NULL)
+        return HSAKMT_STATUS_INVALID_PARAMETER;
+
+    /* ">=": PERFCOUNTER_BLOCKID__MAX itself is one past the last table entry. */
+    if (block_id >= PERFCOUNTER_BLOCKID__MAX || block_id < PERFCOUNTER_BLOCKID__FIRST)
+        return HSAKMT_STATUS_INVALID_PARAMETER;
+
+    switch(dev_id) {
+    /* Kaveri */
+    case 0x1304:
+    case 0x1305:
+    case 0x1306:
+    case 0x1307:
+    case 0x1309:
+    case 0x130A:
+    case 0x130B:
+    case 0x130C:
+    case 0x130D:
+    case 0x130E:
+    case 0x130F:
+    case 0x1310:
+    case 0x1311:
+    case 0x1312:
+    case 0x1313:
+    case 0x1315:
+    case 0x1316:
+    case 0x1317:
+    case 0x1318:
+    case 0x131B:
+    case 0x131C:
+    case 0x131D:
+        *block = kaveri_blocks[block_id];
+        break;
+
+    /* Carrizo */
+    case 0x9870:
+    case 0x9874:
+    case 0x9875:
+    case 0x9876:
+    case 0x9877:
+        *block = carrizo_blocks[block_id];
+        break;
+
+    default:
+        rc = HSAKMT_STATUS_INVALID_PARAMETER;
+    }
+
+    return rc;
+}
+
+
diff --git a/hsakmt/pmc_table.h b/hsakmt/pmc_table.h
new file mode 100644
index 0000000..35ed07e
--- /dev/null
+++ b/hsakmt/pmc_table.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including
+ * the next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef PMC_TABLE_H
+#define PMC_TABLE_H
+
+#include "libhsakmt.h"
+
+/*
+ * Identifiers of the hardware blocks that expose performance counters.
+ * The values index the per-device block tables in pmc_table.c;
+ * PERFCOUNTER_BLOCKID__MAX is the number of blocks, not a valid id.
+ */
+enum perf_block_id {
+ PERFCOUNTER_BLOCKID__FIRST = 0,
+ PERFCOUNTER_BLOCKID__SQ = PERFCOUNTER_BLOCKID__FIRST,
+ PERFCOUNTER_BLOCKID__MAX
+};
+
+/* Description of one counter block of a device. */
+struct perf_counter_block {
+ uint32_t num_of_slots; /* reported to API users as the block's NumConcurrent */
+ uint32_t num_of_counters; /* number of entries in counter_ids */
+ uint32_t *counter_ids; /* table of counter ids, not owned by this struct */
+ uint32_t counter_size_in_bits;
+ uint64_t counter_mask;
+};
+
+/*
+ * Copies the description of block `block_id` of device `dev_id` into
+ * *block. Returns HSAKMT_STATUS_INVALID_PARAMETER for an unknown device
+ * id or an invalid block id.
+ */
+HSAKMT_STATUS
+get_block_properties(uint16_t dev_id,
+ enum perf_block_id block_id,
+ struct perf_counter_block *block);
+
+#endif // PMC_TABLE_H
diff --git a/hsakmt/queues.c b/hsakmt/queues.c
new file mode 100644
index 0000000..2d7692f
--- /dev/null
+++ b/hsakmt/queues.c
@@ -0,0 +1,341 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including
+ * the next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "libhsakmt.h"
+#include "linux/kfd_ioctl.h"
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <math.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+
+/*
+ * Size in bytes of the doorbell mapping: 1024 doorbells, 4 bytes each.
+ * The expansion is parenthesized so the macro behaves as a single value in
+ * any expression (e.g. x / DOORBELLS_PAGE_SIZE).
+ */
+#define DOORBELLS_PAGE_SIZE (1024 * 4)
+
+/*
+ * Per-ASIC queue parameters. Each size is the number of bytes that
+ * handle_concrete_asic() allocates for a new queue; a size of 0 means the
+ * corresponding buffer is not allocated.
+ */
+struct device_info
+{
+ uint32_t ctx_save_restore_size;
+ uint32_t eop_buffer_size;
+};
+
+/* Kaveri: both sizes are 0, so no EOP or context save/restore buffer is allocated. */
+struct device_info kaveri_device_info = {
+ .ctx_save_restore_size = 0,
+ .eop_buffer_size = 0,
+};
+
+/* Carrizo: queues get a 4096-byte EOP buffer and a 2756608-byte context save/restore area. */
+struct device_info carrizo_device_info = {
+ .ctx_save_restore_size = 2756608,
+ .eop_buffer_size = 4096,
+};
+
+/* One entry of the supported_devices table: a device id and its parameters. */
+struct device_id
+{
+ uint16_t dev_id;
+ struct device_info *dev_info;
+};
+
+/*
+ * Table mapping known device ids to their per-ASIC parameters.
+ * The { 0, NULL } entry is the terminator that get_device_info_by_dev_id()
+ * stops on, so it must stay last and 0 cannot be used as a real device id.
+ */
+struct device_id supported_devices[] = {
+ { 0x1304, &kaveri_device_info }, /* Kaveri */
+ { 0x1305, &kaveri_device_info }, /* Kaveri */
+ { 0x1306, &kaveri_device_info }, /* Kaveri */
+ { 0x1307, &kaveri_device_info }, /* Kaveri */
+ { 0x1309, &kaveri_device_info }, /* Kaveri */
+ { 0x130A, &kaveri_device_info }, /* Kaveri */
+ { 0x130B, &kaveri_device_info }, /* Kaveri */
+ { 0x130C, &kaveri_device_info }, /* Kaveri */
+ { 0x130D, &kaveri_device_info }, /* Kaveri */
+ { 0x130E, &kaveri_device_info }, /* Kaveri */
+ { 0x130F, &kaveri_device_info }, /* Kaveri */
+ { 0x1310, &kaveri_device_info }, /* Kaveri */
+ { 0x1311, &kaveri_device_info }, /* Kaveri */
+ { 0x1312, &kaveri_device_info }, /* Kaveri */
+ { 0x1313, &kaveri_device_info }, /* Kaveri */
+ { 0x1315, &kaveri_device_info }, /* Kaveri */
+ { 0x1316, &kaveri_device_info }, /* Kaveri */
+ { 0x1317, &kaveri_device_info }, /* Kaveri */
+ { 0x1318, &kaveri_device_info }, /* Kaveri */
+ { 0x131B, &kaveri_device_info }, /* Kaveri */
+ { 0x131C, &kaveri_device_info }, /* Kaveri */
+ { 0x131D, &kaveri_device_info }, /* Kaveri */
+ { 0x9870, &carrizo_device_info }, /* Carrizo */
+ { 0x9874, &carrizo_device_info }, /* Carrizo */
+ { 0x9875, &carrizo_device_info }, /* Carrizo */
+ { 0x9876, &carrizo_device_info }, /* Carrizo */
+ { 0x9877, &carrizo_device_info }, /* Carrizo */
+ { 0, NULL }
+};
+
+/*
+ * User-mode bookkeeping for one queue. A pointer to this structure is what
+ * hsaKmtCreateQueue() hands back as the queue handle (QueueResource->QueueId).
+ */
+struct queue
+{
+ uint32_t queue_id; /* id returned by the create-queue ioctl */
+ uint32_t wptr; /* write pointer storage used for non-AQL queues */
+ uint32_t rptr; /* read pointer storage used for non-AQL queues */
+ void *eop_buffer; /* allocated by handle_concrete_asic(), may be NULL */
+ void *ctx_save_restore; /* allocated by handle_concrete_asic(), may be NULL */
+};
+
+/*
+ * Doorbell mapping state for one node. need_mmap stays true until
+ * hsaKmtCreateQueue() has mapped the doorbells successfully; doorbells then
+ * holds the mapping. doorbells_mutex serializes that first-time mapping.
+ */
+struct process_doorbells
+{
+ bool need_mmap;
+ void* doorbells;
+ pthread_mutex_t doorbells_mutex;
+};
+
+/*
+ * One doorbell state per supported GPU, all starting unmapped. The table is
+ * indexed by NodeId in hsaKmtCreateQueue(). The initializer uses the GNU
+ * range-designator extension ([first ... last]).
+ */
+struct process_doorbells doorbells[] = {[0 ... (NUM_OF_SUPPORTED_GPUS-1)] = {.need_mmap = true, .doorbells = NULL, .doorbells_mutex = PTHREAD_MUTEX_INITIALIZER}};
+
+/*
+ * Find the per-ASIC parameters registered for dev_id.
+ *
+ * Walks supported_devices up to its zero-id terminator and returns the
+ * matching entry's device_info, or NULL when the id is not in the table.
+ */
+static struct device_info *get_device_info_by_dev_id(uint16_t dev_id)
+{
+	const struct device_id *entry;
+
+	for (entry = supported_devices; entry->dev_id != 0; entry++) {
+		if (entry->dev_id == dev_id)
+			return entry->dev_info;
+	}
+
+	return NULL;
+}
+
+/*
+ * Release a queue descriptor together with the buffers attached to it.
+ * q must not be NULL; either buffer pointer may be NULL.
+ */
+static void free_queue(struct queue *q)
+{
+	/* free(NULL) is a no-op, so the buffers need no individual checks. */
+	free(q->ctx_save_restore);
+	free(q->eop_buffer);
+	free(q);
+}
+
+/*
+ * Allocate size bytes aligned to align, make the region readable, writable
+ * and executable, and zero it.
+ *
+ * Returns the buffer (to be released with free()), or NULL when either the
+ * allocation or the protection change fails.
+ */
+static void* allocate_exec_aligned_memory(uint32_t size, uint32_t align)
+{
+	void *mem = NULL;
+
+	if (posix_memalign(&mem, align, size) != 0)
+		return NULL;
+
+	if (mprotect(mem, size, PROT_READ | PROT_WRITE | PROT_EXEC) != 0) {
+		free(mem);
+		return NULL;
+	}
+
+	return memset(mem, 0, size);
+}
+
+/*
+ * Allocate the ASIC-specific buffers a new queue needs and describe them in
+ * the create-queue ioctl arguments.
+ *
+ * dev_info may be NULL (unknown device), in which case nothing is allocated.
+ * Buffers are stored in q, so on failure the caller releases whatever was
+ * already allocated through free_queue().
+ *
+ * Returns HSAKMT_STATUS_SUCCESS, or HSAKMT_STATUS_NO_MEMORY when an
+ * allocation fails.
+ */
+static int handle_concrete_asic(struct device_info *dev_info, struct queue *q,
+				struct kfd_ioctl_create_queue_args *args)
+{
+	if (dev_info == NULL)
+		return HSAKMT_STATUS_SUCCESS;
+
+	if (dev_info->eop_buffer_size > 0) {
+		q->eop_buffer = allocate_exec_aligned_memory(
+				dev_info->eop_buffer_size, PAGE_SIZE);
+		if (q->eop_buffer == NULL)
+			return HSAKMT_STATUS_NO_MEMORY;
+
+		args->eop_buffer_address = (uintptr_t)q->eop_buffer;
+		args->eop_buffer_size = dev_info->eop_buffer_size;
+	}
+
+	if (dev_info->ctx_save_restore_size > 0) {
+		args->ctx_save_restore_size = dev_info->ctx_save_restore_size;
+		q->ctx_save_restore = allocate_exec_aligned_memory(
+				dev_info->ctx_save_restore_size, PAGE_SIZE);
+		if (q->ctx_save_restore == NULL)
+			return HSAKMT_STATUS_NO_MEMORY;
+
+		args->ctx_save_restore_address = (uintptr_t)q->ctx_save_restore;
+	}
+
+	return HSAKMT_STATUS_SUCCESS;
+}
+
+/*
+ * Create a hardware queue on node NodeId and map its doorbell.
+ *
+ * QueueResource is both input and output: for HSA_QUEUE_COMPUTE_AQL the
+ * caller supplies QueueRptrValue/QueueWptrValue; for other types they are
+ * pointed at storage inside the library's queue descriptor. On success
+ * QueueId and Queue_DoorBell are filled in. Event is currently unused.
+ *
+ * Returns HSAKMT_STATUS_SUCCESS, or:
+ *   HSAKMT_STATUS_INVALID_PARAMETER - NULL QueueResource or unknown Type
+ *   HSAKMT_STATUS_NOT_IMPLEMENTED   - HSA_QUEUE_SDMA
+ *   HSAKMT_STATUS_NO_MEMORY         - allocation failure
+ *   HSAKMT_STATUS_ERROR             - the create ioctl or doorbell mmap failed
+ *   the status of validate_nodeid() for an invalid node
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtCreateQueue(
+	HSAuint32 NodeId,		//IN
+	HSA_QUEUE_TYPE Type,		//IN
+	HSAuint32 QueuePercentage,	//IN
+	HSA_QUEUE_PRIORITY Priority,	//IN
+	void* QueueAddress,		//IN
+	HSAuint64 QueueSizeInBytes,	//IN
+	HsaEvent* Event,		//IN
+	HsaQueueResource* QueueResource	//OUT
+	)
+{
+	HSAKMT_STATUS result;
+	uint32_t gpu_id;
+	uint16_t dev_id;
+	struct device_info *dev_info;
+	int err;
+	void* ptr;
+	CHECK_KFD_OPEN();
+
+	if (QueueResource == NULL)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
+	result = validate_nodeid(NodeId, &gpu_id);
+	if (result != HSAKMT_STATUS_SUCCESS)
+		return result;
+
+	struct queue *q = malloc(sizeof(struct queue));
+	if (q == NULL)
+		return HSAKMT_STATUS_NO_MEMORY;
+	memset(q, 0, sizeof(*q));
+
+	struct kfd_ioctl_create_queue_args args;
+	memset(&args, 0, sizeof(args));
+
+	dev_id = get_device_id_by_node(NodeId);
+	dev_info = get_device_info_by_dev_id(dev_id);
+	args.gpu_id = gpu_id;
+
+	err = handle_concrete_asic(dev_info, q, &args);
+	if (err != HSAKMT_STATUS_SUCCESS) {
+		free_queue(q);
+		return err;
+	}
+
+	/*
+	 * From here on q may own ASIC buffers, so every error path must go
+	 * through free_queue() rather than a plain free().
+	 */
+	switch (Type)
+	{
+	case HSA_QUEUE_COMPUTE: args.queue_type = KFD_IOC_QUEUE_TYPE_COMPUTE; break;
+	case HSA_QUEUE_SDMA: free_queue(q); return HSAKMT_STATUS_NOT_IMPLEMENTED;
+	case HSA_QUEUE_COMPUTE_AQL: args.queue_type = KFD_IOC_QUEUE_TYPE_COMPUTE_AQL; break;
+	default: free_queue(q); return HSAKMT_STATUS_INVALID_PARAMETER;
+	}
+
+	if (Type != HSA_QUEUE_COMPUTE_AQL)
+	{
+		QueueResource->QueueRptrValue = (uintptr_t)&q->rptr;
+		QueueResource->QueueWptrValue = (uintptr_t)&q->wptr;
+	}
+
+	args.read_pointer_address = QueueResource->QueueRptrValue;
+	args.write_pointer_address = QueueResource->QueueWptrValue;
+	args.ring_base_address = (uintptr_t)QueueAddress;
+	args.ring_size = QueueSizeInBytes;
+	args.queue_percentage = QueuePercentage;
+	args.queue_priority = Priority;
+
+	err = kmtIoctl(kfd_fd, AMDKFD_IOC_CREATE_QUEUE, &args);
+
+	if (err == -1)
+	{
+		free_queue(q);
+		return HSAKMT_STATUS_ERROR;
+	}
+
+	q->queue_id = args.queue_id;
+
+	/*
+	 * NOTE(review): doorbells[] has NUM_OF_SUPPORTED_GPUS entries while
+	 * NodeId was only checked by validate_nodeid(); confirm that a valid
+	 * node id can never exceed the table size.
+	 */
+	pthread_mutex_lock(&doorbells[NodeId].doorbells_mutex);
+
+	if (doorbells[NodeId].need_mmap) {
+		ptr = mmap(0, DOORBELLS_PAGE_SIZE, PROT_READ|PROT_WRITE,
+			   MAP_SHARED, kfd_fd, args.doorbell_offset);
+
+		if (ptr == MAP_FAILED) {
+			pthread_mutex_unlock(&doorbells[NodeId].doorbells_mutex);
+			/*
+			 * hsaKmtDestroyQueue() takes the queue handle (the
+			 * descriptor pointer), not the kernel queue id, and
+			 * frees the descriptor itself when it succeeds.
+			 */
+			if (hsaKmtDestroyQueue(PORT_VPTR_TO_UINT64(q)) != HSAKMT_STATUS_SUCCESS)
+				free_queue(q);
+			return HSAKMT_STATUS_ERROR;
+		}
+
+		doorbells[NodeId].need_mmap = false;
+		doorbells[NodeId].doorbells = ptr;
+	}
+
+	pthread_mutex_unlock(&doorbells[NodeId].doorbells_mutex);
+
+	QueueResource->QueueId = PORT_VPTR_TO_UINT64(q);
+	QueueResource->Queue_DoorBell = VOID_PTR_ADD32(doorbells[NodeId].doorbells, q->queue_id);
+
+	return HSAKMT_STATUS_SUCCESS;
+}
+
+
+/*
+ * Change the ring buffer, percentage and priority of an existing queue.
+ *
+ * QueueId is the handle returned by hsaKmtCreateQueue(). Event is currently
+ * unused. Returns HSAKMT_STATUS_INVALID_PARAMETER for a NULL handle,
+ * HSAKMT_STATUS_ERROR when the ioctl fails, HSAKMT_STATUS_SUCCESS otherwise.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtUpdateQueue(
+	HSA_QUEUEID QueueId,		//IN
+	HSAuint32 QueuePercentage,	//IN
+	HSA_QUEUE_PRIORITY Priority,	//IN
+	void* QueueAddress,		//IN
+	HSAuint64 QueueSize,		//IN
+	HsaEvent* Event			//IN
+	)
+{
+	struct kfd_ioctl_update_queue_args arg;
+	struct queue *q = PORT_UINT64_TO_VPTR(QueueId);
+
+	CHECK_KFD_OPEN();
+
+	if (q == NULL)
+		return (HSAKMT_STATUS_INVALID_PARAMETER);
+
+	/*
+	 * Zero the whole argument block first, as the create and destroy
+	 * paths do, so no uninitialized stack bytes reach the kernel.
+	 */
+	memset(&arg, 0, sizeof(arg));
+
+	arg.queue_id = (HSAuint32)q->queue_id;
+	arg.ring_base_address = (uintptr_t)QueueAddress;
+	arg.ring_size = QueueSize;
+	arg.queue_percentage = QueuePercentage;
+	arg.queue_priority = Priority;
+
+	int err = kmtIoctl(kfd_fd, AMDKFD_IOC_UPDATE_QUEUE, &arg);
+	if (err == -1)
+	{
+		return HSAKMT_STATUS_ERROR;
+	}
+
+	return HSAKMT_STATUS_SUCCESS;
+}
+
+/*
+ * Destroy a queue created by hsaKmtCreateQueue().
+ *
+ * The library-side descriptor is released only after the kernel has
+ * destroyed the queue; if the ioctl fails the handle stays valid.
+ * Returns HSAKMT_STATUS_INVALID_PARAMETER for a NULL handle,
+ * HSAKMT_STATUS_ERROR on ioctl failure, HSAKMT_STATUS_SUCCESS otherwise.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtDestroyQueue(
+	HSA_QUEUEID QueueId	//IN
+	)
+{
+	CHECK_KFD_OPEN();
+
+	struct kfd_ioctl_destroy_queue_args args;
+	struct queue *q = PORT_UINT64_TO_VPTR(QueueId);
+
+	if (q == NULL)
+		return (HSAKMT_STATUS_INVALID_PARAMETER);
+
+	memset(&args, 0, sizeof(args));
+	args.queue_id = q->queue_id;
+
+	if (kmtIoctl(kfd_fd, AMDKFD_IOC_DESTROY_QUEUE, &args) == -1)
+		return HSAKMT_STATUS_ERROR;
+
+	free_queue(q);
+	return HSAKMT_STATUS_SUCCESS;
+}
diff --git a/hsakmt/time.c b/hsakmt/time.c
new file mode 100644
index 0000000..45709f9
--- /dev/null
+++ b/hsakmt/time.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including
+ * the next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "libhsakmt.h"
+#include "linux/kfd_ioctl.h"
+
+/*
+ * Read the GPU, CPU and system clock counters of node NodeId.
+ *
+ * Counters receives the values reported by the clock-counters ioctl and is
+ * left untouched on failure. Returns HSAKMT_STATUS_INVALID_PARAMETER for a
+ * NULL Counters, the status of validate_nodeid() for an invalid node,
+ * HSAKMT_STATUS_ERROR when the ioctl fails, HSAKMT_STATUS_SUCCESS otherwise.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtGetClockCounters(
+	HSAuint32 NodeId,		//IN
+	HsaClockCounters* Counters	//OUT
+	)
+{
+	HSAKMT_STATUS result;
+	uint32_t gpu_id;
+	/* Zero-initialized so no uninitialized stack bytes reach the kernel. */
+	struct kfd_ioctl_get_clock_counters_args args = {0};
+	int err;
+
+	CHECK_KFD_OPEN();
+
+	if (!Counters)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
+	result = validate_nodeid(NodeId, &gpu_id);
+	if (result != HSAKMT_STATUS_SUCCESS)
+		return result;
+
+	args.gpu_id = gpu_id;
+
+	err = kmtIoctl(kfd_fd, AMDKFD_IOC_GET_CLOCK_COUNTERS, &args);
+	if (err < 0) {
+		result = HSAKMT_STATUS_ERROR;
+	} else {
+		/* At this point the result is already HSAKMT_STATUS_SUCCESS */
+		Counters->GPUClockCounter = args.gpu_clock_counter;
+		Counters->CPUClockCounter = args.cpu_clock_counter;
+		Counters->SystemClockCounter = args.system_clock_counter;
+		Counters->SystemClockFrequencyHz = args.system_clock_freq;
+	}
+
+	return result;
+}
diff --git a/hsakmt/topology.c b/hsakmt/topology.c
new file mode 100644
index 0000000..903b6f7
--- /dev/null
+++ b/hsakmt/topology.c
@@ -0,0 +1,991 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including
+ * the next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdio.h>
+#include <dirent.h>
+#include <malloc.h>
+#include <string.h>
+
+#include "libhsakmt.h"
+#include "fmm.h"
+/* Size of the scratch buffer each sysfs attribute file is read into. */
+#define PAGE_SIZE 4096
+/* Smaller of two values; note that one argument is evaluated twice. */
+#define MIN(X,Y) ((X) < (Y) ? (X) : (Y))
+/*
+ * Number of extra memory banks hsaKmtGetNodeProperties() adds on top of the
+ * banks reported by sysfs.
+ */
+#define NUM_OF_HEAPS 2
+/* SYSFS related */
+#define KFD_SYSFS_PATH_GENERATION_ID "/sys/devices/virtual/kfd/kfd/topology/generation_id"
+#define KFD_SYSFS_PATH_SYSTEM_PROPERTIES "/sys/devices/virtual/kfd/kfd/topology/system_properties"
+#define KFD_SYSFS_PATH_NODES "/sys/devices/virtual/kfd/kfd/topology/nodes"
+
+/*
+ * Snapshot of one topology node. The three arrays are allocated by
+ * topology_take_snapshot() and released by free_node(); each is NULL when
+ * the corresponding count in node is zero.
+ */
+typedef struct {
+ uint32_t gpu_id;
+ HsaNodeProperties node;
+ HsaMemoryProperties *mem; /* node.NumMemoryBanks elements */
+ HsaCacheProperties *cache; /* node.NumCaches elements */
+ HsaIoLinkProperties *link; /* node.NumIOLinks elements */
+} node_t;
+
+/*
+ * Current topology snapshot: system holds the system-wide properties and
+ * node is an array of system->NumNodes entries. Both are NULL until
+ * topology_take_snapshot() succeeds. The public entry points in this file
+ * access them with hsakmt_mutex held.
+ */
+static HsaSystemProperties *system = NULL;
+static node_t *node = NULL;
+
+/* Helpers defined later in this file. */
+static HSAKMT_STATUS topology_take_snapshot(void);
+static HSAKMT_STATUS topology_drop_snapshot(void);
+static int get_cpu_stepping(uint16_t* stepping);
+
+/*
+ * Release the memory, cache and io-link arrays owned by a node entry.
+ * The entry itself is not freed (entries live inside a node_t array).
+ */
+static void
+free_node(node_t *n)
+{
+	assert(n);
+
+	/* Still guard against NULL when assertions are compiled out. */
+	if (!n)
+		return;
+
+	/* free(NULL) is a no-op, so unset arrays need no special casing. */
+	free(n->link);
+	free(n->cache);
+	free(n->mem);
+}
+
+/*
+ * Read the topology generation id from sysfs into *gen.
+ *
+ * Returns HSAKMT_STATUS_ERROR when the file cannot be opened or does not
+ * start with an unsigned number, HSAKMT_STATUS_SUCCESS otherwise.
+ */
+static HSAKMT_STATUS
+topology_sysfs_get_generation(uint32_t *gen) {
+	FILE *fd;
+	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
+
+	assert(gen);
+	fd = fopen(KFD_SYSFS_PATH_GENERATION_ID, "r");
+	if (!fd)
+		return HSAKMT_STATUS_ERROR;
+
+	/*
+	 * "%u" is the conversion matching the 32-bit destination; the
+	 * previous "%ul" was "%u" followed by a literal 'l' to match.
+	 */
+	if (fscanf(fd, "%u", gen) != 1)
+		ret = HSAKMT_STATUS_ERROR;
+
+	fclose(fd);
+	return ret;
+}
+
+/*
+ * Fill *props from the sysfs system_properties file and count the nodes.
+ *
+ * The file is parsed as a sequence of "<name> <unsigned value>" lines;
+ * names other than platform_oem, platform_id and platform_rev are ignored.
+ * props->NumNodes is set to the number of entries (excluding "." and "..")
+ * in the nodes directory, or 0 if that directory cannot be opened.
+ *
+ * Returns HSAKMT_STATUS_ERROR when the properties file cannot be opened or
+ * read, HSAKMT_STATUS_NO_MEMORY when the read buffer cannot be allocated,
+ * HSAKMT_STATUS_SUCCESS otherwise.
+ */
+static HSAKMT_STATUS
+topology_sysfs_get_system_props(HsaSystemProperties *props) {
+	FILE *fd;
+	DIR *dirp;
+	char *read_buf, *p;
+	char prop_name[256];
+	long long unsigned int prop_val;
+	uint32_t node_count;
+	int prog;	/* %n stores through an int pointer */
+	struct dirent *dir;
+	int read_size;
+	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
+
+
+	assert(props);
+	fd = fopen(KFD_SYSFS_PATH_SYSTEM_PROPERTIES, "r");
+	if (!fd)
+		return HSAKMT_STATUS_ERROR;
+
+	read_buf = malloc(PAGE_SIZE);
+	if (!read_buf) {
+		ret = HSAKMT_STATUS_NO_MEMORY;
+		goto err1;
+	}
+
+	read_size = fread(read_buf, 1, PAGE_SIZE, fd);
+	if (read_size <= 0) {
+		ret = HSAKMT_STATUS_ERROR;
+		goto err2;
+	}
+
+	/* Since we're using the buffer as a string, we make sure the string terminates */
+	if(read_size >= PAGE_SIZE)
+		read_size = PAGE_SIZE-1;
+	read_buf[read_size] = 0;
+
+	/*
+	 * Read the system properties. The name conversion is limited to
+	 * sizeof(prop_name) - 1 characters so an over-long token cannot
+	 * overflow prop_name.
+	 */
+	prog = 0;
+	p = read_buf;
+	while(sscanf(p+=prog, "%255s %llu\n%n", prop_name, &prop_val, &prog) == 2) {
+		if (strcmp(prop_name,"platform_oem") == 0)
+			props->PlatformOem = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"platform_id") == 0)
+			props->PlatformId = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"platform_rev") == 0)
+			props->PlatformRev = (uint32_t)prop_val;
+	}
+
+	/*
+	 * Discover the number of nodes
+	 */
+	node_count = 0;
+	dirp = opendir(KFD_SYSFS_PATH_NODES);
+	if(dirp) {
+		/*
+		 * Assuming that inside nodes folder there are only folders
+		 * which represent the node numbers
+		 */
+		while ((dir = readdir(dirp)) != 0) {
+			if ((strcmp(dir->d_name, ".") == 0) ||
+					(strcmp(dir->d_name, "..") == 0))
+				continue;
+			node_count++;
+		}
+		closedir(dirp);
+	}
+	props->NumNodes = node_count;
+
+
+err2:
+	free(read_buf);
+err1:
+	fclose(fd);
+	return ret;
+}
+
+/*
+ * Read the gpu_id attribute of topology node node_id into *gpu_id.
+ *
+ * Returns HSAKMT_STATUS_ERROR when the attribute cannot be opened or does
+ * not start with an unsigned number, HSAKMT_STATUS_SUCCESS otherwise.
+ */
+static HSAKMT_STATUS
+topology_sysfs_get_gpu_id(uint32_t node_id, uint32_t *gpu_id) {
+	FILE *fd;
+	char path[256];
+	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
+
+	assert(gpu_id);
+	/* node_id is unsigned, so it is formatted with %u */
+	snprintf(path, sizeof(path), "%s/%u/gpu_id", KFD_SYSFS_PATH_NODES, node_id);
+	fd = fopen(path, "r");
+	if (!fd)
+		return HSAKMT_STATUS_ERROR;
+	/* "%u" matches the 32-bit destination ("%ul" was "%u" + literal 'l') */
+	if (fscanf(fd, "%u", gpu_id) != 1) {
+		ret = HSAKMT_STATUS_ERROR;
+	}
+	fclose(fd);
+
+	return ret;
+}
+
+/*
+ * Read the description of topology node node_id from sysfs.
+ *
+ * Three attributes are consulted: gpu_id (stored in *gpu_id), name (copied
+ * into props->MarketingName) and properties (a sequence of
+ * "<name> <unsigned value>" lines; unknown names are ignored).
+ * props->EngineId is composed from the CPU stepping and the fw_version
+ * property.
+ *
+ * Returns HSAKMT_STATUS_ERROR when the name or properties attribute cannot
+ * be opened or read, or the name has no newline or is too long,
+ * HSAKMT_STATUS_NO_MEMORY when the read buffer cannot be allocated, and
+ * otherwise the status of the gpu_id lookup.
+ */
+static HSAKMT_STATUS
+topology_sysfs_get_node_props(uint32_t node_id, HsaNodeProperties *props, uint32_t *gpu_id) {
+	FILE *fd;
+	char *read_buf, *p;
+	char prop_name[256];
+	char path[256];
+	long long unsigned int prop_val;
+	uint32_t i;
+	int prog;	/* %n stores through an int pointer */
+	uint16_t stepping = 0, fw_version = 0;
+	int read_size;
+
+	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
+
+	assert(props);
+	assert(gpu_id);
+	/*
+	 * Retrieve the GPU ID. A failure here does not stop the parsing; it
+	 * is reported through the return value at the end.
+	 */
+	ret = topology_sysfs_get_gpu_id(node_id, gpu_id);
+
+	/* Retrieve the marketing name of the node */
+	snprintf(path, sizeof(path), "%s/%u/name", KFD_SYSFS_PATH_NODES, node_id);
+	fd = fopen(path, "r");
+	if (!fd)
+		return HSAKMT_STATUS_ERROR;
+
+	read_buf = malloc(PAGE_SIZE);
+	if (!read_buf) {
+		ret = HSAKMT_STATUS_NO_MEMORY;
+		goto err1;
+	}
+
+	read_size = fread(read_buf, 1, PAGE_SIZE, fd);
+	if (read_size <= 0) {
+		ret = HSAKMT_STATUS_ERROR;
+		goto err2;
+	}
+	p = memchr(read_buf, '\n', read_size);
+	if ((!p) || ((p-read_buf) > HSA_PUBLIC_NAME_SIZE)) {
+		ret = HSAKMT_STATUS_ERROR;
+		goto err2;
+	}
+	/*
+	 * Widen each byte of the name into a MarketingName element. The copy
+	 * stops one element early so the terminator written below always
+	 * stays inside an HSA_PUBLIC_NAME_SIZE-element buffer.
+	 */
+	for (i = 0; (i < HSA_PUBLIC_NAME_SIZE - 1) && (read_buf[i] != '\n'); i++)
+		props->MarketingName[i] = read_buf[i];
+	props->MarketingName[i] = 0;
+	fclose(fd);
+
+	/* Retrieve the node properties */
+	snprintf(path, sizeof(path), "%s/%u/properties", KFD_SYSFS_PATH_NODES, node_id);
+	fd = fopen(path, "r");
+	if (!fd) {
+		free(read_buf);
+		return HSAKMT_STATUS_ERROR;
+	}
+
+	read_size = fread(read_buf, 1, PAGE_SIZE, fd);
+	if (read_size <= 0) {
+		ret = HSAKMT_STATUS_ERROR;
+		goto err2;
+	}
+
+	/* Since we're using the buffer as a string, we make sure the string terminates */
+	if(read_size >= PAGE_SIZE)
+		read_size = PAGE_SIZE-1;
+	read_buf[read_size] = 0;
+
+	/*
+	 * Read the node properties. The name conversion is limited to
+	 * sizeof(prop_name) - 1 characters so an over-long token cannot
+	 * overflow prop_name.
+	 */
+	prog = 0;
+	p = read_buf;
+	while(sscanf(p+=prog, "%255s %llu\n%n", prop_name, &prop_val, &prog) == 2) {
+		if (strcmp(prop_name,"cpu_cores_count") == 0)
+			props->NumCPUCores = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"simd_count") == 0)
+			props->NumFComputeCores = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"mem_banks_count") == 0)
+			props->NumMemoryBanks = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"caches_count") == 0)
+			props->NumCaches = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"io_links_count") == 0)
+			props->NumIOLinks = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"cpu_core_id_base") == 0)
+			props->CComputeIdLo = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"simd_id_base") == 0)
+			props->FComputeIdLo = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"capability") == 0)
+			props->Capability.Value = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"max_waves_per_simd") == 0)
+			props->MaxWavesPerSIMD = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"lds_size_in_kb") == 0)
+			props->LDSSizeInKB = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"gds_size_in_kb") == 0)
+			props->GDSSizeInKB = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"wave_front_size") == 0)
+			props->WaveFrontSize = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"array_count") == 0)
+			props->NumShaderBanks = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"simd_arrays_per_engine") == 0)
+			props->NumArrays = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"cu_per_simd_array") == 0)
+			props->NumCUPerArray = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"simd_per_cu") == 0)
+			props->NumSIMDPerCU = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"max_slots_scratch_cu") == 0)
+			props->MaxSlotsScratchCU = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"fw_version") == 0)
+			fw_version = (uint16_t)prop_val;
+		else if (strcmp(prop_name,"vendor_id") == 0)
+			props->VendorId = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"device_id") == 0)
+			props->DeviceId = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"location_id") == 0)
+			props->LocationId = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"max_engine_clk_fcompute") == 0)
+			props->MaxEngineClockMhzFCompute = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"max_engine_clk_ccompute") == 0)
+			props->MaxEngineClockMhzCCompute = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"local_mem_size") == 0)
+			props->LocalMemSize = (uint32_t)prop_val;
+
+	}
+
+	/* stepping stays 0 if the lookup fails; its status is not propagated */
+	get_cpu_stepping(&stepping);
+	/* Shift as unsigned: a promoted int could overflow for stepping >= 0x8000 */
+	props->EngineId = (((uint32_t)stepping << 16) | fw_version);
+
+err2:
+	free(read_buf);
+err1:
+	fclose(fd);
+	return ret;
+}
+
+/*
+ * Read the properties of memory bank mem_id of node node_id into *props.
+ *
+ * The attribute is parsed as "<name> <unsigned value>" lines; unknown names
+ * are ignored and fields without a matching line are left untouched.
+ *
+ * Returns HSAKMT_STATUS_ERROR when the attribute cannot be opened or read,
+ * HSAKMT_STATUS_NO_MEMORY when the read buffer cannot be allocated,
+ * HSAKMT_STATUS_SUCCESS otherwise.
+ */
+static HSAKMT_STATUS
+topology_sysfs_get_mem_props(uint32_t node_id, uint32_t mem_id, HsaMemoryProperties *props) {
+	FILE *fd;
+	char *read_buf, *p;
+	char prop_name[256];
+	char path[256];
+	long long unsigned int prop_val;
+	int prog;	/* %n stores through an int pointer */
+	int read_size;
+	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
+
+	assert(props);
+	snprintf(path, sizeof(path), "%s/%u/mem_banks/%u/properties", KFD_SYSFS_PATH_NODES, node_id, mem_id);
+	fd = fopen(path, "r");
+	if (!fd) {
+		return HSAKMT_STATUS_ERROR;
+	}
+	read_buf = malloc(PAGE_SIZE);
+	if (!read_buf) {
+		ret = HSAKMT_STATUS_NO_MEMORY;
+		goto err1;
+	}
+
+	read_size = fread(read_buf, 1, PAGE_SIZE, fd);
+	if (read_size <= 0) {
+		ret = HSAKMT_STATUS_ERROR;
+		goto err2;
+	}
+
+	/* Since we're using the buffer as a string, we make sure the string terminates */
+	if(read_size >= PAGE_SIZE)
+		read_size = PAGE_SIZE-1;
+	read_buf[read_size] = 0;
+
+	/* %255s keeps an over-long token from overflowing prop_name */
+	prog = 0;
+	p = read_buf;
+	while(sscanf(p+=prog, "%255s %llu\n%n", prop_name, &prop_val, &prog) == 2) {
+		if (strcmp(prop_name,"heap_type") == 0)
+			props->HeapType = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"size_in_bytes") == 0)
+			props->SizeInBytes = prop_val;
+		else if (strcmp(prop_name,"flags") == 0)
+			props->Flags.MemoryProperty = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"width") == 0)
+			props->Width = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"mem_clk_max") == 0)
+			props->MemoryClockMax = (uint32_t)prop_val;
+	}
+
+err2:
+	free(read_buf);
+err1:
+	fclose(fd);
+	return ret;
+}
+
+/*
+ * Read the properties of cache cache_id of node node_id into *props.
+ *
+ * The attribute is parsed as "<name> <unsigned value>" lines up to the
+ * "sibling_map" line, whose value is a list of numbers separated by ','
+ * and stored into props->SiblingMap (at most HSA_CPU_SIBLINGS entries).
+ * Unknown names are ignored.
+ *
+ * Returns HSAKMT_STATUS_ERROR when the attribute cannot be opened or read,
+ * HSAKMT_STATUS_NO_MEMORY when the read buffer cannot be allocated,
+ * HSAKMT_STATUS_SUCCESS otherwise.
+ */
+static HSAKMT_STATUS
+topology_sysfs_get_cache_props(uint32_t node_id, uint32_t cache_id, HsaCacheProperties *props) {
+	FILE *fd;
+	char *read_buf, *p;
+	char prop_name[256];
+	char path[256];
+	long long unsigned int prop_val;
+	uint32_t i;
+	int prog;	/* %n stores through an int pointer */
+	int read_size;
+	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
+
+	assert(props);
+	snprintf(path, sizeof(path), "%s/%u/caches/%u/properties", KFD_SYSFS_PATH_NODES, node_id, cache_id);
+	fd = fopen(path, "r");
+	if (!fd) {
+		return HSAKMT_STATUS_ERROR;
+	}
+	read_buf = malloc(PAGE_SIZE);
+	if (!read_buf) {
+		ret = HSAKMT_STATUS_NO_MEMORY;
+		goto err1;
+	}
+
+	read_size = fread(read_buf, 1, PAGE_SIZE, fd);
+	if (read_size <= 0) {
+		ret = HSAKMT_STATUS_ERROR;
+		goto err2;
+	}
+
+	/* Since we're using the buffer as a string, we make sure the string terminates */
+	if(read_size >= PAGE_SIZE)
+		read_size = PAGE_SIZE-1;
+	read_buf[read_size] = 0;
+
+	/* %255s keeps an over-long token from overflowing prop_name */
+	prog = 0;
+	p = read_buf;
+	while(sscanf(p+=prog, "%255s %llu\n%n", prop_name, &prop_val, &prog) == 2) {
+		if (strcmp(prop_name,"processor_id_low") == 0)
+			props->ProcessorIdLow = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"level") == 0)
+			props->CacheLevel = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"size") == 0)
+			props->CacheSize = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"cache_line_size") == 0)
+			props->CacheLineSize = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"cache_lines_per_tag") == 0)
+			props->CacheLinesPerTag = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"association") == 0)
+			props->CacheAssociativity = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"latency") == 0)
+			props->CacheLatency = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"type") == 0)
+			props->CacheType.Value = (uint32_t)prop_val;
+		else if (strcmp(prop_name, "sibling_map") == 0)
+			break;	/* p still points at the start of this line */
+	}
+
+	/* Skip the "sibling_map " prefix; prog receives its length */
+	prog = 0;
+	if ((sscanf(p, "sibling_map %n", &prog)) == 0 && prog) {
+		i = 0;
+		while (i < HSA_CPU_SIBLINGS) {
+			p += prog;
+			/*
+			 * Reset prog so a missing separator after the last
+			 * entry is detected instead of reusing a stale offset.
+			 */
+			prog = 0;
+			if (sscanf(p, "%u%*[,\n]%n", &props->SiblingMap[i], &prog) != 1)
+				break;
+			i++;
+			if (prog == 0)
+				break;
+		}
+	}
+
+err2:
+	free(read_buf);
+err1:
+	fclose(fd);
+	return ret;
+}
+
+/*
+ * Read the properties of io link iolink_id of node node_id into *props.
+ *
+ * The attribute is parsed as "<name> <unsigned value>" lines; unknown names
+ * are ignored and fields without a matching line are left untouched.
+ *
+ * Returns HSAKMT_STATUS_ERROR when the attribute cannot be opened or read,
+ * HSAKMT_STATUS_NO_MEMORY when the read buffer cannot be allocated,
+ * HSAKMT_STATUS_SUCCESS otherwise.
+ */
+static HSAKMT_STATUS
+topology_sysfs_get_iolink_props(uint32_t node_id, uint32_t iolink_id, HsaIoLinkProperties *props) {
+	FILE *fd;
+	char *read_buf, *p;
+	char prop_name[256];
+	char path[256];
+	long long unsigned int prop_val;
+	int prog;	/* %n stores through an int pointer */
+	int read_size;
+	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
+
+	assert(props);
+	snprintf(path, sizeof(path), "%s/%u/io_link/%u/properties", KFD_SYSFS_PATH_NODES, node_id, iolink_id);
+	fd = fopen(path, "r");
+	if (!fd) {
+		return HSAKMT_STATUS_ERROR;
+	}
+	read_buf = malloc(PAGE_SIZE);
+	if (!read_buf) {
+		ret = HSAKMT_STATUS_NO_MEMORY;
+		goto err1;
+	}
+
+	read_size = fread(read_buf, 1, PAGE_SIZE, fd);
+	if (read_size <= 0) {
+		ret = HSAKMT_STATUS_ERROR;
+		goto err2;
+	}
+
+	/* Since we're using the buffer as a string, we make sure the string terminates */
+	if(read_size >= PAGE_SIZE)
+		read_size = PAGE_SIZE-1;
+	read_buf[read_size] = 0;
+
+	/* %255s keeps an over-long token from overflowing prop_name */
+	prog = 0;
+	p = read_buf;
+	while(sscanf(p+=prog, "%255s %llu\n%n", prop_name, &prop_val, &prog) == 2) {
+		if (strcmp(prop_name,"type") == 0)
+			props->IoLinkType = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"version_major") == 0)
+			props->VersionMajor = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"version_minor") == 0)
+			props->VersionMinor = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"node_from") == 0)
+			props->NodeFrom = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"node_to") == 0)
+			props->NodeTo = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"weight") == 0)
+			props->Weight = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"min_latency") == 0)
+			props->MinimumLatency = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"max_latency") == 0)
+			props->MaximumLatency = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"min_bandwidth") == 0)
+			props->MinimumBandwidth = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"max_bandwidth") == 0)
+			props->MaximumBandwidth = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"recommended_transfer_size") == 0)
+			props->RecTransferSize = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"flags") == 0)
+			props->Flags.LinkProperty = (uint32_t)prop_val;
+	}
+
+
+err2:
+	free(read_buf);
+err1:
+	fclose(fd);
+	return ret;
+}
+
+/*
+ * Release every entry of nodes[0..count) and then the array itself.
+ * Entries that were never filled in are all-zero (the array comes from
+ * calloc), which free_node() handles. A NULL array is ignored.
+ */
+static void
+free_node_array(node_t *nodes, uint32_t count)
+{
+	uint32_t k;
+
+	if (!nodes)
+		return;
+
+	for (k = 0; k < count; k++)
+		free_node(&nodes[k]);
+	free(nodes);
+}
+
+/*
+ * Read the whole topology from sysfs and install it as the current
+ * snapshot (the file-scope system and node variables).
+ *
+ * The generation id is read before and after the scan; if it changed in
+ * between, the partial result is discarded and the scan is repeated. A
+ * previously installed snapshot is released only once the new one is
+ * complete, so a failure leaves the old snapshot in place.
+ *
+ * Returns HSAKMT_STATUS_SUCCESS, HSAKMT_STATUS_NO_MEMORY on allocation
+ * failure, or the status of the failing sysfs reader.
+ */
+HSAKMT_STATUS
+topology_take_snapshot(void)
+{
+	uint32_t gen_start, gen_end, i, mem_id, cache_id, link_id;
+	HsaSystemProperties sys_props;
+	node_t *temp_nodes = NULL;
+	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
+
+retry:
+	ret = topology_sysfs_get_generation(&gen_start);
+	if (ret != HSAKMT_STATUS_SUCCESS)
+		return ret;
+	ret = topology_sysfs_get_system_props(&sys_props);
+	if (ret != HSAKMT_STATUS_SUCCESS)
+		return ret;
+	if(sys_props.NumNodes > 0) {
+		/* calloc(count, size) checks the multiplication for overflow */
+		temp_nodes = calloc(sys_props.NumNodes, sizeof(node_t));
+		if (!temp_nodes)
+			return HSAKMT_STATUS_NO_MEMORY;
+		for (i = 0; i < sys_props.NumNodes; i++) {
+			node_t *cur = &temp_nodes[i];
+
+			ret = topology_sysfs_get_node_props(i, &cur->node, &cur->gpu_id);
+			if (ret != HSAKMT_STATUS_SUCCESS)
+				goto err_free;
+
+			if (cur->node.NumMemoryBanks) {
+				cur->mem = calloc(cur->node.NumMemoryBanks, sizeof(HsaMemoryProperties));
+				if (!cur->mem) {
+					ret = HSAKMT_STATUS_NO_MEMORY;
+					goto err_free;
+				}
+				for (mem_id = 0; mem_id < cur->node.NumMemoryBanks; mem_id++) {
+					ret = topology_sysfs_get_mem_props(i, mem_id, &cur->mem[mem_id]);
+					if (ret != HSAKMT_STATUS_SUCCESS)
+						goto err_free;
+				}
+			}
+
+			if (cur->node.NumCaches) {
+				cur->cache = calloc(cur->node.NumCaches, sizeof(HsaCacheProperties));
+				if (!cur->cache) {
+					ret = HSAKMT_STATUS_NO_MEMORY;
+					goto err_free;
+				}
+				for (cache_id = 0; cache_id < cur->node.NumCaches; cache_id++) {
+					ret = topology_sysfs_get_cache_props(i, cache_id, &cur->cache[cache_id]);
+					if (ret != HSAKMT_STATUS_SUCCESS)
+						goto err_free;
+				}
+			}
+
+			if (cur->node.NumIOLinks) {
+				cur->link = calloc(cur->node.NumIOLinks, sizeof(HsaIoLinkProperties));
+				if (!cur->link) {
+					ret = HSAKMT_STATUS_NO_MEMORY;
+					goto err_free;
+				}
+				for (link_id = 0; link_id < cur->node.NumIOLinks; link_id++) {
+					ret = topology_sysfs_get_iolink_props(i, link_id, &cur->link[link_id]);
+					if (ret != HSAKMT_STATUS_SUCCESS)
+						goto err_free;
+				}
+			}
+		}
+	}
+
+	ret = topology_sysfs_get_generation(&gen_end);
+	if (ret != HSAKMT_STATUS_SUCCESS)
+		goto err_free;
+
+	/* The topology changed while it was being read: start over */
+	if (gen_start != gen_end) {
+		free_node_array(temp_nodes, sys_props.NumNodes);
+		temp_nodes = NULL;
+		goto retry;
+	}
+
+	if (system) {
+		/*
+		 * Release the previous snapshot completely, using the node
+		 * count it was taken with, before that count is overwritten.
+		 */
+		free_node_array(node, system->NumNodes);
+	} else {
+		system = malloc(sizeof(HsaSystemProperties));
+		if (!system) {
+			ret = HSAKMT_STATUS_NO_MEMORY;
+			goto err_free;
+		}
+		/* No recorded node count to walk, so only the array can go */
+		free(node);
+	}
+
+	*system = sys_props;
+	node = temp_nodes;
+
+	return ret;
+
+err_free:
+	free_node_array(temp_nodes, sys_props.NumNodes);
+	return ret;
+}
+
+/*
+ * Drop the snapshot of the HSA topology information.
+ * Assume lock is held.
+ *
+ * Both halves of the snapshot are always released. A snapshot with zero
+ * nodes legitimately has system set and node NULL. A node array without
+ * system properties is inconsistent: it is reported and only the array
+ * itself can be freed, because the node count is unknown.
+ *
+ * Always returns HSAKMT_STATUS_SUCCESS.
+ */
+HSAKMT_STATUS
+topology_drop_snapshot(void)
+{
+	if (node) {
+		if (system) {
+			uint64_t nodeid;
+
+			/* Remove state */
+			for (nodeid = 0; nodeid < system->NumNodes; nodeid++) {
+				free_node(&node[nodeid]);
+			}
+		} else {
+			printf("Probable inconsistency?\n");
+		}
+
+		free(node);
+		node = NULL;
+	}
+
+	free(system);
+	system = NULL;
+
+	return HSAKMT_STATUS_SUCCESS;
+}
+
+/*
+ * Check that nodeid refers to a node of the current snapshot and, when
+ * gpu_id is not NULL, store that node's GPU id in *gpu_id.
+ *
+ * Returns HSAKMT_STATUS_INVALID_NODE_UNIT when there is no snapshot or the
+ * id is out of range, HSAKMT_STATUS_SUCCESS otherwise.
+ */
+HSAKMT_STATUS
+validate_nodeid(uint32_t nodeid, uint32_t *gpu_id)
+{
+	if (nodeid >= MAX_NODES)
+		return HSAKMT_STATUS_INVALID_NODE_UNIT;
+
+	if (node == NULL || system == NULL)
+		return HSAKMT_STATUS_INVALID_NODE_UNIT;
+
+	if (nodeid >= system->NumNodes)
+		return HSAKMT_STATUS_INVALID_NODE_UNIT;
+
+	if (gpu_id != NULL)
+		*gpu_id = node[nodeid].gpu_id;
+
+	return HSAKMT_STATUS_SUCCESS;
+}
+
+/*
+ * Translate a GPU id into the index of the node that carries it.
+ *
+ * On success the index is stored in *node_id. Returns
+ * HSAKMT_STATUS_INVALID_NODE_UNIT when no snapshot is available or no node
+ * has this GPU id, HSAKMT_STATUS_SUCCESS otherwise.
+ */
+HSAKMT_STATUS
+gpuid_to_nodeid(uint32_t gpu_id, uint32_t* node_id){
+	uint32_t node_idx;
+
+	/* Without a snapshot there is nothing to search (and nothing to dereference) */
+	if (!system || !node)
+		return HSAKMT_STATUS_INVALID_NODE_UNIT;
+
+	for(node_idx = 0; node_idx < system->NumNodes; node_idx++){
+		if (node[node_idx].gpu_id == gpu_id){
+			*node_id = node_idx;
+			return HSAKMT_STATUS_SUCCESS;
+		}
+	}
+
+	return HSAKMT_STATUS_INVALID_NODE_UNIT;
+
+}
+
+/*
+ * Take a fresh topology snapshot and copy its system-wide properties into
+ * *SystemProperties. The snapshot is taken under hsakmt_mutex.
+ *
+ * Returns HSAKMT_STATUS_INVALID_PARAMETER for a NULL argument, otherwise the
+ * status of topology_take_snapshot().
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtAcquireSystemProperties(
+	HsaSystemProperties* SystemProperties	//OUT
+	)
+{
+	HSAKMT_STATUS status;
+	CHECK_KFD_OPEN();
+
+	if (SystemProperties == NULL)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
+	pthread_mutex_lock(&hsakmt_mutex);
+
+	status = topology_take_snapshot();
+	if (status == HSAKMT_STATUS_SUCCESS) {
+		assert(system);
+		*SystemProperties = *system;
+	}
+
+	pthread_mutex_unlock(&hsakmt_mutex);
+	return status;
+}
+
+/*
+ * Release the current topology snapshot under hsakmt_mutex.
+ * Returns the status of topology_drop_snapshot().
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtReleaseSystemProperties(void)
+{
+	HSAKMT_STATUS status;
+
+	CHECK_KFD_OPEN();
+
+	pthread_mutex_lock(&hsakmt_mutex);
+	status = topology_drop_snapshot();
+	pthread_mutex_unlock(&hsakmt_mutex);
+
+	return status;
+}
+
+/*
+ * Copy the properties of node NodeId from the current snapshot into
+ * *NodeProperties. The reported NumMemoryBanks is the sysfs bank count plus
+ * NUM_OF_HEAPS.
+ *
+ * Returns HSAKMT_STATUS_INVALID_PARAMETER for a NULL argument or a NodeId
+ * beyond the snapshot, HSAKMT_STATUS_INVALID_NODE_UNIT when no snapshot has
+ * been acquired, the status of validate_nodeid() if it rejects the node,
+ * and HSAKMT_STATUS_SUCCESS otherwise.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtGetNodeProperties(
+	HSAuint32 NodeId,			//IN
+	HsaNodeProperties* NodeProperties	//OUT
+	)
+{
+	HSAKMT_STATUS err;
+	uint32_t gpu_id;
+
+	if (!NodeProperties)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
+	CHECK_KFD_OPEN();
+	pthread_mutex_lock(&hsakmt_mutex);
+
+	/* KFD ADD page 18, snapshot protocol violation */
+	if (system == NULL) {
+		err = HSAKMT_STATUS_INVALID_NODE_UNIT;
+		assert(system);
+		goto out;
+	}
+
+	if (NodeId >= system->NumNodes) {
+		err = HSAKMT_STATUS_INVALID_PARAMETER;
+		goto out;
+	}
+
+	/* Leave through "out" so hsakmt_mutex is released on this path too */
+	err = validate_nodeid(NodeId, &gpu_id);
+	if (err != HSAKMT_STATUS_SUCCESS)
+		goto out;
+
+	*NodeProperties = node[NodeId].node;
+	NodeProperties->NumMemoryBanks += NUM_OF_HEAPS;
+
+	err = HSAKMT_STATUS_SUCCESS;
+
+out:
+	pthread_mutex_unlock(&hsakmt_mutex);
+	return err;
+}
+
+/*
+ * Fill an array with the memory bank descriptions of one node.
+ *
+ * NodeId:           index of the node to query.
+ * NumBanks:         number of entries available in MemoryProperties.
+ * MemoryProperties: output array. It first receives up to
+ *                   MIN(node[NodeId].node.NumMemoryBanks, NumBanks) entries
+ *                   copied from node[NodeId].mem. If room remains, an LDS heap
+ *                   entry is appended, then (when LocalMemSize > 0) a private
+ *                   frame-buffer heap entry. For the appended entries only
+ *                   HeapType, SizeInBytes and VirtualBaseAddress are written.
+ *
+ * Returns HSAKMT_STATUS_INVALID_PARAMETER when MemoryProperties is NULL or
+ * NodeId is not below system->NumNodes, HSAKMT_STATUS_INVALID_NODE_UNIT when
+ * no snapshot is loaded, the validate_nodeid() status when that check fails,
+ * and HSAKMT_STATUS_SUCCESS otherwise.
+ *
+ * Every path taken after hsakmt_mutex is locked leaves through the "out"
+ * label so the mutex is always released.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtGetNodeMemoryProperties(
+	HSAuint32		NodeId,			//IN
+	HSAuint32		NumBanks,		//IN
+	HsaMemoryProperties*	MemoryProperties	//OUT
+	)
+{
+	HSAKMT_STATUS err;
+	uint32_t i, gpu_id;
+
+	if (!MemoryProperties)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
+	CHECK_KFD_OPEN();
+	pthread_mutex_lock(&hsakmt_mutex);
+
+	/* KFD ADD page 18, snapshot protocol violation */
+	if (system == NULL) {
+		err = HSAKMT_STATUS_INVALID_NODE_UNIT;
+		assert(system);
+		goto out;
+	}
+
+	/* Check still necessary */
+	if (NodeId >= system->NumNodes) {
+		err = HSAKMT_STATUS_INVALID_PARAMETER;
+		goto out;
+	}
+
+	err = validate_nodeid(NodeId, &gpu_id);
+	if (err != HSAKMT_STATUS_SUCCESS)
+		goto out;	/* must not return directly: the mutex is held */
+
+	for (i = 0; i < MIN(node[NodeId].node.NumMemoryBanks, NumBanks); i++) {
+		assert(node[NodeId].mem);
+		MemoryProperties[i] = node[NodeId].mem[i];
+	}
+
+	/* Add LDS */
+	if (i < NumBanks) {
+		MemoryProperties[i].HeapType = HSA_HEAPTYPE_GPU_LDS;
+		MemoryProperties[i].SizeInBytes = node[NodeId].node.LDSSizeInKB * 1024;
+		MemoryProperties[i].VirtualBaseAddress = fmm_get_aperture_base(FMM_LDS, gpu_id);
+		i++;
+	}
+
+	/* Add Local memory - HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE */
+	if ((i < NumBanks) && (node[NodeId].node.LocalMemSize > 0)) {
+		MemoryProperties[i].HeapType = HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE;
+		MemoryProperties[i].SizeInBytes = node[NodeId].node.LocalMemSize;
+		MemoryProperties[i].VirtualBaseAddress = fmm_get_aperture_base(FMM_GPUVM, gpu_id);
+		i++;
+	}
+
+	err = HSAKMT_STATUS_SUCCESS;
+
+out:
+	pthread_mutex_unlock(&hsakmt_mutex);
+	return err;
+}
+
+/*
+ * Copy cache descriptions of one node out of the current topology snapshot.
+ *
+ * NodeId:          index of the node to query.
+ * ProcessorId:     not read by this function.
+ * NumCaches:       number of entries requested; must not exceed
+ *                  node[NodeId].node.NumCaches.
+ * CacheProperties: output array; receives entries copied from
+ *                  node[NodeId].cache.
+ *
+ * Returns HSAKMT_STATUS_INVALID_PARAMETER when CacheProperties is NULL,
+ * NodeId is not below system->NumNodes, or NumCaches exceeds the node's cache
+ * count; HSAKMT_STATUS_INVALID_NODE_UNIT when no snapshot is loaded;
+ * HSAKMT_STATUS_SUCCESS otherwise. The snapshot is read with hsakmt_mutex
+ * held.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtGetNodeCacheProperties(
+	HSAuint32		NodeId,		//IN
+	HSAuint32		ProcessorId,	//IN
+	HSAuint32		NumCaches,	//IN
+	HsaCacheProperties*	CacheProperties	//OUT
+	)
+{
+	HSAKMT_STATUS status;
+	uint32_t idx;
+
+	if (!CacheProperties)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
+	CHECK_KFD_OPEN();
+	pthread_mutex_lock(&hsakmt_mutex);
+
+	if (system == NULL) {
+		/* KFD ADD page 18, snapshot protocol violation */
+		status = HSAKMT_STATUS_INVALID_NODE_UNIT;
+		assert(system);
+	} else if (NodeId >= system->NumNodes || NumCaches > node[NodeId].node.NumCaches) {
+		status = HSAKMT_STATUS_INVALID_PARAMETER;
+	} else {
+		for (idx = 0; idx < MIN(node[NodeId].node.NumCaches, NumCaches); idx++) {
+			assert(node[NodeId].cache);
+			CacheProperties[idx] = node[NodeId].cache[idx];
+		}
+		status = HSAKMT_STATUS_SUCCESS;
+	}
+
+	pthread_mutex_unlock(&hsakmt_mutex);
+	return status;
+}
+
+/*
+ * Copy IO link descriptions of one node out of the current topology snapshot.
+ *
+ * NodeId:           index of the node to query.
+ * NumIoLinks:       number of entries requested; must not exceed
+ *                   node[NodeId].node.NumIOLinks.
+ * IoLinkProperties: output array; receives entries copied from
+ *                   node[NodeId].link.
+ *
+ * Returns HSAKMT_STATUS_INVALID_PARAMETER when IoLinkProperties is NULL,
+ * NodeId is not below system->NumNodes, or NumIoLinks exceeds the node's link
+ * count; HSAKMT_STATUS_INVALID_NODE_UNIT when no snapshot is loaded;
+ * HSAKMT_STATUS_SUCCESS otherwise. The snapshot is read with hsakmt_mutex
+ * held.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtGetNodeIoLinkProperties(
+	HSAuint32		NodeId,			//IN
+	HSAuint32		NumIoLinks,		//IN
+	HsaIoLinkProperties*	IoLinkProperties	//OUT
+	)
+{
+	HSAKMT_STATUS status;
+	uint32_t idx;
+
+	if (!IoLinkProperties)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
+	CHECK_KFD_OPEN();
+
+	pthread_mutex_lock(&hsakmt_mutex);
+
+	if (system == NULL) {
+		/* KFD ADD page 18, snapshot protocol violation */
+		status = HSAKMT_STATUS_INVALID_NODE_UNIT;
+		assert(system);
+	} else if (NodeId >= system->NumNodes || NumIoLinks > node[NodeId].node.NumIOLinks) {
+		status = HSAKMT_STATUS_INVALID_PARAMETER;
+	} else {
+		for (idx = 0; idx < MIN(node[NodeId].node.NumIOLinks, NumIoLinks); idx++) {
+			assert(node[NodeId].link);
+			IoLinkProperties[idx] = node[NodeId].link[idx];
+		}
+		status = HSAKMT_STATUS_SUCCESS;
+	}
+
+	pthread_mutex_unlock(&hsakmt_mutex);
+	return status;
+}
+
+/*
+ * Look up the DeviceId stored for a node in the current topology snapshot.
+ *
+ * Returns node[node_id].node.DeviceId, or 0 when no snapshot is loaded (node
+ * or system is NULL) or node_id is not below system->NumNodes.
+ */
+uint16_t get_device_id_by_node(HSAuint32 node_id)
+{
+	uint16_t device_id = 0;
+
+	if (node && system && node_id < system->NumNodes)
+		device_id = node[node_id].node.DeviceId;
+
+	return device_id;
+}
+
+/*
+ * Read the CPU stepping from /proc/cpuinfo.
+ *
+ * Only the first PAGE_SIZE bytes of the file are examined. *stepping is set
+ * to 0 and then overwritten with the value parsed from the first "stepping"
+ * line found, if any.
+ *
+ * stepping: output; written only when data was read from the file.
+ *
+ * Returns 0 when the file was read (whether or not a "stepping" line was
+ * found), -1 when the file cannot be opened or the buffer cannot be
+ * allocated, and -2 when nothing could be read.
+ */
+static int get_cpu_stepping(uint16_t* stepping)
+{
+	/* Initialised so the success path returns a defined value. */
+	int ret = 0;
+	FILE* fd = fopen("/proc/cpuinfo", "r");
+	if (!fd)
+		return -1;
+
+	char* read_buf = malloc(PAGE_SIZE);
+	if (!read_buf) {
+		ret = -1;
+		goto err1;
+	}
+
+	int read_size = fread(read_buf, 1, PAGE_SIZE, fd);
+	if (read_size <= 0) {
+		ret = -2;
+		goto err2;
+	}
+
+	/* Since we're using the buffer as a string, we make sure the string terminates */
+	if (read_size >= PAGE_SIZE)
+		read_size = PAGE_SIZE-1;
+	read_buf[read_size] = 0;
+
+	*stepping = 0;
+
+	char* p = strstr(read_buf, "stepping");
+	if (p)
+		sscanf(p , "stepping\t: %hu\n", stepping);
+
+err2:
+	free(read_buf);
+err1:
+	fclose(fd);
+
+	return ret;
+}
diff --git a/hsakmt/version.c b/hsakmt/version.c
new file mode 100644
index 0000000..95bfec6
--- /dev/null
+++ b/hsakmt/version.c
@@ -0,0 +1,49 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including
+ * the next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "libhsakmt.h"
+#include <stdlib.h>
+#include <string.h>
+#include "linux/kfd_ioctl.h"
+
+/*
+ * Query the kernel interface version through the AMDKFD_IOC_GET_VERSION
+ * ioctl.
+ *
+ * VersionInfo: output; on success KernelInterfaceMajorVersion and
+ *              KernelInterfaceMinorVersion are filled from the ioctl result.
+ *
+ * Returns HSAKMT_STATUS_INVALID_PARAMETER when VersionInfo is NULL,
+ * HSAKMT_STATUS_ERROR when the ioctl returns -1, and HSAKMT_STATUS_SUCCESS
+ * otherwise.
+ * NOTE(review): CHECK_KFD_OPEN() is defined elsewhere; it presumably returns
+ * early when the KFD device is not open - confirm.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtGetVersion(
+	HsaVersionInfo*	VersionInfo	//OUT
+	)
+{
+	CHECK_KFD_OPEN();
+
+	/* Reject a NULL output pointer before it is written through below. */
+	if (!VersionInfo)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
+	struct kfd_ioctl_get_version_args args;
+	memset(&args, 0, sizeof(args));
+
+	if (kmtIoctl(kfd_fd, AMDKFD_IOC_GET_VERSION, &args) == -1)
+		return HSAKMT_STATUS_ERROR;
+
+	VersionInfo->KernelInterfaceMajorVersion = args.major_version;
+	VersionInfo->KernelInterfaceMinorVersion = args.minor_version;
+
+	return HSAKMT_STATUS_SUCCESS;
+}